From c017b4be3e84176cab10eca5e6c4faeb8cfc6f3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Oct 2009 13:33:09 +0000 Subject: kmemleak: Simplify the kmemleak_scan_area() function prototype This function was taking unnecessary arguments that can be determined by kmemleak. The patch also modifies the calling sites. Signed-off-by: Catalin Marinas Cc: Pekka Enberg Cc: Christoph Lameter Cc: Rusty Russell --- kernel/module.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 8b7d8805819d..1eb952097077 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2043,9 +2043,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, unsigned int i; /* only scan the sections containing data */ - kmemleak_scan_area(mod->module_core, (unsigned long)mod - - (unsigned long)mod->module_core, - sizeof(struct module), GFP_KERNEL); + kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); for (i = 1; i < hdr->e_shnum; i++) { if (!(sechdrs[i].sh_flags & SHF_ALLOC)) @@ -2054,8 +2052,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) continue; - kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - - (unsigned long)mod->module_core, + kmemleak_scan_area((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size, GFP_KERNEL); } } -- cgit v1.2.3 From a6f5aa1ea05686ad6e84593a00a04161e6dfb3a3 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 28 Oct 2009 13:33:10 +0000 Subject: kmemleak: Scan the _ftrace_events section in modules This section contains pointers to allocated objects and not scanning it leads to false positives. Reported-by: Zdenek Kabelac Acked-by: Rusty Russell Signed-off-by: Catalin Marinas --- kernel/module.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 1eb952097077..dd29ba43c34f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2380,6 +2380,12 @@ static noinline struct module *load_module(void __user *umod, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * + mod->num_trace_events, GFP_KERNEL); #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ -- cgit v1.2.3 From 14d8c9f3c09e7fd7b9af80904289fe204f5b93c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 00:53:17 +0000 Subject: signal: Fix racy access to __task_cred in kill_pid_info_as_uid() kill_pid_info_as_uid() accesses __task_cred() without being in an RCU read side critical section. tasklist_lock is not protecting that when CONFIG_TREE_PREEMPT_RCU=y. Convert the whole tasklist_lock section to rcu and use lock_task_sighand to prevent the exit race.
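In pattern form, the conversion boils down to the following (a condensed sketch of the resulting function, not the exact hunk; the full diff follows):

	rcu_read_lock();			/* replaces read_lock(&tasklist_lock) */
	p = pid_task(pid, PIDTYPE_PID);		/* pid -> task lookup is RCU safe */
	...
	if (sig) {
		if (lock_task_sighand(p, &flags)) {	/* fails if the task is exiting */
			ret = __send_signal(sig, info, p, 1, 0);
			unlock_task_sighand(p, &flags);
		} else
			ret = -ESRCH;
	}
	rcu_read_unlock();			/* replaces read_unlock(&tasklist_lock) */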
Signed-off-by: Thomas Gleixner LKML-Reference: <20091210004703.232302055@linutronix.de> Acked-by: Oleg Nesterov --- kernel/signal.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 6b982f2cf524..73316568a69c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1175,11 +1175,12 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, int ret = -EINVAL; struct task_struct *p; const struct cred *pcred; + unsigned long flags; if (!valid_signal(sig)) return ret; - read_lock(&tasklist_lock); + rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (!p) { ret = -ESRCH; @@ -1196,14 +1197,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = security_task_kill(p, info, sig, secid); if (ret) goto out_unlock; - if (sig && p->sighand) { - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = __send_signal(sig, info, p, 1, 0); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + + if (sig) { + if (lock_task_sighand(p, &flags)) { + ret = __send_signal(sig, info, p, 1, 0); + unlock_task_sighand(p, &flags); + } else + ret = -ESRCH; } out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); -- cgit v1.2.3 From 7cf7db8df0b78076eafa4ead47559344ca7b7a43 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 00:53:21 +0000 Subject: signals: Fix more rcu assumptions 1) Remove the misleading comment in __sigqueue_alloc() which claims that holding a spinlock is equivalent to rcu_read_lock(). 2) Add an rcu_read_lock/unlock around the __task_cred() access in __sigqueue_alloc(). This needs to be revisited to remove the remaining users of read_lock(&tasklist_lock) but that's outside the scope of this patch. Signed-off-by: Thomas Gleixner LKML-Reference: <20091210004703.269843657@linutronix.de> --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 73316568a69c..f67545f9394c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi struct user_struct *user; /* - * We won't get problems with the target's UID changing under us - * because changing it requires RCU be used, and if t != current, the - * caller must be holding the RCU readlock (by way of a spinlock) and - * we use RCU protection here + * Protect access to @t credentials. This can go away when all + * callers hold rcu read lock. */ + rcu_read_lock(); user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); + rcu_read_unlock(); if (override_rlimit || atomic_read(&user->sigpending) <= -- cgit v1.2.3 From d4581a239a40319205762b76c01eb6363f277efa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 00:52:51 +0000 Subject: sys: Fix missing rcu protection for __task_cred() access commit c69e8d9 (CRED: Use RCU to access another task's creds and to release a task's own creds) added non rcu_read_lock() protected access to task creds of the target task in set_one_prio(). The comment above the function says: * - the caller must hold the RCU read lock The calling code in sys_setpriority does read_lock(&tasklist_lock) but not rcu_read_lock(). This works only when CONFIG_TREE_PREEMPT_RCU=n.
With CONFIG_TREE_PREEMPT_RCU=y the rcu_callbacks can run in the tick interrupt when they see no read side critical section. There is another instance of __task_cred() in sys_setpriority() itself which is equally unprotected. Wrap the whole code section into a rcu read side critical section to fix this quick and dirty. Will be revisited in course of the read_lock(&tasklist_lock) -> rcu crusade. Oleg noted further: This also fixes another bug here. find_task_by_vpid() is not safe without rcu_read_lock(). I do not mean it is not safe to use the result, just find_pid_ns() by itself is not safe. Usually tasklist gives enough protection, but if copy_process() fails it calls free_pid() lockless and does call_rcu(delayed_put_pid(). This means, without rcu lock find_pid_ns() can't scan the hash table safely. Signed-off-by: Thomas Gleixner LKML-Reference: <20091210004703.029784964@linutronix.de> Acked-by: Paul E. McKenney --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 9968c5fb55b9..bc1dc61c31ed 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -163,6 +163,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) if (niceval > 19) niceval = 19; + rcu_read_lock(); read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: @@ -200,6 +201,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) } out_unlock: read_unlock(&tasklist_lock); + rcu_read_unlock(); out: return error; } -- cgit v1.2.3 From bb6eddf7676e1c1f3e637aa93c5224488d99036f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 15:35:10 +0100 Subject: clockevents: Prevent clockevent_devices list corruption on cpu hotplug Xiaotian Feng triggered a list corruption in the clock events list on CPU hotplug and debugged the root cause. If a CPU registers more than one per cpu clock event device, then only the active clock event device is removed on CPU_DEAD. The unused devices are kept in the clock events device list. On CPU up the clock event devices are registered again, which means that we list_add an already enqueued list_head. That results in list corruption. Resolve this by removing all devices which are associated to the dead CPU on CPU_DEAD. Reported-by: Xiaotian Feng Signed-off-by: Thomas Gleixner Tested-by: Xiaotian Feng Cc: stable@kernel.org --- kernel/time/clockevents.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 20a8920029ee..91db2e33d86a 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old, */ void clockevents_notify(unsigned long reason, void *arg) { - struct list_head *node, *tmp; + struct clock_event_device *dev, *tmp; unsigned long flags; + int cpu; spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); @@ -250,8 +251,19 @@ void clockevents_notify(unsigned long reason, void *arg) * Unregister the clock event devices which were * released from the users in the notify chain. 
*/ - list_for_each_safe(node, tmp, &clockevents_released) - list_del(node); + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + cpu = *((int *)arg); + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1) { + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + list_del(&dev->list); + } + } break; default: break; -- cgit v1.2.3 From 01fc0ac198eabcbf460e1ed058860a935b6c2c9a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 19 Apr 2009 21:57:19 +0200 Subject: kbuild: move bounds.h to include/generated Signed-off-by: Sam Ravnborg Cc: Al Viro Signed-off-by: Michal Marek --- .gitignore | 1 - Kbuild | 2 +- Makefile | 2 +- include/linux/mmzone.h | 2 +- include/linux/page-flags.h | 2 +- kernel/bounds.c | 2 +- 6 files changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/.gitignore b/.gitignore index 946c7ec5c922..36d9cd6d4281 100644 --- a/.gitignore +++ b/.gitignore @@ -52,7 +52,6 @@ include/linux/autoconf.h include/linux/compile.h include/linux/version.h include/linux/utsrelease.h -include/linux/bounds.h include/generated # stgit generated dirs diff --git a/Kbuild b/Kbuild index f056b4feee51..1165d7a5ca4a 100644 --- a/Kbuild +++ b/Kbuild @@ -8,7 +8,7 @@ ##### # 1) Generate bounds.h -bounds-file := include/linux/bounds.h +bounds-file := include/generated/bounds.h always := $(bounds-file) targets := $(bounds-file) kernel/bounds.s diff --git a/Makefile b/Makefile index 07711786dc95..b58e9312ce30 100644 --- a/Makefile +++ b/Makefile @@ -1197,7 +1197,7 @@ MRPROPER_DIRS += include/config include2 usr/include include/generated MRPROPER_FILES += .config .config.old include/asm .version .old_version \ include/linux/autoconf.h include/linux/version.h \ include/linux/utsrelease.h \ - include/linux/bounds.h include/asm*/asm-offsets.h \ + include/asm*/asm-offsets.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6f7561730d88..30fe668c2542 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6b202b173955..ef36725aa515 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -8,7 +8,7 @@ #include #ifndef __GENERATING_BOUNDS_H #include -#include +#include #endif /* !__GENERATING_BOUNDS_H */ /* diff --git a/kernel/bounds.c b/kernel/bounds.c index 3c5301381837..98a51f26c136 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -12,7 +12,7 @@ void foo(void) { - /* The enum constants to put into include/linux/bounds.h */ + /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); /* End of constants */ -- cgit v1.2.3 From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:52:28 +0200 Subject: kbuild: move utsrelease.h to include/generated Fix up all users of utsrelease.h Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - Makefile | 5 ++--- arch/alpha/boot/bootp.c | 2 +- arch/alpha/boot/bootpz.c | 2 +- arch/alpha/boot/main.c | 2 +- arch/frv/kernel/setup.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 2 +- 
arch/powerpc/platforms/amigaone/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/bootx_init.c | 2 +- arch/x86/boot/header.S | 2 +- arch/x86/boot/version.c | 2 +- drivers/staging/panel/panel.c | 2 +- include/linux/vermagic.h | 2 +- init/version.c | 2 +- kernel/kexec.c | 2 +- kernel/trace/trace.c | 2 +- 17 files changed, 17 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/.gitignore b/.gitignore index c6c19ea6ea96..002d5304968b 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/version.h -include/linux/utsrelease.h include/generated # stgit generated dirs diff --git a/Makefile b/Makefile index 3bdd932e3d88..860224d7cbcf 100644 --- a/Makefile +++ b/Makefile @@ -968,7 +968,7 @@ endif # prepare2 creates a makefile if using a separate output directory prepare2: prepare3 outputmakefile -prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ +prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) @@ -1005,7 +1005,7 @@ endef include/linux/version.h: $(srctree)/Makefile FORCE $(call filechk,version.h) -include/linux/utsrelease.h: include/config/kernel.release FORCE +include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) PHONY += headerdep @@ -1151,7 +1151,6 @@ CLEAN_FILES += vmlinux System.map \ MRPROPER_DIRS += include/config usr/include include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ - include/linux/utsrelease.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 3af21c789339..3c8d1b25c661 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 1036b515e20c..ade3f129dc27 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -11,7 +11,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 89f3be071ae5..644b7db55438 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index 55e4fab7c0bc..75cf7f4b2fa8 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index bcc69e1f77c1..45c0cb9b67e6 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 9290a7a442d0..fb4eb0df054c 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index cd4ad9aea760..0a6f5ab8aab3 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index cf660916ae0b..9dd789a7370d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54b4641..93e689f4bd86 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 4d88763e39cb..2b15aa488ffb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -13,7 +13,7 @@ */ #include "boot.h" -#include +#include #include const char kernel_version[] = diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 4ce399b6d237..f98a52448eae 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 79b9837d9ca0..cf97b5b9d1fe 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,4 +1,4 @@ -#include +#include #include /* Simply sanity version stamp for modules. */ diff --git a/init/version.c b/init/version.c index 82328aaca1ef..adff586401a5 100644 --- a/init/version.c +++ b/init/version.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef CONFIG_KALLSYMS diff --git a/kernel/kexec.c b/kernel/kexec.c index f336e2107f98..83f54e2a6eed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9ed..bfb1b64bfa9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -12,7 +12,7 @@ * Copyright (C) 2004 William Lee Irwin III */ #include -#include +#include #include #include #include -- cgit v1.2.3 From 7539a3b3d1f892dd97eaf094134d7de55c13befe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 13 Dec 2009 00:07:30 +0100 Subject: sched: Make wakeup side and atomic variants of completion API irq safe Alan Stern noticed that all the wakeup side (and atomic) variants of the completion APIs should be irq safe, but the newly introduced completion_done() and try_wait_for_completion() aren't. The use of the irq unsafe variants in IRQ contexts can cause crashes/hangs. 
Fix the problem by making them use spin_lock_irqsave() and spin_unlock_irqrestore(). Reported-by: Alan Stern Signed-off-by: Rafael J. Wysocki Cc: Linus Torvalds Cc: Zhang Rui Cc: pm list Cc: Peter Zijlstra Cc: David Chinner Cc: Lachlan McIlroy LKML-Reference: <200912130007.30541.rjw@sisk.pl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index ff39cadf621e..8b3532f262d7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5908,14 +5908,15 @@ EXPORT_SYMBOL(wait_for_completion_killable); */ bool try_wait_for_completion(struct completion *x) { + unsigned long flags; int ret = 1; - spin_lock_irq(&x->wait.lock); + spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; else x->done--; - spin_unlock_irq(&x->wait.lock); + spin_unlock_irqrestore(&x->wait.lock, flags); return ret; } EXPORT_SYMBOL(try_wait_for_completion); @@ -5930,12 +5931,13 @@ EXPORT_SYMBOL(try_wait_for_completion); */ bool completion_done(struct completion *x) { + unsigned long flags; int ret = 1; - spin_lock_irq(&x->wait.lock); + spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; - spin_unlock_irq(&x->wait.lock); + spin_unlock_irqrestore(&x->wait.lock, flags); return ret; } EXPORT_SYMBOL(completion_done); -- cgit v1.2.3 From 663997d417330a59a566452f52cfa04c8ffd190b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 12 Dec 2009 13:57:27 -0800 Subject: sched: Use pr_fmt() and pr_<level>() - Convert printk(KERN_<level> to pr_<level> (not KERN_DEBUG) - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Coalesce long format strings - Add missing \n to "ERROR: !SD_LOAD_BALANCE domain has parent" Signed-off-by: Joe Perches Cc: Peter Zijlstra LKML-Reference: <1260655047.2637.7.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- kernel/sched.c | 94 ++++++++++++++++++++++--------------------- kernel/sched_idletask.c | 2 +- 2 files changed, 43 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8b3532f262d7..258c73c6a2f3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -26,6 +26,8 @@ * Thomas Gleixner, Mike Kravetz */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -5337,8 +5339,8 @@ static noinline void __schedule_bug(struct task_struct *prev) { struct pt_regs *regs = get_irq_regs(); - printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", - prev->comm, prev->pid, preempt_count()); + pr_err("BUG: scheduling while atomic: %s/%d/0x%08x\n", - prev->comm, prev->pid, preempt_count()); + prev->comm, prev->pid, preempt_count()); debug_show_held_locks(prev); print_modules(); @@ -6906,23 +6908,23 @@ void sched_show_task(struct task_struct *p) unsigned state; state = p->state ? __ffs(p->state) + 1 : 0; - printk(KERN_INFO "%-13.13s %c", p->comm, + pr_info("%-13.13s %c", p->comm, state < sizeof(stat_nam) - 1 ?
stat_nam[state] : '?'); #if BITS_PER_LONG == 32 if (state == TASK_RUNNING) - printk(KERN_CONT " running "); + pr_cont(" running "); else - printk(KERN_CONT " %08lx ", thread_saved_pc(p)); + pr_cont(" %08lx ", thread_saved_pc(p)); #else if (state == TASK_RUNNING) - printk(KERN_CONT " running task "); + pr_cont(" running task "); else - printk(KERN_CONT " %016lx ", thread_saved_pc(p)); + pr_cont(" %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif - printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, + pr_cont("%5lu %5d %6d 0x%08lx\n", free, task_pid_nr(p), task_pid_nr(p->real_parent), (unsigned long)task_thread_info(p)->flags); @@ -6934,11 +6936,9 @@ void show_state_filter(unsigned long state_filter) struct task_struct *g, *p; #if BITS_PER_LONG == 32 - printk(KERN_INFO - " task PC stack pid father\n"); + pr_info(" task PC stack pid father\n"); #else - printk(KERN_INFO - " task PC stack pid father\n"); + pr_info(" task PC stack pid father\n"); #endif read_lock(&tasklist_lock); do_each_thread(g, p) { @@ -7296,9 +7296,8 @@ again: * leave kernel. */ if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); + pr_info("process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); } } @@ -7805,48 +7804,44 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_DEBUG "%*s domain %d: ", level, "", level); if (!(sd->flags & SD_LOAD_BALANCE)) { - printk("does not load-balance\n"); + pr_cont("does not load-balance\n"); if (sd->parent) - printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" - " has parent"); + pr_err("ERROR: !SD_LOAD_BALANCE domain has parent\n"); return -1; } - printk(KERN_CONT "span %s level %s\n", str, sd->name); + pr_cont("span %s level %s\n", str, sd->name); if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { - printk(KERN_ERR "ERROR: domain->span does not contain " - "CPU%d\n", cpu); + pr_err("ERROR: domain->span does not contain CPU%d\n", cpu); } if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { - printk(KERN_ERR "ERROR: domain->groups does not contain" - " CPU%d\n", cpu); + pr_err("ERROR: domain->groups does not contain CPU%d\n", cpu); } printk(KERN_DEBUG "%*s groups:", level + 1, ""); do { if (!group) { - printk("\n"); - printk(KERN_ERR "ERROR: group is NULL\n"); + pr_cont("\n"); + pr_err("ERROR: group is NULL\n"); break; } if (!group->cpu_power) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not " - "set\n"); + pr_cont("\n"); + pr_err("ERROR: domain->cpu_power not set\n"); break; } if (!cpumask_weight(sched_group_cpus(group))) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: empty group\n"); + pr_cont("\n"); + pr_err("ERROR: empty group\n"); break; } if (cpumask_intersects(groupmask, sched_group_cpus(group))) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: repeated CPUs\n"); + pr_cont("\n"); + pr_err("ERROR: repeated CPUs\n"); break; } @@ -7854,23 +7849,21 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); - printk(KERN_CONT " %s", str); + pr_cont(" %s", str); if (group->cpu_power != SCHED_LOAD_SCALE) { - printk(KERN_CONT " (cpu_power = %d)", - group->cpu_power); + pr_cont(" (cpu_power = %d)", group->cpu_power); } group = group->next; } while (group != sd->groups); - printk(KERN_CONT "\n"); + pr_cont("\n"); if (!cpumask_equal(sched_domain_span(sd), 
groupmask)) - printk(KERN_ERR "ERROR: groups don't span domain->span\n"); + pr_err("ERROR: groups don't span domain->span\n"); if (sd->parent && !cpumask_subset(groupmask, sched_domain_span(sd->parent))) - printk(KERN_ERR "ERROR: parent span is not a superset " - "of domain->span\n"); + pr_err("ERROR: parent span is not a superset of domain->span\n"); return 0; } -- cgit v1.2.3 From 5fe85be081edf0ac92d83f9c39e0ab5c1371eb82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2009 10:14:58 +0000 Subject: sched: Use rcu in sys_sched_getscheduler/sys_sched_getparam() read_lock(&tasklist_lock) does not protect sys_sched_getscheduler() and sys_sched_getparam() against a concurrent update of the policy or scheduler parameters as do_sched_setscheduler() does not take the tasklist_lock. The accessed integers can be retrieved w/o locking and are snapshots anyway. Using rcu_read_lock() to protect find_task_by_vpid() and prevent the task struct from going away is not changing the above situation.
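The resulting pattern for these read-only queries is (condensed sketch; the full hunks follow):

	rcu_read_lock();		/* pins the task_struct; find_task_by_vpid() needs RCU */
	p = find_process_by_pid(pid);
	if (p) {
		retval = security_task_getscheduler(p);
		if (!retval)
			retval = p->policy;	/* plain integer snapshot, no further locking */
	}
	rcu_read_unlock();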
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20091209100706.753790977@linutronix.de> Signed-off-by: Ingo Molnar --- kernel/sched.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 258c73c6a2f3..1782beed2fa7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6458,7 +6458,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) return -EINVAL; retval = -ESRCH; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (p) { retval = security_task_getscheduler(p); @@ -6466,7 +6466,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) retval = p->policy | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } @@ -6484,7 +6484,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) if (!param || pid < 0) return -EINVAL; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); retval = -ESRCH; if (!p) @@ -6495,7 +6495,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) goto out_unlock; lp.sched_priority = p->rt_priority; - read_unlock(&tasklist_lock); + rcu_read_unlock(); /* * This one might sleep, we cannot do it with a spinlock held ... @@ -6505,7 +6505,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) return retval; out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } -- cgit v1.2.3 From 23f5d142519621b16cf2b378cf8adf4dcf01a616 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2009 10:15:01 +0000 Subject: sched: Use rcu in sched_get/set_affinity() tasklist_lock is held read locked to protect the find_task_by_vpid() call and to prevent the task going away. sched_setaffinity acquires a task struct ref and drops tasklist lock right away. The access to the cpus_allowed mask is protected by rq->lock. rcu_read_lock() provides the same protection here. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20091209100706.789059966@linutronix.de> Signed-off-by: Ingo Molnar --- kernel/sched.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 1782beed2fa7..79893123325c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6516,22 +6516,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) int retval; get_online_cpus(); - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); put_online_cpus(); return -ESRCH; } - /* - * It is not safe to call set_cpus_allowed with the - * tasklist_lock held. We will bump the task_struct's - * usage count and then drop tasklist_lock. 
- */ + /* Prevent p going away */ get_task_struct(p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { retval = -ENOMEM; @@ -6617,7 +6613,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) int retval; get_online_cpus(); - read_lock(&tasklist_lock); + rcu_read_lock(); retval = -ESRCH; p = find_process_by_pid(pid); @@ -6633,7 +6629,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) task_rq_unlock(rq, &flags); out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); put_online_cpus(); return retval; -- cgit v1.2.3 From 1a551ae715825bb2a2107a2dd68de024a1fa4e32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2009 10:15:11 +0000 Subject: sched: Use rcu in sched_get_rr_param() read_lock(&tasklist_lock) does not protect sys_sched_get_rr_param() against a concurrent update of the policy or scheduler parameters as do_sched_setscheduler() does not take the tasklist_lock. The access to task->sched_class->get_rr_interval is protected by task_rq_lock(task). Use rcu_read_lock() to protect find_task_by_vpid() and prevent the task struct from going away. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20091209100706.862897167@linutronix.de> Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 79893123325c..db5c26692dd5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6873,7 +6873,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, return -EINVAL; retval = -ESRCH; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_process_by_pid(pid); if (!p) goto out_unlock; @@ -6886,13 +6886,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, time_slice = p->sched_class->get_rr_interval(rq, p); task_rq_unlock(rq, &flags); - read_unlock(&tasklist_lock); + rcu_read_unlock(); jiffies_to_timespec(time_slice, &t); retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; return retval; out_unlock: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } -- cgit v1.2.3 From b9f8fcd55bbdb037e5332dbdb7b494f0b70861ac Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 13 Dec 2009 18:25:02 -0800 Subject: sched: Fix cpu_clock() in NMIs, on !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK Relax stable-sched-clock architectures to not save/disable/restore hardirqs in cpu_clock(). The background is that I was trying to resolve a sparc64 perf issue when I discovered this problem. On sparc64 I implement pseudo NMIs by simply running the kernel at IRQ level 14 when local_irq_disable() is called; this allows performance counter events to still come in at IRQ level 15. This doesn't work if any code in an NMI handler does local_irq_save() or local_irq_disable() since the "disable" will kick us back to cpu IRQ level 14 thus letting NMIs back in and we recurse. The only path that does that in the perf event IRQ handling path is the code supporting frequency based events. It uses cpu_clock(). cpu_clock() simply invokes sched_clock() with IRQs disabled. And that's a fundamental bug all on its own, particularly for the HAVE_UNSTABLE_SCHED_CLOCK case. NMIs can thus get into the sched_clock() code interrupting the local IRQ disable code sections of it. Furthermore, for the not-HAVE_UNSTABLE_SCHED_CLOCK case, the IRQ disabling done by cpu_clock() is just pure overhead and completely unnecessary.
So the core problem is that sched_clock() is not NMI safe, but we are invoking it from NMI contexts in the perf events code (via cpu_clock()). A less important issue is the overhead of IRQ disabling when it isn't necessary in cpu_clock(). CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures are not affected by this patch. Signed-off-by: David S. Miller Acked-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091213.182502.215092085.davem@davemloft.net> Signed-off-by: Ingo Molnar --- kernel/sched_clock.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 479ce5682d7c..5b496132c28a 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); +unsigned long long cpu_clock(int cpu) +{ + unsigned long long clock; + unsigned long flags; + + local_irq_save(flags); + clock = sched_clock_cpu(cpu); + local_irq_restore(flags); + + return clock; +} + #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ void sched_clock_init(void) @@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } -#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ unsigned long long cpu_clock(int cpu) { - unsigned long long clock; - unsigned long flags; + return sched_clock_cpu(cpu); +} - local_irq_save(flags); - clock = sched_clock_cpu(cpu); - local_irq_restore(flags); +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ - return clock; -} EXPORT_SYMBOL_GPL(cpu_clock); -- cgit v1.2.3 From 9ee349ad6d326df3633d43f54202427295999c47 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 16 Dec 2009 18:04:32 +0100 Subject: sched: Fix set_cpu_active() in cpu_down() Sachin found cpu hotplug test failures on powerpc, which made the kernel hang on his POWER box. The problem is that we fail to re-activate a cpu when a hot-unplug fails. Fix this by moving the de-activation into _cpu_down after doing the initial checks. Remove the synchronize_sched() calls and rely on those implied by rebuilding the sched domains using the new mask. Reported-by: Sachin Sant Signed-off-by: Xiaotian Feng Tested-by: Sachin Sant Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.500272612@chello.nl> Signed-off-by: Ingo Molnar --- kernel/cpu.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 291ac586f37f..1c8ddd6ee940 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) return -ENOMEM; cpu_hotplug_begin(); + set_cpu_active(cpu, false); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { @@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu) goto out; } - set_cpu_active(cpu, false); - - /* - * Make sure the all cpus did the reschedule and are not - * using stale version of the cpu_active_mask. - * This is not strictly necessary becuase stop_machine() - * that we run down the line already provides the required - * synchronization. But it's really a side effect and we do not - * want to depend on the innards of the stop_machine here. 
- */ - synchronize_sched(); - err = _cpu_down(cpu, 0); out: @@ -382,19 +371,12 @@ int disable_nonboot_cpus(void) return error; cpu_maps_update_begin(); first_cpu = cpumask_first(cpu_online_mask); - /* We take down all of the non-boot CPUs in one shot to avoid races + /* + * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - for_each_online_cpu(cpu) { - if (cpu == first_cpu) - continue; - set_cpu_active(cpu, false); - } - - synchronize_sched(); - printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) -- cgit v1.2.3 From e6c8fba7771563b2f3dfb96a78f36ec17e15bdf0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:33 +0100 Subject: sched: Fix task_hot() test order Make sure not to access sched_fair fields before verifying it is indeed a sched_fair task. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith CC: stable@kernel.org LKML-Reference: <20091216170517.577998058@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 9c30858b6463..1d8ca25dd6fb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2046,6 +2046,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) { s64 delta; + if (p->sched_class != &fair_sched_class) + return 0; + /* * Buddy candidates are cache hot: */ @@ -2054,9 +2057,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) &p->se == cfs_rq_of(&p->se)->last)) return 1; - if (p->sched_class != &fair_sched_class) - return 0; - if (sysctl_sched_migration_cost == -1) return 1; if (sysctl_sched_migration_cost == 0) -- cgit v1.2.3 From e4f4288842ee12747e10c354d72be7d424c0b627 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:34 +0100 Subject: sched: Select_task_rq_fair() must honour SD_LOAD_BALANCE We should skip !SD_LOAD_BALANCE domains. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.653578430@chello.nl> CC: stable@kernel.org Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5bedf6e3ebf3..ec1d2715620c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1429,6 +1429,9 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag } for_each_domain(cpu, tmp) { + if (!(tmp->flags & SD_LOAD_BALANCE)) + continue; + /* * If power savings logic is enabled for a domain, see if we * are not overloaded, if so, don't balance wider. -- cgit v1.2.3 From 06b83b5fbea273672822b6ee93e16781046553ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:35 +0100 Subject: sched: Use TASK_WAKING for fork wakeups For later convenience use TASK_WAKING for fresh tasks.
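In sketch form, the fork-time handshake after this patch is (condensed from the hunks that follow):

	/* sched_fork(): nobody may run the child or wake it up yet */
	p->state = TASK_WAKING;
	...
	/* wake_up_new_task(): the only path that makes it runnable */
	BUG_ON(p->state != TASK_WAKING);
	p->state = TASK_RUNNING;
	activate_task(rq, p, 0);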
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.732561278@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 1d8ca25dd6fb..1672823aabfe 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2540,14 +2540,6 @@ static void __sched_fork(struct task_struct *p) #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); #endif - - /* - * We mark the process as running here, but have not actually - * inserted it onto the runqueue yet. This guarantees that - * nobody will actually run it, and a signal or other external - * event cannot wake it up and insert it on the runqueue either. - */ - p->state = TASK_RUNNING; } /* @@ -2558,6 +2550,12 @@ void sched_fork(struct task_struct *p, int clone_flags) int cpu = get_cpu(); __sched_fork(p); + /* + * We mark the process as waking here. This guarantees that + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ + p->state = TASK_WAKING; /* * Revert to default priority/policy on fork if requested. @@ -2626,7 +2624,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) struct rq *rq; rq = task_rq_lock(p, &flags); - BUG_ON(p->state != TASK_RUNNING); + BUG_ON(p->state != TASK_WAKING); + p->state = TASK_RUNNING; update_rq_clock(rq); activate_task(rq, p, 0); trace_sched_wakeup_new(rq, p, 1); @@ -6984,6 +6983,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) raw_spin_lock_irqsave(&rq->lock, flags); __sched_fork(idle); + idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); -- cgit v1.2.3 From e2912009fb7b715728311b0d8fe327a1432b3f79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:36 +0100 Subject: sched: Ensure set_task_cpu() is never called on blocked tasks In order to clean up the set_task_cpu() rq dependencies we need to ensure it is never called on blocked tasks because such usage does not pair with consistent rq->lock usage. This puts the migration burden on ttwu(). Furthermore we need to close a race against changing ->cpus_allowed, since select_task_rq() runs with only preemption disabled. For sched_fork() this is safe because the child isn't in the tasklist yet; for wakeup we fix this by synchronizing set_cpus_allowed_ptr() against TASK_WAKING, which leaves sched_exec to be a problem. This also closes a hole in (6ad4c1888 sched: Fix balance vs hotplug race) where ->select_task_rq() doesn't validate the result against the sched_domain/root_domain.
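The TASK_WAKING synchronization in set_cpus_allowed_ptr() amounts to the following (condensed sketch; the full hunk follows):

again:
	while (p->state == TASK_WAKING)		/* wait until ttwu() has placed the task */
		cpu_relax();

	rq = task_rq_lock(p, &flags);

	if (p->state == TASK_WAKING) {		/* re-check under rq->lock, then retry */
		task_rq_unlock(rq, &flags);
		goto again;
	}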
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.807938893@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 85 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 1672823aabfe..33d7965f63f0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2018,22 +2018,15 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, */ void kthread_bind(struct task_struct *p, unsigned int cpu) { - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - /* Must have done schedule() in kthread() before we set_task_cpu */ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { WARN_ON(1); return; } - raw_spin_lock_irqsave(&rq->lock, flags); - update_rq_clock(rq); - set_task_cpu(p, cpu); p->cpus_allowed = cpumask_of_cpu(cpu); p->rt.nr_cpus_allowed = 1; p->flags |= PF_THREAD_BOUND; - raw_spin_unlock_irqrestore(&rq->lock, flags); } EXPORT_SYMBOL(kthread_bind); @@ -2074,6 +2067,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) struct cfs_rq *old_cfsrq = task_cfs_rq(p), *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); +#ifdef CONFIG_SCHED_DEBUG + /* + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ + WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING); +#endif + trace_sched_migrate_task(p, new_cpu); if (old_cpu != new_cpu) { @@ -2107,13 +2108,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * If the task is not on a runqueue (and not running), then - * it is sufficient to simply update the task's cpu field. + * the next wake-up will properly place the task. */ - if (!p->se.on_rq && !task_running(rq, p)) { - update_rq_clock(rq); - set_task_cpu(p, dest_cpu); + if (!p->se.on_rq && !task_running(rq, p)) return 0; - } init_completion(&req->done); req->task = p; @@ -2319,10 +2317,42 @@ void task_oncpu_function_call(struct task_struct *p, } #ifdef CONFIG_SMP +/* + * Called from: + * + * - fork, @p is stable because it isn't on the tasklist yet + * + * - exec, @p is unstable XXX + * + * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so + * we should be good. + */ static inline int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) { - return p->sched_class->select_task_rq(p, sd_flags, wake_flags); + int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); + + /* + * In order not to call set_task_cpu() on a blocking task we need + * to rely on ttwu() to place the task on a valid ->cpus_allowed + * cpu. + * + * Since this is common to all placement strategies, this lives here. + * + * [ this allows ->select_task() to simply return task_cpu(p) and + * not worry about this generic constraint ] + */ + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || + !cpu_active(cpu))) { + + cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); + /* + * XXX: race against hot-plug modifying cpu_active_mask + */ + BUG_ON(cpu >= nr_cpu_ids); + } + + return cpu; } #endif @@ -7098,7 +7128,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) struct rq *rq; int ret = 0; + /* + * Since we rely on wake-ups to migrate sleeping tasks, don't change + * the ->cpus_allowed mask from under waking tasks, which would be + * possible when we change rq->lock in ttwu(), so synchronize against + * TASK_WAKING to avoid that. 
+ */ +again: + while (p->state == TASK_WAKING) + cpu_relax(); + rq = task_rq_lock(p, &flags); + + if (p->state == TASK_WAKING) { + task_rq_unlock(rq, &flags); + goto again; + } + if (!cpumask_intersects(new_mask, cpu_active_mask)) { ret = -EINVAL; goto out; @@ -7154,7 +7200,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) { struct rq *rq_dest, *rq_src; - int ret = 0, on_rq; + int ret = 0; if (unlikely(!cpu_active(dest_cpu))) return ret; @@ -7170,12 +7216,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) goto fail; - on_rq = p->se.on_rq; - if (on_rq) + /* + * If we're not on a rq, the next wake-up will ensure we're + * placed properly. + */ + if (p->se.on_rq) { deactivate_task(rq_src, p, 0); - - set_task_cpu(p, dest_cpu); - if (on_rq) { + set_task_cpu(p, dest_cpu); activate_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p, 0); } -- cgit v1.2.3 From 3802290628348674985d14914f9bfee7b9084548 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:37 +0100 Subject: sched: Fix sched_exec() balancing Since we access ->cpus_allowed without holding rq->lock we need a retry loop to validate the result, this comes for near free when we merge sched_migrate_task() into sched_exec() since that already does the needed check. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.884743662@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 33d7965f63f0..63e55ac242d1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2322,7 +2322,7 @@ void task_oncpu_function_call(struct task_struct *p, * * - fork, @p is stable because it isn't on the tasklist yet * - * - exec, @p is unstable XXX + * - exec, @p is unstable, retry loop * * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so * we should be good. @@ -3132,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) } /* - * If dest_cpu is allowed for this process, migrate the task to it. - * This is accomplished by forcing the cpu_allowed mask to only - * allow dest_cpu, which will force the cpu onto dest_cpu. Then - * the cpu_allowed mask is restored. + * sched_exec - execve() is a valuable balancing opportunity, because at + * this point the task has the smallest effective memory and cache footprint. 
*/ -static void sched_migrate_task(struct task_struct *p, int dest_cpu) +void sched_exec(void) { + struct task_struct *p = current; struct migration_req req; + int dest_cpu, this_cpu; unsigned long flags; struct rq *rq; +again: + this_cpu = get_cpu(); + dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); + if (dest_cpu == this_cpu) { + put_cpu(); + return; + } + rq = task_rq_lock(p, &flags); + put_cpu(); + + /* + * select_task_rq() can race against ->cpus_allowed + */ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) - || unlikely(!cpu_active(dest_cpu))) - goto out; + || unlikely(!cpu_active(dest_cpu))) { + task_rq_unlock(rq, &flags); + goto again; + } /* force the process onto the specified CPU */ if (migrate_task(p, dest_cpu, &req)) { @@ -3161,23 +3176,9 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) return; } -out: task_rq_unlock(rq, &flags); } -/* - * sched_exec - execve() is a valuable balancing opportunity, because at - * this point the task has the smallest effective memory and cache footprint. - */ -void sched_exec(void) -{ - int new_cpu, this_cpu = get_cpu(); - new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); - put_cpu(); - if (new_cpu != this_cpu) - sched_migrate_task(current, new_cpu); -} - /* * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. -- cgit v1.2.3 From 5da9a0fb673a0ea0a093862f95f6b89b3390c31e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:38 +0100 Subject: sched: Fix select_task_rq() vs hotplug issues Since select_task_rq() is now responsible for guaranteeing ->cpus_allowed and cpu_active_mask, we need to verify this. select_task_rq_rt() can blindly return smp_processor_id()/task_cpu() without checking the valid masks, select_task_rq_fair() can do the same in the rare case that all SD_flags are disabled. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.961475466@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 75 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 63e55ac242d1..cc40bdadee7a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2317,6 +2317,43 @@ void task_oncpu_function_call(struct task_struct *p, } #ifdef CONFIG_SMP +static int select_fallback_rq(int cpu, struct task_struct *p) +{ + int dest_cpu; + const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); + + /* Look for allowed, online CPU in same node. */ + for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + return dest_cpu; + + /* Any allowed, online CPU? */ + dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); + if (dest_cpu < nr_cpu_ids) + return dest_cpu; + + /* No more Mr. Nice Guy. */ + if (dest_cpu >= nr_cpu_ids) { + rcu_read_lock(); + cpuset_cpus_allowed_locked(p, &p->cpus_allowed); + rcu_read_unlock(); + dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); + + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. 
+ */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } + } + + return dest_cpu; +} + /* * Called from: * @@ -2343,14 +2380,8 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) * not worry about this generic constraint ] */ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || - !cpu_active(cpu))) { - - cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); - /* - * XXX: race against hot-plug modifying cpu_active_mask - */ - BUG_ON(cpu >= nr_cpu_ids); - } + !cpu_active(cpu))) + cpu = select_fallback_rq(task_cpu(p), p); return cpu; } @@ -7319,36 +7350,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { int dest_cpu; - const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); again: - /* Look for allowed, online CPU in same node. */ - for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) - goto move; - - /* Any allowed, online CPU? */ - dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); - if (dest_cpu < nr_cpu_ids) - goto move; - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpuset_cpus_allowed_locked(p, &p->cpus_allowed); - dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); - - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - pr_info("process %d (%s) no longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } - } + dest_cpu = select_fallback_rq(dead_cpu, p); -move: /* It can have affinity changed while we were choosing. */ if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) goto again; -- cgit v1.2.3 From 881232b70b195768a71cd74ff4b4e8ab9502997b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:39 +0100 Subject: sched: Move kthread_bind() back to kthread.c Since kthread_bind() lost its dependencies on sched.c, move it back where it came from. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.039524041@chello.nl> Signed-off-by: Ingo Molnar --- kernel/kthread.c | 23 +++++++++++++++++++++++ kernel/sched.c | 26 -------------------------- 2 files changed, 23 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index ab7ae57773e1..fbb6222fe7e0 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -149,6 +149,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create); +/** + * kthread_bind - bind a just-created kthread to a cpu. + * @p: thread created by kthread_create(). + * @cpu: cpu (might not be online, must be possible) for @k to run on. + * + * Description: This function is equivalent to set_cpus_allowed(), + * except that @cpu doesn't need to be online, and the thread must be + * stopped (i.e., just returned from kthread_create()). + */ +void kthread_bind(struct task_struct *p, unsigned int cpu) +{ + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { + WARN_ON(1); + return; + } + + p->cpus_allowed = cpumask_of_cpu(cpu); + p->rt.nr_cpus_allowed = 1; + p->flags |= PF_THREAD_BOUND; +} +EXPORT_SYMBOL(kthread_bind); + /** * kthread_stop - stop a thread created by kthread_create(). 
* @k: thread created by kthread_create(). diff --git a/kernel/sched.c b/kernel/sched.c index cc40bdadee7a..297dc441ff96 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2004,32 +2004,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, p->sched_class->prio_changed(rq, p, oldprio, running); } -/** - * kthread_bind - bind a just-created kthread to a cpu. - * @p: thread created by kthread_create(). - * @cpu: cpu (might not be online, must be possible) for @k to run on. - * - * Description: This function is equivalent to set_cpus_allowed(), - * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create()). - * - * Function lives here instead of kthread.c because it messes with - * scheduler internals which require locking. - */ -void kthread_bind(struct task_struct *p, unsigned int cpu) -{ - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; - p->flags |= PF_THREAD_BOUND; -} -EXPORT_SYMBOL(kthread_bind); - #ifdef CONFIG_SMP /* * Is this task likely cache-hot: -- cgit v1.2.3 From efbbd05a595343a413964ad85a2ad359b7b7efbd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:40 +0100 Subject: sched: Add pre and post wakeup hooks As will be apparent in the next patch, we need a pre wakeup hook for sched_fair task migration, hence rename the post wakeup hook and one pre wakeup. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.114746117@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- kernel/sched.c | 12 ++++++++---- kernel/sched_rt.c | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c858f38e81a..2c9fa1ccebff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1091,7 +1091,8 @@ struct sched_class { enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, const struct cpumask *newmask); diff --git a/kernel/sched.c b/kernel/sched.c index 297dc441ff96..6c571bdd5658 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2412,6 +2412,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (task_contributes_to_load(p)) rq->nr_uninterruptible--; p->state = TASK_WAKING; + + if (p->sched_class->task_waking) + p->sched_class->task_waking(rq, p); + __task_rq_unlock(rq); cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); @@ -2475,8 +2479,8 @@ out_running: p->state = TASK_RUNNING; #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + p->sched_class->task_woken(rq, p); if (unlikely(rq->idle_stamp)) { u64 delta = rq->clock - rq->idle_stamp; @@ -2666,8 +2670,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) trace_sched_wakeup_new(rq, p, 1); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP - if (p->sched_class->task_wake_up) - p->sched_class->task_wake_up(rq, p); + if (p->sched_class->task_woken) + 
p->sched_class->task_woken(rq, p); #endif task_rq_unlock(rq, &flags); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d2ea2828164e..f48328ac216f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq) * If we are not running and we are not going to reschedule soon, we should * try to push tasks away now */ -static void task_wake_up_rt(struct rq *rq, struct task_struct *p) +static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && @@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = { .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, - .task_wake_up = task_wake_up_rt, + .task_woken = task_woken_rt, .switched_from = switched_from_rt, #endif -- cgit v1.2.3 From 88ec22d3edb72b261f8628226cd543589a6d5e1b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:41 +0100 Subject: sched: Remove the cfs_rq dependency from set_task_cpu() In order to remove the cfs_rq dependency from set_task_cpu() we need to ensure the task is cfs_rq invariant for all callsites. The simple approach is to subtract cfs_rq->min_vruntime from se->vruntime on dequeue, and add cfs_rq->min_vruntime on enqueue. However, this has the downside of breaking FAIR_SLEEPERS since we lose the old vruntime as we only maintain the relative position. To solve this, we observe that we only migrate runnable tasks; we do this using deactivate_task(.sleep=0) and activate_task(.wakeup=0), therefore we can restrain the min_vruntime invariance to that state. The only other case is wakeup balancing: since we want to maintain the old vruntime, we cannot make it relative on dequeue, but since we don't migrate inactive tasks, we can do so right before we activate it again. This is where we need the new pre-wakeup hook; we need to call this while still holding the old rq->lock. We could fold it into ->select_task_rq(), but since that has multiple callsites and would obfuscate the locking requirements, that seems like a fudge. This leaves the fork() case: simply make sure that ->task_fork() leaves the ->vruntime in a relative state. This covers all cases where set_task_cpu() gets called, and ensures it sees a relative vruntime.
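To make the dequeue/enqueue invariant above concrete, here is a minimal stand-alone sketch; the toy_* names and the u64 typedef are illustrative stand-ins, not the kernel's actual enqueue/dequeue paths:

typedef unsigned long long u64;

struct toy_cfs_rq { u64 min_vruntime; };
struct toy_se { u64 vruntime; };

/* On migration dequeue: make vruntime relative to the old runqueue. */
static void toy_dequeue_migrate(struct toy_cfs_rq *cfs_rq, struct toy_se *se)
{
	se->vruntime -= cfs_rq->min_vruntime;
}

/* On migration enqueue: re-anchor it against the new runqueue, so the
 * task keeps its relative position even though min_vruntime differs. */
static void toy_enqueue_migrate(struct toy_cfs_rq *cfs_rq, struct toy_se *se)
{
	se->vruntime += cfs_rq->min_vruntime;
}

Wakeup balancing and fork are then handled by doing the same subtraction in task_waking_fair() and task_fork_fair(), as the diff below shows.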
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.191697025@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 6 +----- kernel/sched_fair.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c9fa1ccebff..973b2b89f86d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1116,7 +1116,7 @@ struct sched_class { struct task_struct *task); #ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p); + void (*moved_group) (struct task_struct *p, int on_rq); #endif }; diff --git a/kernel/sched.c b/kernel/sched.c index 6c571bdd5658..f92ce63edfff 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { int old_cpu = task_cpu(p); - struct cfs_rq *old_cfsrq = task_cfs_rq(p), - *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); #ifdef CONFIG_SCHED_DEBUG /* @@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); } - p->se.vruntime -= old_cfsrq->min_vruntime - - new_cfsrq->min_vruntime; __set_task_cpu(p, new_cpu); } @@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk) #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->moved_group) - tsk->sched_class->moved_group(tsk); + tsk->sched_class->moved_group(tsk, on_rq); #endif if (unlikely(running)) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ec1d2715620c..42ac3c9f66f6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); + curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); } @@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) se->vruntime = vruntime; } +#define ENQUEUE_WAKEUP 1 +#define ENQUEUE_MIGRATE 2 + static void -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) +enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { + /* + * Update the normalized vruntime before updating min_vruntime + * through callig update_curr(). + */ + if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) + se->vruntime += cfs_rq->min_vruntime; + /* * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); account_entity_enqueue(cfs_rq, se); - if (wakeup) { + if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); enqueue_sleeper(cfs_rq, se); } @@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); update_min_vruntime(cfs_rq); + + /* + * Normalize the entity after updating the min_vruntime because the + * update can refer to the ->curr item and we need to reflect this + * movement in our normalized position. 
+ */ + if (!sleep) + se->vruntime -= cfs_rq->min_vruntime; } /* @@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int flags = 0; + + if (wakeup) + flags |= ENQUEUE_WAKEUP; + if (p->state == TASK_WAKING) + flags |= ENQUEUE_MIGRATE; for_each_sched_entity(se) { if (se->on_rq) break; cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, wakeup); - wakeup = 1; + enqueue_entity(cfs_rq, se, flags); + flags = ENQUEUE_WAKEUP; } hrtick_update(rq); @@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq) #ifdef CONFIG_SMP +static void task_waking_fair(struct rq *rq, struct task_struct *p) +{ + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + se->vruntime -= cfs_rq->min_vruntime; +} + #ifdef CONFIG_FAIR_GROUP_SCHED /* * effective_load() calculates the load change as seen from the root_task_group @@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p) resched_task(rq->curr); } + se->vruntime -= cfs_rq->min_vruntime; + raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void moved_group_fair(struct task_struct *p) +static void moved_group_fair(struct task_struct *p, int on_rq) { struct cfs_rq *cfs_rq = task_cfs_rq(p); update_curr(cfs_rq); - place_entity(cfs_rq, &p->se, 1); + if (!on_rq) + place_entity(cfs_rq, &p->se, 1); } #endif @@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = { .move_one_task = move_one_task_fair, .rq_online = rq_online_fair, .rq_offline = rq_offline_fair, + + .task_waking = task_waking_fair, #endif .set_curr_task = set_curr_task_fair, -- cgit v1.2.3 From 738d2be4301007f054541c5c4bf7fb6a361c9b3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Dec 2009 18:04:42 +0100 Subject: sched: Simplify set_task_cpu() Rearrange code a bit now that its a simpler function. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170518.269101883@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index f92ce63edfff..8a2bfd37ab4f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2034,11 +2034,8 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) return delta < (s64)sysctl_sched_migration_cost; } - void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { - int old_cpu = task_cpu(p); - #ifdef CONFIG_SCHED_DEBUG /* * We should never call set_task_cpu() on a blocked task, @@ -2049,11 +2046,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); - if (old_cpu != new_cpu) { - p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - } + if (task_cpu(p) == new_cpu) + return; + + p->se.nr_migrations++; + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); __set_task_cpu(p, new_cpu); } -- cgit v1.2.3 From 6e1415467614e854fee660ff6648bd10fa976e95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Dec 2009 19:27:45 +0000 Subject: NOMMU: Optimise away the {dac_,}mmap_min_addr tests In NOMMU mode clamp dac_mmap_min_addr to zero to cause the tests on it to be skipped by the compiler. We do this as the minimum mmap address doesn't make any sense in NOMMU mode. 
mmap_min_addr and round_hint_to_min() can be discarded entirely in NOMMU mode. Signed-off-by: David Howells Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 7 +++++++ kernel/sysctl.c | 2 ++ mm/Kconfig | 1 + security/Makefile | 3 ++- 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/security.h b/include/linux/security.h index 466cbadbd1ef..2c627d361c02 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -95,8 +95,13 @@ struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); +#ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; +#else +#define dac_mmap_min_addr 0UL +#endif + /* * Values used in the task_security_ops calls */ @@ -121,6 +126,7 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +#ifdef CONFIG_MMU /* * If a hint addr is less than mmap_min_addr change hint to be as * low as possible but still greater than mmap_min_addr @@ -135,6 +141,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) } extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#endif #ifdef CONFIG_SECURITY diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 45e4bef0012a..856a24eadf7e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif +#ifdef CONFIG_MMU { .procname = "mmap_min_addr", .data = &dac_mmap_min_addr, @@ -1221,6 +1222,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = mmap_min_addr_handler, }, +#endif #ifdef CONFIG_NUMA { .procname = "numa_zonelist_order", diff --git a/mm/Kconfig b/mm/Kconfig index 43ea8c3a2bbf..ee9f3e0f2b69 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -221,6 +221,7 @@ config KSM config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" + depends on MMU default 4096 help This is the portion of low virtual memory which should be protected diff --git a/security/Makefile b/security/Makefile index bb44e350c618..da20a193c8dd 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,8 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o min_addr.o +obj-y += commoncap.o +obj-$(CONFIG_MMU) += min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o -- cgit v1.2.3 From cf1e367ee84e02ac349ad0858eb65e8a6a511c8b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 17 Dec 2009 11:15:42 +1100 Subject: timers: Remove duplicate setting of new_base in __mod_timer() new_base is set using per_cpu(tvec_bases, cpu) after selecting the desired value of cpu immediately below, so this line is unnecessary.
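A hedged sketch of the dead store being removed (the toy_* helpers are hypothetical stand-ins; the real function uses __get_cpu_var()/per_cpu() and NO_HZ target selection):

struct toy_tvec_base { int cpu; };

struct toy_tvec_base *toy_this_cpu_base(void);	/* assumed helpers */
struct toy_tvec_base *toy_per_cpu_base(int cpu);
int toy_pick_target_cpu(void);

static struct toy_tvec_base *toy_mod_timer_pick(void)
{
	struct toy_tvec_base *new_base;
	int cpu;

	new_base = toy_this_cpu_base();	  /* dead store: overwritten below */
	cpu = toy_pick_target_cpu();	  /* NO_HZ may choose another cpu */
	new_base = toy_per_cpu_base(cpu); /* the only assignment that is used */
	return new_base;
}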
Signed-off-by: Simon Horman LKML-Reference: <20091217001542.GD25317@verge.net.au> Signed-off-by: Thomas Gleixner --- kernel/timer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 5db5a8d26811..15533b792397 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = __get_cpu_var(tvec_bases); - cpu = smp_processor_id(); #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -- cgit v1.2.3 From f6325e30ebd6fc870315b017a5d4a6ab15bf790b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:08 -0600 Subject: cpumask: use cpu_online in kernel/perf_event.c Also, we want to check against nr_cpu_ids, not num_possible_cpus(). The latter works, but the correct bounds check is < nr_cpu_ids. Signed-off-by: Rusty Russell To: Thomas Gleixner --- kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8ab86988bd24..97d1a3dd7a59 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1614,7 +1614,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) * offline CPU and activate it when the CPU comes up, but * that's for later. */ - if (!cpu_isset(cpu, cpu_online_map)) + if (!cpu_online(cpu)) return ERR_PTR(-ENODEV); cpuctx = &per_cpu(perf_cpu_context, cpu); -- cgit v1.2.3 From 62ac12795095dc959649c66ace78708e7ac52477 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:26 -0600 Subject: cpumask: avoid dereferencing struct cpumask struct cpumask will be undefined soon with CONFIG_CPUMASK_OFFSTACK=y, to avoid them being declared on the stack. cpumask_bits() does what we want here (of course, this code is crap). Signed-off-by: Rusty Russell To: Thomas Gleixner --- kernel/time/timer_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 28265636b6c2..bdfb8dd1050c 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -237,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST print_tickdevice(m, tick_get_broadcast_device(), -1); SEQ_printf(m, "tick_broadcast_mask: %08lx\n", - tick_get_broadcast_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_mask())[0]); #ifdef CONFIG_TICK_ONESHOT SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", - tick_get_broadcast_oneshot_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); #endif SEQ_printf(m, "\n"); #endif -- cgit v1.2.3 From 416eb39556a03d1c7e52b0791e9052ccd71db241 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Dec 2009 06:05:49 +0100 Subject: sched: Make warning less noisy Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091216170517.807938893@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 8a2bfd37ab4f..af7dfa74e6bb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2041,7 +2041,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * We should never call set_task_cpu() on a blocked task, * ttwu() will sort out the placement. 
*/ - WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING); + WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING); #endif trace_sched_migrate_task(p, new_cpu); -- cgit v1.2.3 From 234da7bcdc7aaa935846534c3b726dbc79a9cdd5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 20:21:05 +0100 Subject: sched: Teach might_sleep() about preemptible RCU In practice, it is harmless to voluntarily sleep in a rcu_read_lock() section if we are running under preempt rcu, but it is illegal if we build a kernel running non-preemptable rcu. Currently, might_sleep() doesn't notice sleepable operations under rcu_read_lock() sections if we are running under preemptable rcu because preempt_count() is left untouched after rcu_read_lock() in this case. But we want developers who test their changes under such a config to notice the "sleeping while atomic" issues. So we add rcu_read_lock_nesting to preempt_count() in might_sleep() checks. [ v2: Handle rcu-tiny ] Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Cc: Peter Zijlstra LKML-Reference: <1260991265-8451-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 11 +++++++++++ kernel/sched.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c4ba9a78721e..96cc307ed9f4 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -101,4 +101,9 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c93eee5911b0..8044b1b94333 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,6 +45,12 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) @@ -63,6 +69,11 @@ static inline void exit_rcu(void) { } +static inline int rcu_preempt_depth(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock_bh(void) diff --git a/kernel/sched.c b/kernel/sched.c index af7dfa74e6bb..7be88a7be047 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9682,7 +9682,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = preempt_count() & ~PREEMPT_ACTIVE; + int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); } -- cgit v1.2.3 From 077614ee1e93245a3b9a4e1213659405dbeb0ba6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Dec 2009 13:16:31 +0100 Subject: sched: Fix broken assertion There's a preemption race in the set_task_cpu() debug check in that when we get preempted after setting task->state we'd still be on the rq proper, but fail the test. Check for preempted tasks, since those are always on the RQ.
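A toy model of the relaxed check (the constants are illustrative values, not the kernel's): a task preempted with PREEMPT_ACTIVE set is still on the runqueue whatever ->state says, so the debug assertion must let it through.

#define TOY_PREEMPT_ACTIVE	0x10000000
#define TOY_TASK_RUNNING	0
#define TOY_TASK_WAKING		256

/* Returns nonzero only for states that really are suspicious. */
static int toy_set_task_cpu_suspect(long state, unsigned int preempt_count)
{
	if (preempt_count & TOY_PREEMPT_ACTIVE)
		return 0;	/* preempted: always on the rq, any state */
	return state != TOY_TASK_RUNNING && state != TOY_TASK_WAKING;
}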
Signed-off-by: Peter Zijlstra LKML-Reference: <20091217121830.137155561@chello.nl> Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 7be88a7be047..720df108a2d6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2041,7 +2041,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * We should never call set_task_cpu() on a blocked task, * ttwu() will sort out the placement. */ - WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING); + WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && + !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); #endif trace_sched_migrate_task(p, new_cpu); -- cgit v1.2.3 From 3e26120cc7c819c97bc07281ca1fb9017cfe9a39 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 17 Dec 2009 15:27:05 -0800 Subject: kernel/sysctl.c: fix the incomplete part of sysctl_max_map_count-should-be-non-negative.patch It is a mistake that we used 'proc_dointvec', it should be 'proc_dointvec_minmax', as in the original patch. Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 45e4bef0012a..6665761c006d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1131,7 +1131,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_max_map_count, .maxlen = sizeof(sysctl_max_map_count), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, }, #else -- cgit v1.2.3 From 9cd80bbb07fcd6d4d037fad4297496d3b132ac6b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 17 Dec 2009 15:27:15 -0800 Subject: do_wait() optimization: do not place sub-threads on task_struct->children list Thanks to Roland who pointed out de_thread() issues. Currently we add sub-threads to ->real_parent->children list. This buys nothing but slows down do_wait(). With this patch ->children contains only main threads (group leaders). The only complication is that forget_original_parent() should iterate over sub-threads by hand, and de_thread() needs another list_replace() when it changes ->group_leader. Henceforth do_wait_thread() can never see task_detached() && !EXIT_DEAD tasks, we can remove this check (and we can unify do_wait_thread() and ptrace_do_wait()). This change can confuse the optimistic search in mm_update_next_owner(), but this is fixable and minor. Perhaps badness() and oom_kill_process() should be updated, but they should be fixed in any case. 
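A rough stand-alone model of the layout this commit creates (the toy_* types are hypothetical): ->children links only thread-group leaders, so forget_original_parent() must walk each leader's circular thread list by hand.

struct toy_task {
	struct toy_task *first_child;	/* leaders only */
	struct toy_task *next_sibling;
	struct toy_task *next_thread;	/* circular thread-group list */
	struct toy_task *real_parent;
};

static void toy_reparent_children(struct toy_task *father,
				  struct toy_task *reaper)
{
	struct toy_task *p, *t;

	for (p = father->first_child; p; p = p->next_sibling) {
		t = p;
		do {			/* visit every sub-thread by hand */
			t->real_parent = reaper;
			t = t->next_thread;
		} while (t != p);
	}
}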
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Ratan Nalumasu Cc: Vitaly Mayatskikh Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 ++ kernel/exit.c | 36 +++++++++++++++++------------------- kernel/fork.c | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/fs/exec.c b/fs/exec.c index 623a5cc3076a..77db9a97a773 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -826,7 +826,9 @@ static int de_thread(struct task_struct *tsk) attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); + list_replace_rcu(&leader->tasks, &tsk->tasks); + list_replace_init(&leader->sibling, &tsk->sibling); tsk->group_leader = tsk; leader->group_leader = tsk; diff --git a/kernel/exit.c b/kernel/exit.c index 5962d7ccf243..546774a31a66 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_rcu(&p->tasks); + list_del_init(&p->sibling); __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - list_del_init(&p->sibling); } /* @@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father) /* * Any that need to be release_task'd are put on the @dead list. */ -static void reparent_thread(struct task_struct *father, struct task_struct *p, +static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { - if (p->pdeath_signal) - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); - list_move_tail(&p->sibling, &p->real_parent->children); if (task_detached(p)) @@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father) reaper = find_new_reaper(father); list_for_each_entry_safe(p, n, &father->children, sibling) { - p->real_parent = reaper; - if (p->parent == father) { - BUG_ON(task_ptrace(p)); - p->parent = p->real_parent; - } - reparent_thread(father, p, &dead_children); + struct task_struct *t = p; + do { + t->real_parent = reaper; + if (t->parent == father) { + BUG_ON(task_ptrace(t)); + t->parent = t->real_parent; + } + if (t->pdeath_signal) + group_send_sig_info(t->pdeath_signal, + SEND_SIG_NOINFO, t); + } while_each_thread(p, t); + reparent_leader(father, p, &dead_children); } write_unlock_irq(&tasklist_lock); @@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { - /* - * Do not consider detached threads. 
- */ - if (!task_detached(p)) { - int ret = wait_consider_task(wo, 0, p); - if (ret) - return ret; - } + int ret = wait_consider_task(wo, 0, p); + if (ret) + return ret; } return 0; diff --git a/kernel/fork.c b/kernel/fork.c index 202a0ba63d3c..5b2959b3ffc2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1291,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } if (likely(p->pid)) { - list_add_tail(&p->sibling, &p->real_parent->children); tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { @@ -1303,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); + list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } -- cgit v1.2.3 From 6485536bcf499839a54dcda8a8d47ea0bd29b375 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 17 Dec 2009 15:27:27 -0800 Subject: printk: fix new kernel-doc warnings Fix kernel-doc warnings in printk.c: Warning(kernel/printk.c:1422): No description found for parameter 'dumper' Warning(kernel/printk.c:1422): Excess function parameter 'dump' description in 'kmsg_dump_register' Warning(kernel/printk.c:1451): No description found for parameter 'dumper' Warning(kernel/printk.c:1451): Excess function parameter 'dump' description in 'kmsg_dump_unregister' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 1ded8e7dd19b..17463ca2e229 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1412,7 +1412,7 @@ static LIST_HEAD(dump_list); /** * kmsg_dump_register - register a kernel log dumper. - * @dump: pointer to the kmsg_dumper structure + * @dumper: pointer to the kmsg_dumper structure * * Adds a kernel log dumper to the system. The dump callback in the * structure will be called when the kernel oopses or panics and must be @@ -1442,7 +1442,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_register); /** * kmsg_dump_unregister - unregister a kmsg dumper. - * @dump: pointer to the kmsg_dumper structure + * @dumper: pointer to the kmsg_dumper structure * * Removes a dump device from the system. Returns zero on success and * %-EINVAL otherwise. -- cgit v1.2.3 From 6f5d51148921c242680a7a1d9913384a30ab3cbe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Dec 2009 15:59:45 +0000 Subject: fix braindamage in audit_tree.c untag_chunk() ... aka "Al had badly fscked up when writing that thing and nobody noticed until Eric had fixed leaks that used to mask the breakage". The function essentially creates a copy of old array sans one element and replaces the references to elements of original (they are on cyclic lists) with those to corresponding elements of new one. After that the old one is fair game for freeing. First of all, there's a dumb braino: when we get to list_replace_init we use indices for wrong arrays - position in new one with the old array and vice versa. Another bug is more subtle - termination condition is wrong if the element to be excluded happens to be the last one. We shouldn't go until we fill the new array, we should go until we'd finished the old one. Otherwise the element we are trying to kill will remain on the cyclic lists... 
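(A toy version of the corrected copy, simplified to a plain array — the real loop walks cyclic lists and advances both indices together — the point being that the loop must run over every slot of the old array and skip only the victim:)

/* Copy old[0..size-1] into new[0..size-2], skipping index 'victim'. */
static void toy_copy_sans(int *new, const int *old, int size, int victim)
{
	int i, j;

	for (i = j = 0; j < size; j++) {
		if (j == victim)
			continue;	/* the element being killed */
		new[i++] = old[j];
	}
}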
That crap used to be masked by several leaks, so it was not quite trivial to hit. Eric had fixed some of those leaks a while ago and the shit had hit the fan... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/audit_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2451dc6f3282..b36aa9651ba2 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -277,7 +277,7 @@ static void untag_chunk(struct node *p) owner->root = NULL; } - for (i = j = 0; i < size; i++, j++) { + for (i = j = 0; j <= size; i++, j++) { struct audit_tree *s; if (&chunk->owners[j] == p) { list_del_init(&p->list); @@ -290,7 +290,7 @@ static void untag_chunk(struct node *p) if (!s) /* result of earlier fallback */ continue; get_tree(s); - list_replace_init(&chunk->owners[i].list, &new->owners[j].list); + list_replace_init(&chunk->owners[j].list, &new->owners[i].list); } list_replace_rcu(&chunk->hash, &new->hash); -- cgit v1.2.3 From b4c30aad39805902cf5b855aa8a8b22d728ad057 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Dec 2009 16:03:30 +0000 Subject: fix more leaks in audit_tree.c tag_chunk() Several leaks in audit_tree didn't get caught by commit 318b6d3d7ddbcad3d6867e630711b8a705d873d7, including the leak on normal exit in case of multiple rules referring to the same chunk. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/audit_tree.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index b36aa9651ba2..4b05bd9479db 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - put_inotify_watch(watch); + put_inotify_watch(&old->watch); return 0; } } spin_unlock(&hash_lock); chunk = alloc_chunk(old->count + 1); - if (!chunk) + if (!chunk) { + put_inotify_watch(&old->watch); return -ENOMEM; + } mutex_lock(&inode->inotify_mutex); if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { @@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); inotify_evict_watch(&old->watch); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ + put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ + put_inotify_watch(&old->watch); /* and kill it */ return 0; } -- cgit v1.2.3 From 3df0fc5b2e9d8092dcaeb5ae0b6753d85c851d66 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 20 Dec 2009 14:23:57 +0100 Subject: sched: Restore printk sanity Revert the braindead pr_* crap. (Commit 663997d "sched: Use pr_fmt() and pr_<level>()") It's dumb and causes stupid "sched: " strings all over the place. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Cc: Joe Perches Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <1261315437.4314.6.camel@laptop> [ i dont mind the pr_*() patterns that much - but Peter dislikes them with a vengeance.
] [ - v2: remove spurious diffstat from changelog :-/ ] Signed-off-by: Ingo Molnar --- kernel/sched.c | 89 +++++++++++++++++++++++++++---------------------- kernel/sched_idletask.c | 2 +- 2 files changed, 50 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 720df108a2d6..7ffde2ae7868 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -26,8 +26,6 @@ * Thomas Gleixner, Mike Kravetz */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -5375,8 +5373,8 @@ static noinline void __schedule_bug(struct task_struct *prev) { struct pt_regs *regs = get_irq_regs(); - pr_err("BUG: scheduling while atomic: %s/%d/0x%08x\n", - prev->comm, prev->pid, preempt_count()); + printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", + prev->comm, prev->pid, preempt_count()); debug_show_held_locks(prev); print_modules(); @@ -6940,23 +6938,23 @@ void sched_show_task(struct task_struct *p) unsigned state; state = p->state ? __ffs(p->state) + 1 : 0; - pr_info("%-13.13s %c", p->comm, + printk(KERN_INFO "%-13.13s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); #if BITS_PER_LONG == 32 if (state == TASK_RUNNING) - pr_cont(" running "); + printk(KERN_CONT " running "); else - pr_cont(" %08lx ", thread_saved_pc(p)); + printk(KERN_CONT " %08lx ", thread_saved_pc(p)); #else if (state == TASK_RUNNING) - pr_cont(" running task "); + printk(KERN_CONT " running task "); else - pr_cont(" %016lx ", thread_saved_pc(p)); + printk(KERN_CONT " %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif - pr_cont("%5lu %5d %6d 0x%08lx\n", free, + printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, task_pid_nr(p), task_pid_nr(p->real_parent), (unsigned long)task_thread_info(p)->flags); @@ -6968,9 +6966,11 @@ void show_state_filter(unsigned long state_filter) struct task_struct *g, *p; #if BITS_PER_LONG == 32 - pr_info(" task PC stack pid father\n"); + printk(KERN_INFO + " task PC stack pid father\n"); #else - pr_info(" task PC stack pid father\n"); + printk(KERN_INFO + " task PC stack pid father\n"); #endif read_lock(&tasklist_lock); do_each_thread(g, p) { @@ -7828,44 +7828,48 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_DEBUG "%*s domain %d: ", level, "", level); if (!(sd->flags & SD_LOAD_BALANCE)) { - pr_cont("does not load-balance\n"); + printk("does not load-balance\n"); if (sd->parent) - pr_err("ERROR: !SD_LOAD_BALANCE domain has parent\n"); + printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" + " has parent"); return -1; } - pr_cont("span %s level %s\n", str, sd->name); + printk(KERN_CONT "span %s level %s\n", str, sd->name); if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { - pr_err("ERROR: domain->span does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->span does not contain " + "CPU%d\n", cpu); } if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { - pr_err("ERROR: domain->groups does not contain CPU%d\n", cpu); + printk(KERN_ERR "ERROR: domain->groups does not contain" + " CPU%d\n", cpu); } printk(KERN_DEBUG "%*s groups:", level + 1, ""); do { if (!group) { - pr_cont("\n"); - pr_err("ERROR: group is NULL\n"); + printk("\n"); + printk(KERN_ERR "ERROR: group is NULL\n"); break; } if (!group->cpu_power) { - pr_cont("\n"); - pr_err("ERROR: domain->cpu_power not set\n"); + printk(KERN_CONT "\n"); + printk(KERN_ERR "ERROR: domain->cpu_power not " + "set\n"); break; } if (!cpumask_weight(sched_group_cpus(group))) { - 
pr_cont("\n"); - pr_err("ERROR: empty group\n"); + printk(KERN_CONT "\n"); + printk(KERN_ERR "ERROR: empty group\n"); break; } if (cpumask_intersects(groupmask, sched_group_cpus(group))) { - pr_cont("\n"); - pr_err("ERROR: repeated CPUs\n"); + printk(KERN_CONT "\n"); + printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } @@ -7873,21 +7877,23 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); - pr_cont(" %s", str); + printk(KERN_CONT " %s", str); if (group->cpu_power != SCHED_LOAD_SCALE) { - pr_cont(" (cpu_power = %d)", group->cpu_power); + printk(KERN_CONT " (cpu_power = %d)", + group->cpu_power); } group = group->next; } while (group != sd->groups); - pr_cont("\n"); + printk(KERN_CONT "\n"); if (!cpumask_equal(sched_domain_span(sd), groupmask)) - pr_err("ERROR: groups don't span domain->span\n"); + printk(KERN_ERR "ERROR: groups don't span domain->span\n"); if (sd->parent && !cpumask_subset(groupmask, sched_domain_span(sd->parent))) - pr_err("ERROR: parent span is not a superset of domain->span\n"); + printk(KERN_ERR "ERROR: parent span is not a superset " + "of domain->span\n"); return 0; } @@ -8443,7 +8449,8 @@ static int build_numa_sched_groups(struct s_data *d, sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, num); if (!sg) { - pr_warning("Can not alloc domain group for node %d\n", num); + printk(KERN_WARNING "Can not alloc domain group for node %d\n", + num); return -ENOMEM; } d->sched_group_nodes[num] = sg; @@ -8472,8 +8479,8 @@ static int build_numa_sched_groups(struct s_data *d, sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, num); if (!sg) { - pr_warning("Can not alloc domain group for node %d\n", - j); + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", j); return -ENOMEM; } sg->cpu_power = 0; @@ -8701,7 +8708,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, d->sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), GFP_KERNEL); if (!d->sched_group_nodes) { - pr_warning("Can not alloc sched group node list\n"); + printk(KERN_WARNING "Can not alloc sched group node list\n"); return sa_notcovered; } sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; @@ -8718,7 +8725,7 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, return sa_send_covered; d->rd = alloc_rootdomain(); if (!d->rd) { - pr_warning("Cannot alloc root domain\n"); + printk(KERN_WARNING "Cannot alloc root domain\n"); return sa_tmpmask; } return sa_rootdomain; @@ -9700,11 +9707,13 @@ void __might_sleep(char *file, int line, int preempt_offset) return; prev_jiffy = jiffies; - pr_err("BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - pr_err("in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), - current->pid, current->comm); + printk(KERN_ERR + "BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + printk(KERN_ERR + "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); debug_show_held_locks(current); if (irqs_disabled()) diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 21b969a28725..5f93b570d383 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -35,7 +35,7 @@ static void dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) { raw_spin_unlock_irq(&rq->lock); - 
pr_err("bad: scheduling from the idle thread!\n"); + printk(KERN_ERR "bad: scheduling from the idle thread!\n"); dump_stack(); raw_spin_lock_irq(&rq->lock); } -- cgit v1.2.3 From 70f1120527797adb31c68bdc6f1b45e182c342c7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 20 Dec 2009 17:36:27 +0100 Subject: sched: Fix hotplug hang The hot-unplug kstopmachine usage does a wakeup after deactivating the cpu, hence we cannot use cpu_active() here but must rely on the good olde online. Reported-by: Sachin Sant Reported-by: Jens Axboe Signed-off-by: Peter Zijlstra Tested-by: Jens Axboe Cc: Heiko Carstens Cc: Benjamin Herrenschmidt LKML-Reference: <1261326987.4314.24.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 7ffde2ae7868..87f1f47beffe 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2346,7 +2346,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) * not worry about this generic constraint ] */ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || - !cpu_active(cpu))) + !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); return cpu; -- cgit v1.2.3 From 0e2c8b8f55072a98b99e7bdad55c912084d6a526 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 20 Dec 2009 10:50:02 +0100 Subject: resources: fix call to alignf() in allocate_resource() The second parameter to alignf() in allocate_resource() must reflect what new resource is attempted to be allocated, else functions like pcibios_align_resource() (at least on x86) or pcmcia_align() can't work correctly. Commit 1e5ad9679016275d422e36b12a98b0927d76f556 broke this by setting the "new" resource until we're about to return success. To keep the resource untouched when allocate_resource() fails, a "tmp" resource is introduced. Signed-off-by: Dominik Brodowski Acked-by: Bjorn Helgaas Cc: Yinghai Lu Cc: Jesse Barnes Signed-off-by: Linus Torvalds --- kernel/resource.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index dc15686b7a77..af96c1e4b54b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -308,37 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, void *alignf_data) { struct resource *this = root->child; - resource_size_t start, end; + struct resource tmp = *new; - start = root->start; + tmp.start = root->start; /* * Skip past an allocated resource that starts at 0, since the assignment - * of this->start - 1 to new->end below would cause an underflow. + * of this->start - 1 to tmp->end below would cause an underflow. 
*/ if (this && this->start == 0) { - start = this->end + 1; + tmp.start = this->end + 1; this = this->sibling; } for(;;) { if (this) - end = this->start - 1; + tmp.end = this->start - 1; else - end = root->end; - if (start < min) - start = min; - if (end > max) - end = max; - start = ALIGN(start, align); + tmp.end = root->end; + if (tmp.start < min) + tmp.start = min; + if (tmp.end > max) + tmp.end = max; + tmp.start = ALIGN(tmp.start, align); if (alignf) - alignf(alignf_data, new, size, align); - if (start < end && end - start >= size - 1) { - new->start = start; - new->end = start + size - 1; + alignf(alignf_data, &tmp, size, align); + if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { + new->start = tmp.start; + new->end = tmp.start + size - 1; return 0; } if (!this) break; - start = this->end + 1; + tmp.start = this->end + 1; this = this->sibling; } return -EBUSY; -- cgit v1.2.3 From c757bea93bea4b77ebd181cc6dca60c15e3b1a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 21 Dec 2009 22:35:16 -0500 Subject: tracing: Fix setting tracer specific options The function __set_tracer_option() takes as its last parameter a "neg" value. If set it should negate the value of the option. The trace_options_write() passed the value written to the file which is what the new value needs to be set as. But since this is not the negative, it never sets the value. Reported-by: Peter Zijlstra Cc: Li Zefan Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee61915935d5..d0a4c12d1f1c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, if (!!(topt->flags->val & topt->opt->bit) != val) { mutex_lock(&trace_types_lock); ret = __set_tracer_option(current_trace, topt->flags, - topt->opt, val); + topt->opt, !val); mutex_unlock(&trace_types_lock); if (ret) return ret; -- cgit v1.2.3 From 628ff7c1d8d8466a5ad8078bd0206a130f8b8a51 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 18 Dec 2009 09:41:24 -0800 Subject: anonfd: Allow making anon files read-only It seems a couple places such as arch/ia64/kernel/perfmon.c and drivers/infiniband/core/uverbs_main.c could use anon_inode_getfile() instead of a private pseudo-fs + alloc_file(), if only there were a way to get a read-only file. So provide this by having anon_inode_getfile() create a read-only file if we pass O_RDONLY in flags. 
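A hedged userspace mirror of the mapping the patch adds (the TOY_FMODE_* values are illustrative; the kernel uses fmode_t with FMODE_READ/FMODE_WRITE and returns ERR_PTR(-EINVAL) for the invalid case):

#include <fcntl.h>

#define TOY_FMODE_READ	0x1
#define TOY_FMODE_WRITE	0x2

static int toy_open_fmode(int flags, unsigned int *mode)
{
	switch (flags & O_ACCMODE) {
	case O_RDONLY: *mode = TOY_FMODE_READ;			  return 0;
	case O_WRONLY: *mode = TOY_FMODE_WRITE;			  return 0;
	case O_RDWR:   *mode = TOY_FMODE_READ | TOY_FMODE_WRITE; return 0;
	default:       return -1;	/* kernel: ERR_PTR(-EINVAL) */
	}
}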
Signed-off-by: Roland Dreier Signed-off-by: Al Viro --- fs/anon_inodes.c | 12 ++++++++++-- fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- fs/signalfd.c | 2 +- fs/timerfd.c | 2 +- kernel/perf_event.c | 2 +- virt/kvm/kvm_main.c | 4 ++-- 7 files changed, 17 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2c994591f4d7..598237e97221 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -89,11 +89,19 @@ struct file *anon_inode_getfile(const char *name, struct qstr this; struct path path; struct file *file; + fmode_t mode; int error; if (IS_ERR(anon_inode_inode)) return ERR_PTR(-ENODEV); + switch (flags & O_ACCMODE) { + case O_RDONLY: mode = FMODE_READ; break; + case O_WRONLY: mode = FMODE_WRITE; break; + case O_RDWR: mode = FMODE_READ | FMODE_WRITE; break; + default: return ERR_PTR(-EINVAL); + } + if (fops->owner && !try_module_get(fops->owner)) return ERR_PTR(-ENOENT); @@ -121,13 +129,13 @@ struct file *anon_inode_getfile(const char *name, d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops); + file = alloc_file(&path, mode, fops); if (!file) goto err_dput; file->f_mapping = anon_inode_inode->i_mapping; file->f_pos = 0; - file->f_flags = O_RDWR | (flags & O_NONBLOCK); + file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->f_version = 0; file->private_data = priv; diff --git a/fs/eventfd.c b/fs/eventfd.c index 8b47e4200e65..d26402ff06ea 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -339,7 +339,7 @@ struct file *eventfd_file_create(unsigned int count, int flags) ctx->flags = flags; file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, - flags & EFD_SHARED_FCNTL_FLAGS); + O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS)); if (IS_ERR(file)) eventfd_free_ctx(ctx); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 366c503f9657..bd056a5b4efc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1206,7 +1206,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) * a file structure and a free file descriptor. */ error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, - flags & O_CLOEXEC); + O_RDWR | (flags & O_CLOEXEC)); if (error < 0) ep_free(ep); diff --git a/fs/signalfd.c b/fs/signalfd.c index b07565c94386..1dabe4ee02fe 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -236,7 +236,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, * anon_inode_getfd() will install the fd. 
*/ ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, - flags & (O_CLOEXEC | O_NONBLOCK)); + O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK))); if (ufd < 0) kfree(ctx); } else { diff --git a/fs/timerfd.c b/fs/timerfd.c index b042bd7034b1..1bfc95ad5f71 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -200,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - flags & TFD_SHARED_FCNTL_FLAGS); + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); if (ufd < 0) kfree(ctx); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e0eb4a2fe183..1f38270f08c7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4724,7 +4724,7 @@ SYSCALL_DEFINE5(perf_event_open, if (IS_ERR(event)) goto err_put_context; - err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); + err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR); if (err < 0) goto err_free_put_context; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e1f2bf8d7b1e..b5af88167613 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1177,7 +1177,7 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); } /* @@ -1638,7 +1638,7 @@ static int kvm_dev_ioctl_create_vm(void) kvm = kvm_create_vm(); if (IS_ERR(kvm)) return PTR_ERR(kvm); - fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); + fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (fd < 0) kvm_put_kvm(kvm); -- cgit v1.2.3 From 5300990c0370e804e49d9a59d928c5d53fb73487 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Dec 2009 10:15:07 -0500 Subject: Sanitize f_flags helpers * pull ACC_MODE to fs.h; we have several copies all over the place * nightmarish expression calculating f_mode by f_flags deserves a helper too (OPEN_FMODE(flags)) Signed-off-by: Al Viro --- fs/anon_inodes.c | 10 +--------- fs/namei.c | 2 -- fs/open.c | 2 +- include/linux/fs.h | 3 +++ kernel/auditsc.c | 1 - security/tomoyo/file.c | 1 - 6 files changed, 5 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 598237e97221..9f0bf13291e5 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -89,19 +89,11 @@ struct file *anon_inode_getfile(const char *name, struct qstr this; struct path path; struct file *file; - fmode_t mode; int error; if (IS_ERR(anon_inode_inode)) return ERR_PTR(-ENODEV); - switch (flags & O_ACCMODE) { - case O_RDONLY: mode = FMODE_READ; break; - case O_WRONLY: mode = FMODE_WRITE; break; - case O_RDWR: mode = FMODE_READ | FMODE_WRITE; break; - default: return ERR_PTR(-EINVAL); - } - if (fops->owner && !try_module_get(fops->owner)) return ERR_PTR(-ENOENT); @@ -129,7 +121,7 @@ struct file *anon_inode_getfile(const char *name, d_instantiate(path.dentry, anon_inode_inode); error = -ENFILE; - file = alloc_file(&path, mode, fops); + file = alloc_file(&path, OPEN_FMODE(flags), fops); if (!file) goto err_dput; file->f_mapping = anon_inode_inode->i_mapping; diff --git a/fs/namei.c b/fs/namei.c index d517f73aa36b..68921d9b5302 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -37,8 +37,6 @@ #include "internal.h" -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) - /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. 
The reason is that omirr needs diff --git a/fs/open.c b/fs/open.c index 6daee28f6e8f..040cef72bc00 100644 --- a/fs/open.c +++ b/fs/open.c @@ -828,7 +828,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, struct inode *inode; int error; - f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { diff --git a/include/linux/fs.h b/include/linux/fs.h index cca191933ff6..9e13b533aaef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2464,5 +2464,8 @@ int proc_nr_files(struct ctl_table *table, int write, int __init get_filesystem_list(char *buf); +#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 267e484f0198..fc0f928167e7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -250,7 +250,6 @@ struct audit_context { #endif }; -#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) static inline int open_arg(int flags, int mask) { int n = ACC_MODE(flags); diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 8346938809b1..9a6c58881c0a 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -12,7 +12,6 @@ #include "common.h" #include "tomoyo.h" #include "realpath.h" -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* * tomoyo_globally_readable_file_entry is a structure which is used for holding -- cgit v1.2.3 From 83f57a11d84460dfe2afdb5a8bc759953428e38b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 22 Dec 2009 14:10:37 -0800 Subject: Revert "time: Remove xtime_cache" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7bc7d637452383d56ba4368d4336b0dde1bb476d, as requested by John Stultz. Quoting John: "Petr Titěra reported an issue where he saw odd atime regressions with 2.6.33 where there were a full second worth of nanoseconds in the nanoseconds field. He also reviewed the time code and narrowed down the problem: unhandled overflow of the nanosecond field caused by rounding up the sub-nanosecond accumulated time. Details: * At the end of update_wall_time(), we currently round up the sub-nanosecond portion of accumulated time when storing it into xtime. This was added to avoid time inconsistencies caused when the sub-nanosecond portion was truncated when storing into xtime. Unfortunately we don't handle the possible second overflow caused by that rounding. * Previously the xtime_cache code hid this overflow by normalizing the xtime value when storing into the xtime_cache. * We could try to handle the second overflow after the rounding up, but since this affects the timekeeping's internal state, this would further complicate the next accumulation cycle, causing small errors in ntp steering. As much as I'd like to get rid of it, the xtime_cache code is known to work. * The correct fix is really to include the sub-nanosecond portion in the timekeeping accessor function, so we don't need to round up during accumulation. This would greatly simplify the accumulation code. Unfortunately, we can't do this safely until the last three non-GENERIC_TIME arches (sparc32, arm, cris) are converted (those patches are in -mm) and we kill off the spots where arches set xtime directly.
This is all 2.6.34 material, so I think reverting the xtime_cache change is the best approach for now. Many thanks to Petr for both reporting and finding the issue!" Reported-by: Petr Titěra Requested-by: john stultz Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/time.c | 1 + kernel/time/timekeeping.c | 27 +++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index c6324d96009e..804798005d19 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -136,6 +136,7 @@ static inline void warp_clock(void) write_seqlock_irq(&xtime_lock); wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; xtime.tv_sec += sys_tz.tz_minuteswest * 60; + update_xtime_cache(0); write_sequnlock_irq(&xtime_lock); clock_was_set(); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index af4135f05825..7faaa32fbf4f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,6 +165,13 @@ struct timespec raw_time; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +static struct timespec xtime_cache __attribute__ ((aligned (16))); +void update_xtime_cache(u64 nsec) +{ + xtime_cache = xtime; + timespec_add_ns(&xtime_cache, nsec); +} + /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { @@ -325,6 +332,8 @@ int do_settimeofday(struct timespec *tv) xtime = *tv; + update_xtime_cache(0); + timekeeper.ntp_error = 0; ntp_clear(); @@ -550,6 +559,7 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -583,6 +593,7 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts); } + update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; @@ -722,6 +733,7 @@ static void timekeeping_adjust(s64 offset) timekeeper.ntp_error_shift; } + /** * logarithmic_accumulation - shifted accumulation of cycles * @@ -765,6 +777,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) return offset; } + /** * update_wall_time - Uses the current clocksource to increment the wall time * @@ -774,6 +787,7 @@ void update_wall_time(void) { struct clocksource *clock; cycle_t offset; + u64 nsecs; int shift = 0, maxshift; /* Make sure we're fully resumed: */ @@ -839,6 +853,9 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); + update_xtime_cache(nsecs); + /* check to see if there is a new clocksource to use */ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } @@ -875,13 +892,13 @@ void monotonic_to_bootbased(struct timespec *ts) unsigned long get_seconds(void) { - return xtime.tv_sec; + return xtime_cache.tv_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return xtime; + return xtime_cache; } struct timespec current_kernel_time(void) @@ -891,7 +908,8 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime; + + now = xtime_cache; } while (read_seqretry(&xtime_lock, seq)); return now; @@ -905,7 +923,8 @@ struct timespec get_monotonic_coarse(void) do { 
seq = read_seqbegin(&xtime_lock); - now = xtime; + + now = xtime_cache; mono = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); -- cgit v1.2.3 From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:26 -0800 Subject: kfifo: move struct kfifo in place This is a new generic kernel FIFO implementation. The current kernel fifo API is not very widely used, because it has too many constraints. Only 17 files in the current 2.6.31-rc5 use it. FIFOs are, like lists, a very basic thing, and a kfifo API which handles the most common use cases would save a lot of development time and memory resources. I think these are the reasons why kfifo is not in use: - The API is too simple; important functions are missing - A fifo can only be allocated dynamically - There is a requirement of a spinlock whether you need it or not - There is no support for data records inside a fifo So I decided to extend the kfifo in a more generic way without blowing up the API too much. The new API has the following benefits: - Generic usage: for kernel internal use and/or device drivers. - Provides an API for the most common use cases. - Slim API: the whole API provides 25 functions. - Linux style habits. - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO macros - Direct copy_to_user from the fifo and copy_from_user into the fifo. - The kfifo itself is an in-place member of the using data structure; this saves an indirection and does not waste kernel allocator memory. - Lockless access: if only one reader and one writer are active on the fifo, which is the common use case, no additional locking is necessary. - Removes the spinlock requirement - gives the user the freedom to choose what kind of locking to use, if any is required. - Ability to handle records. Three types of records are supported: - Variable length records between 0-255 bytes, with a record size field of 1 byte. - Variable length records between 0-65535 bytes, with a record size field of 2 bytes. - Fixed size records, which have no record size field. - Preserves memory resources. - Performance! - Easy to use! This patch: Since most users want to have the kfifo as part of another object, reorganize the code to allow including struct kfifo in another data structure. This requires changing the kfifo_alloc and kfifo_init prototypes so that we pass an existing kfifo pointer into them. This patch changes the implementation and all existing users. 
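To make the conversion pattern concrete, here is a minimal sketch of the new in-place usage. This is hypothetical illustration code, not taken from the patch: the my_dev names are invented, and the spinlock argument reflects the API at this point in the series (a later patch in this set removes it).

	/*
	 * Hypothetical driver embedding a struct kfifo (names invented).
	 * kfifo_alloc() now fills in a caller-owned struct and returns
	 * 0 or -ENOMEM, instead of returning an ERR_PTR-encoded pointer
	 * that every caller had to test with IS_ERR().
	 */
	#include <linux/kfifo.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct my_dev {
		struct kfifo events;	/* in-place member, no extra indirection */
		spinlock_t events_lock;	/* still passed to kfifo_alloc() here */
	};

	static int my_dev_init(struct my_dev *dev)
	{
		spin_lock_init(&dev->events_lock);
		/* size must be a power of 2; kfifo_alloc() rounds up otherwise */
		return kfifo_alloc(&dev->events, 64 * sizeof(u32), GFP_KERNEL,
				   &dev->events_lock);
	}

	static void my_dev_queue_event(struct my_dev *dev, u32 event)
	{
		kfifo_put(&dev->events, (unsigned char *)&event, sizeof(event));
	}

	static void my_dev_exit(struct my_dev *dev)
	{
		kfifo_free(&dev->events);	/* frees only the internal buffer */
	}

Note how teardown simplifies as well: since the fifo is no longer separately allocated, callers no longer need the "if (fifo) kfifo_free(fifo)" dance seen in the old nozomi and libertas code below.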
[akpm@linux-foundation.org: fix warning] Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 21 +++++----- drivers/char/sonypi.c | 40 +++++++++--------- drivers/infiniband/hw/cxgb3/cxio_hal.h | 9 ++-- drivers/infiniband/hw/cxgb3/cxio_resource.c | 60 +++++++++++++------------- drivers/media/video/meye.c | 48 ++++++++++----------- drivers/media/video/meye.h | 4 +- drivers/net/wireless/libertas/cmd.c | 4 +- drivers/net/wireless/libertas/dev.h | 4 +- drivers/net/wireless/libertas/main.c | 16 ++++--- drivers/platform/x86/fujitsu-laptop.c | 18 ++++---- drivers/platform/x86/sony-laptop.c | 46 ++++++++++---------- drivers/scsi/libiscsi.c | 22 ++++------ drivers/scsi/libiscsi_tcp.c | 29 +++++++------ drivers/scsi/libsrp.c | 13 +++--- drivers/usb/host/fhci-sched.c | 10 ++--- drivers/usb/host/fhci-tds.c | 35 ++++++++-------- drivers/usb/host/fhci.h | 10 ++--- drivers/usb/serial/usb-serial.c | 5 +-- include/linux/kfifo.h | 11 ++--- include/scsi/libiscsi.h | 3 +- include/scsi/libiscsi_tcp.h | 2 +- include/scsi/libsrp.h | 2 +- kernel/kfifo.c | 65 +++++++++++++++-------------- net/dccp/probe.c | 20 ++++----- 24 files changed, 238 insertions(+), 259 deletions(-) (limited to 'kernel') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index d3400b20444f..0f39bec28b45 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -358,7 +358,7 @@ struct port { u8 update_flow_control; struct ctrl_ul ctrl_ul; struct ctrl_dl ctrl_dl; - struct kfifo *fifo_ul; + struct kfifo fifo_ul; void __iomem *dl_addr[2]; u32 dl_size[2]; u8 toggle_dl; @@ -685,8 +685,8 @@ static int nozomi_read_config_table(struct nozomi *dc) dump_table(dc); for (i = PORT_MDM; i < MAX_PORT; i++) { - dc->port[i].fifo_ul = - kfifo_alloc(FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); + kfifo_alloc(&dc->port[i].fifo_ul, + FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl)); memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul)); } @@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = __kfifo_get(port->fifo_ul, dc->send_buf, + size = __kfifo_get(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX); if (size == 0) { @@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc) } else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) { - if (__kfifo_len(dc->port[port].fifo_ul)) { + if (__kfifo_len(&dc->port[port].fifo_ul)) { DBG1("Enable interrupt (0x%04X) on port: %d", enable_ier, port); DBG1("Data in buffer [%d], enable transmit! 
", - __kfifo_len(dc->port[port].fifo_ul)); + __kfifo_len(&dc->port[port].fifo_ul)); enable_transmit_ul(port, dc); } else { DBG1("No data in buffer..."); @@ -1536,8 +1536,7 @@ static void __devexit nozomi_card_exit(struct pci_dev *pdev) free_irq(pdev->irq, dc); for (i = 0; i < MAX_PORT; i++) - if (dc->port[i].fifo_ul) - kfifo_free(dc->port[i].fifo_ul); + kfifo_free(&dc->port[i].fifo_ul); kfree(dc->send_buf); @@ -1673,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = __kfifo_put(port->fifo_ul, (unsigned char *)buffer, count); + rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { @@ -1721,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty) if (!port->port.count) goto exit; - room = port->fifo_ul->size - __kfifo_len(port->fifo_ul); + room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul); exit: mutex_unlock(&port->tty_sem); @@ -1878,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty) goto exit_in_buffer; } - rval = __kfifo_len(port->fifo_ul); + rval = __kfifo_len(&port->fifo_ul); exit_in_buffer: return rval; diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 8c262aaf7c26..9e6efb1f029f 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -487,7 +487,7 @@ static struct sonypi_device { int camera_power; int bluetooth_power; struct mutex lock; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; wait_queue_head_t fifo_proc_list; struct fasync_struct *fifo_async; @@ -496,7 +496,7 @@ static struct sonypi_device { struct input_dev *input_jog_dev; struct input_dev *input_key_dev; struct work_struct input_work; - struct kfifo *input_fifo; + struct kfifo input_fifo; spinlock_t input_fifo_lock; } sonypi_device; @@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get(sonypi_device.input_fifo, (unsigned char *)&kp, + while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp)) == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); @@ -827,7 +827,7 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put(sonypi_device.input_fifo, + kfifo_put(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp)); schedule_work(&sonypi_device.input_work); } @@ -880,7 +880,7 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -906,7 +906,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) mutex_lock(&sonypi_device.lock); /* Flush input queue on first open */ if (!sonypi_device.open_count) - kfifo_reset(sonypi_device.fifo); + kfifo_reset(&sonypi_device.fifo); sonypi_device.open_count++; mutex_unlock(&sonypi_device.lock); unlock_kernel(); @@ -919,17 +919,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, ssize_t ret; unsigned char c; - if ((kfifo_len(sonypi_device.fifo) == 0) && + if ((kfifo_len(&sonypi_device.fifo) == 0) && (file->f_flags & O_NONBLOCK)) return -EAGAIN; ret = wait_event_interruptible(sonypi_device.fifo_proc_list, - kfifo_len(sonypi_device.fifo) != 0); + kfifo_len(&sonypi_device.fifo) != 0); if (ret) return ret; while (ret < 
count && - (kfifo_get(sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -946,7 +946,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait) { poll_wait(file, &sonypi_device.fifo_proc_list, wait); - if (kfifo_len(sonypi_device.fifo)) + if (kfifo_len(&sonypi_device.fifo)) return POLLIN | POLLRDNORM; return 0; } @@ -1313,11 +1313,11 @@ static int __devinit sonypi_probe(struct platform_device *dev) "http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n"); spin_lock_init(&sonypi_device.fifo_lock); - sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, + error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL, &sonypi_device.fifo_lock); - if (IS_ERR(sonypi_device.fifo)) { + if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - return PTR_ERR(sonypi_device.fifo); + return error; } init_waitqueue_head(&sonypi_device.fifo_proc_list); @@ -1393,12 +1393,10 @@ static int __devinit sonypi_probe(struct platform_device *dev) } spin_lock_init(&sonypi_device.input_fifo_lock); - sonypi_device.input_fifo = - kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, - &sonypi_device.input_fifo_lock); - if (IS_ERR(sonypi_device.input_fifo)) { + error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE, + GFP_KERNEL, &sonypi_device.input_fifo_lock); + if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - error = PTR_ERR(sonypi_device.input_fifo); goto err_inpdev_unregister; } @@ -1423,7 +1421,7 @@ static int __devinit sonypi_probe(struct platform_device *dev) pci_disable_device(pcidev); err_put_pcidev: pci_dev_put(pcidev); - kfifo_free(sonypi_device.fifo); + kfifo_free(&sonypi_device.fifo); return error; } @@ -1438,7 +1436,7 @@ static int __devexit sonypi_remove(struct platform_device *dev) if (useinput) { input_unregister_device(sonypi_device.input_key_dev); input_unregister_device(sonypi_device.input_jog_dev); - kfifo_free(sonypi_device.input_fifo); + kfifo_free(&sonypi_device.input_fifo); } misc_deregister(&sonypi_misc_device); @@ -1451,7 +1449,7 @@ static int __devexit sonypi_remove(struct platform_device *dev) pci_dev_put(sonypi_device.dev); } - kfifo_free(sonypi_device.fifo); + kfifo_free(&sonypi_device.fifo); return 0; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index bfd03bf8be54..f3d440cc68f2 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -34,6 +34,7 @@ #include #include +#include #include "t3_cpl.h" #include "t3cdev.h" @@ -75,13 +76,13 @@ struct cxio_hal_ctrl_qp { }; struct cxio_hal_resource { - struct kfifo *tpt_fifo; + struct kfifo tpt_fifo; spinlock_t tpt_fifo_lock; - struct kfifo *qpid_fifo; + struct kfifo qpid_fifo; spinlock_t qpid_fifo_lock; - struct kfifo *cqid_fifo; + struct kfifo cqid_fifo; spinlock_t cqid_fifo_lock; - struct kfifo *pdid_fifo; + struct kfifo pdid_fifo; spinlock_t pdid_fifo_lock; }; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index bd233c087653..65072bdfc1bf 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -39,12 +39,12 @@ #include "cxio_resource.h" #include "cxio_hal.h" -static struct kfifo *rhdl_fifo; +static struct kfifo rhdl_fifo; static spinlock_t rhdl_fifo_lock; #define RANDOM_SIZE 16 -static int 
__cxio_init_resource_fifo(struct kfifo **fifo, +static int __cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t *fifo_lock, u32 nr, u32 skip_low, u32 skip_high, @@ -55,12 +55,11 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo, u32 rarray[16]; spin_lock_init(fifo_lock); - *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock); - if (IS_ERR(*fifo)) + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock)) return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32)); + __kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -72,33 +71,33 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - __kfifo_put(*fifo, + __kfifo_put(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - __kfifo_put(*fifo, + __kfifo_put(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32)); + __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)); return 0; } -static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock, +static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, skip_high, 0)); } -static int cxio_init_resource_fifo_random(struct kfifo **fifo, +static int cxio_init_resource_fifo_random(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { @@ -113,15 +112,14 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32), + if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), GFP_KERNEL, - &rdev_p->rscp->qpid_fifo_lock); - if (IS_ERR(rdev_p->rscp->qpid_fifo)) + &rdev_p->rscp->qpid_fifo_lock)) return -ENOMEM; for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - __kfifo_put(rdev_p->rscp->qpid_fifo, + __kfifo_put(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } @@ -134,7 +132,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl) void cxio_hal_destroy_rhdl_resource(void) { - kfifo_free(rhdl_fifo); + kfifo_free(&rhdl_fifo); } /* nr_* must be power of 2 */ @@ -167,11 +165,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p, goto pdid_err; return 0; pdid_err: - kfifo_free(rscp->cqid_fifo); + kfifo_free(&rscp->cqid_fifo); cqid_err: - kfifo_free(rscp->qpid_fifo); + kfifo_free(&rscp->qpid_fifo); qpid_err: - kfifo_free(rscp->tpt_fifo); + kfifo_free(&rscp->tpt_fifo); tpt_err: return -ENOMEM; } @@ -195,17 +193,17 @@ static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->tpt_fifo); + return cxio_hal_get_resource(&rscp->tpt_fifo); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { - cxio_hal_put_resource(rscp->tpt_fifo, stag); + cxio_hal_put_resource(&rscp->tpt_fifo, stag); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { - u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo); + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo); 
PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } @@ -213,35 +211,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { PDBG("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(rscp->qpid_fifo, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, qpid); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->cqid_fifo); + return cxio_hal_get_resource(&rscp->cqid_fifo); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { - cxio_hal_put_resource(rscp->cqid_fifo, cqid); + cxio_hal_put_resource(&rscp->cqid_fifo, cqid); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->pdid_fifo); + return cxio_hal_get_resource(&rscp->pdid_fifo); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { - cxio_hal_put_resource(rscp->pdid_fifo, pdid); + cxio_hal_put_resource(&rscp->pdid_fifo, pdid); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) { - kfifo_free(rscp->tpt_fifo); - kfifo_free(rscp->cqid_fifo); - kfifo_free(rscp->qpid_fifo); - kfifo_free(rscp->pdid_fifo); + kfifo_free(&rscp->tpt_fifo); + kfifo_free(&rscp->cqid_fifo); + kfifo_free(&rscp->qpid_fifo); + kfifo_free(&rscp->pdid_fifo); kfree(rscp); } diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 6ffa64cd1c6d..dacbbb839b9e 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,7 +800,7 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get(meye.grabq, (unsigned char *)&reqnr, + if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, sizeof(int)) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; @@ -811,7 +811,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); wake_up_interruptible(&meye.proc_list); } else { int size; @@ -820,7 +820,7 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get(meye.grabq, (unsigned char *)&reqnr, + if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, sizeof(int)) != sizeof(int)) { mchip_free_frame(); goto again; @@ -831,7 +831,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); wake_up_interruptible(&meye.proc_list); } mchip_free_frame(); @@ -859,8 +859,8 @@ static int meye_open(struct file *file) for (i = 0; i < MEYE_MAX_BUFNBRS; i++) meye.grab_buffer[i].state = MEYE_BUF_UNUSED; - kfifo_reset(meye.grabq); - kfifo_reset(meye.doneq); + kfifo_reset(&meye.grabq); + kfifo_reset(&meye.doneq); return 0; } @@ -933,7 +933,7 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put(meye.grabq, (unsigned char *)nb, sizeof(int)); + kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int)); mutex_unlock(&meye.lock); return 0; @@ -965,7 +965,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get(meye.doneq, (unsigned char *)&unused, sizeof(int)); + kfifo_get(&meye.doneq, (unsigned char *)&unused, 
sizeof(int)); } *i = meye.grab_buffer[*i].size; mutex_unlock(&meye.lock); @@ -1452,7 +1452,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put(meye.grabq, (unsigned char *)&buf->index, sizeof(int)); + kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int)); mutex_unlock(&meye.lock); return 0; @@ -1467,18 +1467,18 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) mutex_lock(&meye.lock); - if (kfifo_len(meye.doneq) == 0 && file->f_flags & O_NONBLOCK) { + if (kfifo_len(&meye.doneq) == 0 && file->f_flags & O_NONBLOCK) { mutex_unlock(&meye.lock); return -EAGAIN; } if (wait_event_interruptible(meye.proc_list, - kfifo_len(meye.doneq) != 0) < 0) { + kfifo_len(&meye.doneq) != 0) < 0) { mutex_unlock(&meye.lock); return -EINTR; } - if (!kfifo_get(meye.doneq, (unsigned char *)&reqnr, + if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr, sizeof(int))) { mutex_unlock(&meye.lock); return -EBUSY; @@ -1529,8 +1529,8 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i) { mutex_lock(&meye.lock); mchip_hic_stop(); - kfifo_reset(meye.grabq); - kfifo_reset(meye.doneq); + kfifo_reset(&meye.grabq); + kfifo_reset(&meye.doneq); for (i = 0; i < MEYE_MAX_BUFNBRS; i++) meye.grab_buffer[i].state = MEYE_BUF_UNUSED; @@ -1572,7 +1572,7 @@ static unsigned int meye_poll(struct file *file, poll_table *wait) mutex_lock(&meye.lock); poll_wait(file, &meye.proc_list, wait); - if (kfifo_len(meye.doneq)) + if (kfifo_len(&meye.doneq)) res = POLLIN | POLLRDNORM; mutex_unlock(&meye.lock); return res; @@ -1745,16 +1745,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev, } spin_lock_init(&meye.grabq_lock); - meye.grabq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.grabq_lock); - if (IS_ERR(meye.grabq)) { + if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, + &meye.grabq_lock)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc1; } spin_lock_init(&meye.doneq_lock); - meye.doneq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.doneq_lock); - if (IS_ERR(meye.doneq)) { + if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, + &meye.doneq_lock)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc2; } @@ -1868,9 +1866,9 @@ outregions: outenabledev: sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0); outsonypienable: - kfifo_free(meye.doneq); + kfifo_free(&meye.doneq); outkfifoalloc2: - kfifo_free(meye.grabq); + kfifo_free(&meye.grabq); outkfifoalloc1: vfree(meye.grab_temp); outvmalloc: @@ -1901,8 +1899,8 @@ static void __devexit meye_remove(struct pci_dev *pcidev) sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0); - kfifo_free(meye.doneq); - kfifo_free(meye.grabq); + kfifo_free(&meye.doneq); + kfifo_free(&meye.grabq); vfree(meye.grab_temp); diff --git a/drivers/media/video/meye.h b/drivers/media/video/meye.h index 5f70a106ba2b..1321ad5d6597 100644 --- a/drivers/media/video/meye.h +++ b/drivers/media/video/meye.h @@ -303,9 +303,9 @@ struct meye { struct meye_grab_buffer grab_buffer[MEYE_MAX_BUFNBRS]; int vma_use_count[MEYE_MAX_BUFNBRS]; /* mmap count */ struct mutex lock; /* mutex for open/mmap... 
*/ - struct kfifo *grabq; /* queue for buffers to be grabbed */ + struct kfifo grabq; /* queue for buffers to be grabbed */ spinlock_t grabq_lock; /* lock protecting the queue */ - struct kfifo *doneq; /* queue for grabbed buffers */ + struct kfifo doneq; /* queue for grabbed buffers */ spinlock_t doneq_lock; /* lock protecting the queue */ wait_queue_head_t proc_list; /* wait queue */ struct video_device *video_dev; /* video device parameters */ diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index b9b371bfa30f..ffed17f4f506 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv) priv->dnld_sent = DNLD_RES_RECEIVED; /* If nothing to do, go back to sleep (?) */ - if (!__kfifo_len(priv->event_fifo) && !priv->resp_len[priv->resp_idx]) + if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) priv->psstate = PS_STATE_SLEEP; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv) } /* Pending events or command responses? */ - if (__kfifo_len(priv->event_fifo) || priv->resp_len[priv->resp_idx]) { + if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { allowed = 0; lbs_deb_host("pending events or command responses\n"); } diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h index 6a8d2b291d8c..05bb298dfae9 100644 --- a/drivers/net/wireless/libertas/dev.h +++ b/drivers/net/wireless/libertas/dev.h @@ -10,7 +10,7 @@ #include "scan.h" #include "assoc.h" - +#include /** sleep_params */ struct sleep_params { @@ -120,7 +120,7 @@ struct lbs_private { u32 resp_len[2]; /* Events sent from hardware to driver */ - struct kfifo *event_fifo; + struct kfifo event_fifo; /** thread to service interrupts */ struct task_struct *main_thread; diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index db38a5a719fa..403909287414 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -459,7 +459,7 @@ static int lbs_thread(void *data) else if (!list_empty(&priv->cmdpendingq) && !(priv->wakeup_dev_required)) shouldsleep = 0; /* We have a command to send */ - else if (__kfifo_len(priv->event_fifo)) + else if (__kfifo_len(&priv->event_fifo)) shouldsleep = 0; /* We have an event to process */ else shouldsleep = 1; /* No command */ @@ -511,9 +511,9 @@ static int lbs_thread(void *data) /* Process hardware events, e.g. 
card removed, link lost */ spin_lock_irq(&priv->driver_lock); - while (__kfifo_len(priv->event_fifo)) { + while (__kfifo_len(&priv->event_fifo)) { u32 event; - __kfifo_get(priv->event_fifo, (unsigned char *) &event, + __kfifo_get(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -883,10 +883,9 @@ static int lbs_init_adapter(struct lbs_private *priv) priv->resp_len[0] = priv->resp_len[1] = 0; /* Create the event FIFO */ - priv->event_fifo = kfifo_alloc(sizeof(u32) * 16, GFP_KERNEL, NULL); - if (IS_ERR(priv->event_fifo)) { + ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL); + if (ret) { lbs_pr_err("Out of memory allocating event FIFO buffer\n"); - ret = -ENOMEM; goto out; } @@ -901,8 +900,7 @@ static void lbs_free_adapter(struct lbs_private *priv) lbs_deb_enter(LBS_DEB_MAIN); lbs_free_cmd_buffer(priv); - if (priv->event_fifo) - kfifo_free(priv->event_fifo); + kfifo_free(&priv->event_fifo); del_timer(&priv->command_timer); del_timer(&priv->auto_deepsleep_timer); kfree(priv->networks); @@ -1177,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - __kfifo_put(priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + __kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index bcd4ba8be7db..f999fba0e25e 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -164,7 +164,7 @@ struct fujitsu_hotkey_t { struct input_dev *input; char phys[32]; struct platform_device *pf_device; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; int rfkill_supported; int rfkill_state; @@ -824,12 +824,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) /* kfifo */ spin_lock_init(&fujitsu_hotkey->fifo_lock); - fujitsu_hotkey->fifo = - kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL, - &fujitsu_hotkey->fifo_lock); - if (IS_ERR(fujitsu_hotkey->fifo)) { + error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), + GFP_KERNEL, &fujitsu_hotkey->fifo_lock); + if (error) { printk(KERN_ERR "kfifo_alloc failed\n"); - error = PTR_ERR(fujitsu_hotkey->fifo); goto err_stop; } @@ -934,7 +932,7 @@ err_unregister_input_dev: err_free_input_dev: input_free_device(input); err_free_fifo: - kfifo_free(fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_hotkey->fifo); err_stop: return result; } @@ -956,7 +954,7 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type) input_free_device(input); - kfifo_free(fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_hotkey->fifo); fujitsu_hotkey->acpi_handle = NULL; @@ -1008,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put(fujitsu_hotkey->fifo, + status = kfifo_put(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, sizeof(keycode)); if (status != sizeof(keycode)) { @@ -1022,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } else if (keycode == 0) { while ((status = kfifo_get - (fujitsu_hotkey->fifo, (unsigned char *) + (&fujitsu_hotkey->fifo, (unsigned char *) &keycode_r, sizeof (keycode_r))) == sizeof(keycode_r)) { diff --git a/drivers/platform/x86/sony-laptop.c 
b/drivers/platform/x86/sony-laptop.c index 7a2cc8a5c975..04625a048e74 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -142,7 +142,7 @@ struct sony_laptop_input_s { atomic_t users; struct input_dev *jog_dev; struct input_dev *key_dev; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; struct workqueue_struct *wq; }; @@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get(sony_laptop_input.fifo, (unsigned char *)&kp, + while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp)) == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); @@ -362,7 +362,7 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put(sony_laptop_input.fifo, + kfifo_put(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp)); if (!work_pending(&sony_laptop_release_key_work)) @@ -385,12 +385,11 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) /* kfifo */ spin_lock_init(&sony_laptop_input.fifo_lock); - sony_laptop_input.fifo = - kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, + error = + kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, &sony_laptop_input.fifo_lock); - if (IS_ERR(sony_laptop_input.fifo)) { + if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); - error = PTR_ERR(sony_laptop_input.fifo); goto err_dec_users; } @@ -474,7 +473,7 @@ err_destroy_wq: destroy_workqueue(sony_laptop_input.wq); err_free_kfifo: - kfifo_free(sony_laptop_input.fifo); + kfifo_free(&sony_laptop_input.fifo); err_dec_users: atomic_dec(&sony_laptop_input.users); @@ -500,7 +499,7 @@ static void sony_laptop_remove_input(void) } destroy_workqueue(sony_laptop_input.wq); - kfifo_free(sony_laptop_input.fifo); + kfifo_free(&sony_laptop_input.fifo); } /*********** Platform Device ***********/ @@ -2079,7 +2078,7 @@ static struct attribute_group spic_attribute_group = { struct sonypi_compat_s { struct fasync_struct *fifo_async; - struct kfifo *fifo; + struct kfifo fifo; spinlock_t fifo_lock; wait_queue_head_t fifo_proc_list; atomic_t open_count; @@ -2104,12 +2103,12 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) /* Flush input queue on first open */ unsigned long flags; - spin_lock_irqsave(sonypi_compat.fifo->lock, flags); + spin_lock_irqsave(&sonypi_compat.fifo_lock, flags); if (atomic_inc_return(&sonypi_compat.open_count) == 1) - __kfifo_reset(sonypi_compat.fifo); + __kfifo_reset(&sonypi_compat.fifo); - spin_unlock_irqrestore(sonypi_compat.fifo->lock, flags); + spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags); return 0; } @@ -2120,17 +2119,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, ssize_t ret; unsigned char c; - if ((kfifo_len(sonypi_compat.fifo) == 0) && + if ((kfifo_len(&sonypi_compat.fifo) == 0) && (file->f_flags & O_NONBLOCK)) return -EAGAIN; ret = wait_event_interruptible(sonypi_compat.fifo_proc_list, - kfifo_len(sonypi_compat.fifo) != 0); + kfifo_len(&sonypi_compat.fifo) != 0); if (ret) return ret; while (ret < count && - (kfifo_get(sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -2147,7 +2146,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, static unsigned int sonypi_misc_poll(struct file 
*file, poll_table *wait) { poll_wait(file, &sonypi_compat.fifo_proc_list, wait); - if (kfifo_len(sonypi_compat.fifo)) + if (kfifo_len(&sonypi_compat.fifo)) return POLLIN | POLLRDNORM; return 0; } @@ -2309,7 +2308,7 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put(sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); } @@ -2319,11 +2318,12 @@ static int sonypi_compat_init(void) int error; spin_lock_init(&sonypi_compat.fifo_lock); - sonypi_compat.fifo = kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, + error = + kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, &sonypi_compat.fifo_lock); - if (IS_ERR(sonypi_compat.fifo)) { + if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); - return PTR_ERR(sonypi_compat.fifo); + return error; } init_waitqueue_head(&sonypi_compat.fifo_proc_list); @@ -2342,14 +2342,14 @@ static int sonypi_compat_init(void) return 0; err_free_kfifo: - kfifo_free(sonypi_compat.fifo); + kfifo_free(&sonypi_compat.fifo); return error; } static void sonypi_compat_exit(void) { misc_deregister(&sonypi_misc_device); - kfifo_free(sonypi_compat.fifo); + kfifo_free(&sonypi_compat.fifo); } #else static int sonypi_compat_init(void) { return 0; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index b7689f3d05f5..cf0aa7e90be9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - __kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*)); + __kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!__kfifo_get(session->cmdpool.queue, + if (!__kfifo_get(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!__kfifo_get(conn->session->cmdpool.queue, + if (!__kfifo_get(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2461,12 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) if (q->pool == NULL) return -ENOMEM; - q->queue = kfifo_init((void*)q->pool, max * sizeof(void*), - GFP_KERNEL, NULL); - if (IS_ERR(q->queue)) { - q->queue = NULL; - goto enomem; - } + kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL); for (i = 0; i < max; i++) { q->pool[i] = kzalloc(item_size, GFP_KERNEL); @@ -2474,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - __kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*)); + __kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2497,7 +2492,6 @@ void iscsi_pool_free(struct iscsi_pool *q) for (i = 0; i < q->max; i++) kfree(q->pool[i]); kfree(q->pool); - kfree(q->queue); } EXPORT_SYMBOL_GPL(iscsi_pool_free); @@ -2825,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ spin_lock_bh(&session->lock); - if 
(!__kfifo_get(session->cmdpool.queue, + if (!__kfifo_get(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2845,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task, + __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2908,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task, + __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index ca25ee5190b0..a83ee56a185e 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - __kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t, + __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - __kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + __kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task) return conn->session->tt->init_pdu(task, 0, task->data_count); } - BUG_ON(__kfifo_len(tcp_task->r2tqueue)); + BUG_ON(__kfifo_len(&tcp_task->r2tqueue)); tcp_task->exp_datasn = 0; /* Prepare PDU, optionally w/ immediate data */ @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - __kfifo_put(tcp_task->r2tpool.queue, + __kfifo_put(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - __kfifo_get(tcp_task->r2tqueue, + __kfifo_get(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } @@ -1127,9 +1127,8 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session) } /* R2T xmit queue */ - tcp_task->r2tqueue = kfifo_alloc( - session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL); - if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) { + if (kfifo_alloc(&tcp_task->r2tqueue, + session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) { iscsi_pool_free(&tcp_task->r2tpool); goto r2t_alloc_fail; } @@ -1142,7 +1141,7 @@ r2t_alloc_fail: struct iscsi_task *task = session->cmds[i]; struct iscsi_tcp_task *tcp_task = task->dd_data; - kfifo_free(tcp_task->r2tqueue); + kfifo_free(&tcp_task->r2tqueue); iscsi_pool_free(&tcp_task->r2tpool); } return -ENOMEM; @@ -1157,7 +1156,7 @@ void iscsi_tcp_r2tpool_free(struct iscsi_session *session) struct iscsi_task *task = session->cmds[i]; struct iscsi_tcp_task *tcp_task = task->dd_data; - kfifo_free(tcp_task->r2tqueue); + kfifo_free(&tcp_task->r2tqueue); iscsi_pool_free(&tcp_task->r2tpool); } } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 9ad38e81e343..b1b5e51ca8e3 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -58,19 +58,16 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, goto free_pool; spin_lock_init(&q->lock); - q->queue = kfifo_init((void *) q->pool, max * sizeof(void *), - GFP_KERNEL, &q->lock); - if (IS_ERR(q->queue)) - goto free_item; + kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *), + &q->lock); for (i = 0, iue = q->items; i < max; i++) { - __kfifo_put(q->queue, (void *) &iue, sizeof(void *)); + __kfifo_put(&q->queue, (void *) &iue, 
sizeof(void *)); iue->sbuf = ring[i]; iue++; } return 0; -free_item: kfree(q->items); free_pool: kfree(q->pool); @@ -167,7 +164,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get(target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *)); if (!iue) return iue; iue->target = target; @@ -179,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put(iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c index 00a29855d0c4..ff43747a614f 100644 --- a/drivers/usb/host/fhci-sched.c +++ b/drivers/usb/host/fhci-sched.c @@ -37,7 +37,7 @@ static void recycle_frame(struct fhci_usb *usb, struct packet *pkt) pkt->info = 0; pkt->priv_data = NULL; - cq_put(usb->ep0->empty_frame_Q, pkt); + cq_put(&usb->ep0->empty_frame_Q, pkt); } /* confirm submitted packet */ @@ -57,7 +57,7 @@ void fhci_transaction_confirm(struct fhci_usb *usb, struct packet *pkt) if ((td->data + td->actual_len) && trans_len) memcpy(td->data + td->actual_len, pkt->data, trans_len); - cq_put(usb->ep0->dummy_packets_Q, pkt->data); + cq_put(&usb->ep0->dummy_packets_Q, pkt->data); } recycle_frame(usb, pkt); @@ -213,7 +213,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) } /* update frame object fields before transmitting */ - pkt = cq_get(usb->ep0->empty_frame_Q); + pkt = cq_get(&usb->ep0->empty_frame_Q); if (!pkt) { fhci_dbg(usb->fhci, "there is no empty frame\n"); return -1; @@ -222,7 +222,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) pkt->info = 0; if (data == NULL) { - data = cq_get(usb->ep0->dummy_packets_Q); + data = cq_get(&usb->ep0->dummy_packets_Q); BUG_ON(!data); pkt->info = PKT_DUMMY_PACKET; } @@ -246,7 +246,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td) list_del_init(&td->frame_lh); td->status = USB_TD_OK; if (pkt->info & PKT_DUMMY_PACKET) - cq_put(usb->ep0->dummy_packets_Q, pkt->data); + cq_put(&usb->ep0->dummy_packets_Q, pkt->data); recycle_frame(usb, pkt); usb->actual_frame->total_bytes -= (len + PROTOCOL_OVERHEAD); fhci_err(usb->fhci, "host transaction failed\n"); diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c index b40332290319..d224ab467a40 100644 --- a/drivers/usb/host/fhci-tds.c +++ b/drivers/usb/host/fhci-tds.c @@ -106,33 +106,33 @@ void fhci_ep0_free(struct fhci_usb *usb) cpm_muram_free(cpm_muram_offset(ep->td_base)); if (ep->conf_frame_Q) { - size = cq_howmany(ep->conf_frame_Q); + size = cq_howmany(&ep->conf_frame_Q); for (; size; size--) { - struct packet *pkt = cq_get(ep->conf_frame_Q); + struct packet *pkt = cq_get(&ep->conf_frame_Q); kfree(pkt); } - cq_delete(ep->conf_frame_Q); + cq_delete(&ep->conf_frame_Q); } if (ep->empty_frame_Q) { - size = cq_howmany(ep->empty_frame_Q); + size = cq_howmany(&ep->empty_frame_Q); for (; size; size--) { - struct packet *pkt = cq_get(ep->empty_frame_Q); + struct packet *pkt = cq_get(&ep->empty_frame_Q); kfree(pkt); } - cq_delete(ep->empty_frame_Q); + cq_delete(&ep->empty_frame_Q); } if (ep->dummy_packets_Q) { - size = cq_howmany(ep->dummy_packets_Q); + size = cq_howmany(&ep->dummy_packets_Q); for (; size; size--) { - u8 *buff = cq_get(ep->dummy_packets_Q); + u8 *buff = cq_get(&ep->dummy_packets_Q); kfree(buff); } - 
cq_delete(ep->dummy_packets_Q); + cq_delete(&ep->dummy_packets_Q); } kfree(ep); @@ -175,10 +175,9 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem, ep->td_base = cpm_muram_addr(ep_offset); /* zero all queue pointers */ - ep->conf_frame_Q = cq_new(ring_len + 2); - ep->empty_frame_Q = cq_new(ring_len + 2); - ep->dummy_packets_Q = cq_new(ring_len + 2); - if (!ep->conf_frame_Q || !ep->empty_frame_Q || !ep->dummy_packets_Q) { + if (cq_new(&ep->conf_frame_Q, ring_len + 2) || + cq_new(&ep->empty_frame_Q, ring_len + 2) || + cq_new(&ep->dummy_packets_Q, ring_len + 2)) { err_for = "frame_queues"; goto err; } @@ -199,8 +198,8 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem, err_for = "buffer"; goto err; } - cq_put(ep->empty_frame_Q, pkt); - cq_put(ep->dummy_packets_Q, buff); + cq_put(&ep->empty_frame_Q, pkt); + cq_put(&ep->dummy_packets_Q, buff); } /* we put the endpoint parameter RAM right behind the TD ring */ @@ -319,7 +318,7 @@ static void fhci_td_transaction_confirm(struct fhci_usb *usb) if ((buf == DUMMY2_BD_BUFFER) && !(td_status & ~TD_W)) continue; - pkt = cq_get(ep->conf_frame_Q); + pkt = cq_get(&ep->conf_frame_Q); if (!pkt) fhci_err(usb->fhci, "no frame to confirm\n"); @@ -460,9 +459,9 @@ u32 fhci_host_transaction(struct fhci_usb *usb, out_be16(&td->length, pkt->len); /* put the frame to the confirmation queue */ - cq_put(ep->conf_frame_Q, pkt); + cq_put(&ep->conf_frame_Q, pkt); - if (cq_howmany(ep->conf_frame_Q) == 1) + if (cq_howmany(&ep->conf_frame_Q) == 1) out_8(&usb->fhci->regs->usb_comm, USB_CMD_STR_FIFO); return 0; diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 7116284ed21a..2277428ef5d3 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -423,9 +423,9 @@ struct endpoint { struct usb_td __iomem *td_base; /* first TD in the ring */ struct usb_td __iomem *conf_td; /* next TD for confirm after transac */ struct usb_td __iomem *empty_td;/* next TD for new transaction req. 
*/ - struct kfifo *empty_frame_Q; /* Empty frames list to use */ - struct kfifo *conf_frame_Q; /* frames passed to TDs,waiting for tx */ - struct kfifo *dummy_packets_Q;/* dummy packets for the CRC overun */ + struct kfifo empty_frame_Q; /* Empty frames list to use */ + struct kfifo conf_frame_Q; /* frames passed to TDs,waiting for tx */ + struct kfifo dummy_packets_Q;/* dummy packets for the CRC overun */ bool already_pushed_dummy_bd; }; @@ -493,9 +493,9 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci) } /* fifo of pointers */ -static inline struct kfifo *cq_new(int size) +static inline int cq_new(struct kfifo *fifo, int size) { - return kfifo_alloc(size * sizeof(void *), GFP_KERNEL, NULL); + return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL); } static inline void cq_delete(struct kfifo *kfifo) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 4543f359be75..44b72d47fac2 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -939,9 +939,8 @@ int usb_serial_probe(struct usb_interface *interface, dev_err(&interface->dev, "No free urbs available\n"); goto probe_error; } - port->write_fifo = kfifo_alloc(PAGE_SIZE, GFP_KERNEL, - &port->lock); - if (IS_ERR(port->write_fifo)) + if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL, + &port->lock)) goto probe_error; buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); port->bulk_out_size = buffer_size; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index ad6bdf5a5970..c3f8d82efd34 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -1,6 +1,7 @@ /* - * A simple kernel FIFO implementation. + * A generic kernel FIFO implementation. * + * Copyright (C) 2009 Stefani Seibold * Copyright (C) 2004 Stelian Pop * * This program is free software; you can redistribute it and/or modify @@ -32,10 +33,10 @@ struct kfifo { spinlock_t *lock; /* protects concurrent modifications */ }; -extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock); -extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, - spinlock_t *lock); +extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, + unsigned int size, spinlock_t *lock); +extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, + gfp_t gfp_mask, spinlock_t *lock); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 7394e3bc8f4b..ff92b46f5153 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -231,7 +232,7 @@ struct iscsi_conn { }; struct iscsi_pool { - struct kfifo *queue; /* FIFO Queue */ + struct kfifo queue; /* FIFO Queue */ void **pool; /* Pool of elements */ int max; /* Max number of elements */ }; diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h index 9e3182e659db..741ae7ed4394 100644 --- a/include/scsi/libiscsi_tcp.h +++ b/include/scsi/libiscsi_tcp.h @@ -80,7 +80,7 @@ struct iscsi_tcp_task { int data_offset; struct iscsi_r2t_info *r2t; /* in progress solict R2T */ struct iscsi_pool r2tpool; - struct kfifo *r2tqueue; + struct kfifo r2tqueue; void *dd_data; }; diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h index ba615e4c1d7c..07e3adde21d9 100644 --- a/include/scsi/libsrp.h +++ b/include/scsi/libsrp.h @@ -21,7 
+21,7 @@ struct srp_buf { struct srp_queue { void *pool; void *items; - struct kfifo *queue; + struct kfifo queue; spinlock_t lock; }; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 3765ff3c1bbe..8da6bb9782bb 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -1,6 +1,7 @@ /* - * A simple kernel FIFO implementation. + * A generic kernel FIFO implementation. * + * Copyright (C) 2009 Stefani Seibold * Copyright (C) 2004 Stelian Pop * * This program is free software; you can redistribute it and/or modify @@ -26,49 +27,51 @@ #include #include +static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, + unsigned int size, spinlock_t *lock) +{ + fifo->buffer = buffer; + fifo->size = size; + fifo->lock = lock; + + kfifo_reset(fifo); +} + /** - * kfifo_init - allocates a new FIFO using a preallocated buffer + * kfifo_init - initialize a FIFO using a preallocated buffer + * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. * @size: the size of the internal buffer, this have to be a power of 2. - * @gfp_mask: get_free_pages mask, passed to kmalloc() * @lock: the lock to be used to protect the fifo buffer * - * Do NOT pass the kfifo to kfifo_free() after use! Simply free the - * &struct kfifo with kfree(). */ -struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock) +void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size, + spinlock_t *lock) { - struct kfifo *fifo; - /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); - fifo = kmalloc(sizeof(struct kfifo), gfp_mask); - if (!fifo) - return ERR_PTR(-ENOMEM); - - fifo->buffer = buffer; - fifo->size = size; - fifo->in = fifo->out = 0; - fifo->lock = lock; - - return fifo; + _kfifo_init(fifo, buffer, size, lock); } EXPORT_SYMBOL(kfifo_init); /** - * kfifo_alloc - allocates a new FIFO and its internal buffer - * @size: the size of the internal buffer to be allocated. + * kfifo_alloc - allocates a new FIFO internal buffer + * @fifo: the fifo to assign then new buffer + * @size: the size of the buffer to be allocated, this have to be a power of 2. * @gfp_mask: get_free_pages mask, passed to kmalloc() * @lock: the lock to be used to protect the fifo buffer * + * This function dynamically allocates a new fifo internal buffer + * * The size will be rounded-up to a power of 2. + * The buffer will be release with kfifo_free(). + * Return 0 if no error, otherwise the an error code */ -struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) +int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, + spinlock_t *lock) { unsigned char *buffer; - struct kfifo *ret; /* * round up to the next power of 2, since our 'let the indices @@ -80,26 +83,24 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) } buffer = kmalloc(size, gfp_mask); - if (!buffer) - return ERR_PTR(-ENOMEM); - - ret = kfifo_init(buffer, size, gfp_mask, lock); + if (!buffer) { + _kfifo_init(fifo, 0, 0, NULL); + return -ENOMEM; + } - if (IS_ERR(ret)) - kfree(buffer); + _kfifo_init(fifo, buffer, size, lock); - return ret; + return 0; } EXPORT_SYMBOL(kfifo_alloc); /** - * kfifo_free - frees the FIFO + * kfifo_free - frees the FIFO internal buffer * @fifo: the fifo to be freed. 
*/ void kfifo_free(struct kfifo *fifo) { kfree(fifo->buffer); - kfree(fifo); } EXPORT_SYMBOL(kfifo_free); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index dc328425fa20..6230ceb0823e 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -43,7 +43,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; static struct { - struct kfifo *fifo; + struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; struct timespec tstart; @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(dccpw.fifo, tbuf, len); + kfifo_put(&dccpw.fifo, tbuf, len); wake_up(&dccpw.wait); } @@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { - kfifo_reset(dccpw.fifo); + kfifo_reset(&dccpw.fifo); getnstimeofday(&dccpw.tstart); return 0; } @@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(dccpw.fifo) != 0); + __kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; - cnt = kfifo_get(dccpw.fifo, tbuf, len); + cnt = kfifo_get(&dccpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: @@ -156,10 +156,8 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); - if (IS_ERR(dccpw.fifo)) - return PTR_ERR(dccpw.fifo); - + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; @@ -172,14 +170,14 @@ static __init int dccpprobe_init(void) err1: proc_net_remove(&init_net, procname); err0: - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); return ret; } module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { - kfifo_free(dccpw.fifo); + kfifo_free(&dccpw.fifo); proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); -- cgit v1.2.3 From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:27 -0800 Subject: kfifo: move out spinlock Move the pointer to the spinlock out of struct kfifo. Most users in tree do not actually use a spinlock, so the few exceptions now have to call kfifo_{get,put}_locked, which takes an extra argument to a spinlock. 
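A minimal sketch of the calling convention after this change (hypothetical code, not taken from the diff; the evq names are invented): lockless single-reader/single-writer users keep calling the plain functions, while the few callers that still need locking pass their own spinlock to the *_locked variants on every call, and kfifo_alloc() drops its spinlock parameter.

	#include <linux/kfifo.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	static struct kfifo evq;		/* illustrative name */
	static DEFINE_SPINLOCK(evq_lock);	/* owned by the user now */

	static int evq_init(void)
	{
		/* no spinlock argument anymore */
		return kfifo_alloc(&evq, 128 * sizeof(u32), GFP_KERNEL);
	}

	static void evq_push(u32 event)
	{
		/* callers that need locking pass their lock explicitly */
		kfifo_put_locked(&evq, (unsigned char *)&event,
				 sizeof(event), &evq_lock);
	}

	static bool evq_pop(u32 *event)
	{
		return kfifo_get_locked(&evq, (unsigned char *)event,
					sizeof(*event), &evq_lock)
			== sizeof(*event);
	}
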
Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 2 +- drivers/char/sonypi.c | 21 ++++---- drivers/infiniband/hw/cxgb3/cxio_resource.c | 36 +++++++------ drivers/media/video/meye.c | 35 +++++++------ drivers/net/wireless/libertas/main.c | 2 +- drivers/platform/x86/fujitsu-laptop.c | 18 ++++--- drivers/platform/x86/sony-laptop.c | 22 ++++---- drivers/scsi/libiscsi.c | 2 +- drivers/scsi/libiscsi_tcp.c | 2 +- drivers/scsi/libsrp.c | 9 ++-- drivers/usb/host/fhci.h | 2 +- drivers/usb/serial/generic.c | 4 +- drivers/usb/serial/usb-serial.c | 3 +- include/linux/kfifo.h | 80 +++++++++++++---------------- kernel/kfifo.c | 17 +++--- net/dccp/probe.c | 6 +-- 16 files changed, 131 insertions(+), 130 deletions(-) (limited to 'kernel') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 0f39bec28b45..935b30d80adf 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -686,7 +686,7 @@ static int nozomi_read_config_table(struct nozomi *dc) for (i = PORT_MDM; i < MAX_PORT; i++) { kfifo_alloc(&dc->port[i].fifo_ul, - FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL); + FIFO_BUFFER_SIZE_UL, GFP_ATOMIC); memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl)); memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul)); } diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 9e6efb1f029f..dbcb3bd192c7 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -777,8 +777,9 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp, - sizeof(kp)) == sizeof(kp)) { + while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, + sizeof(kp), &sonypi_device.input_fifo_lock) + == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); input_sync(kp.dev); @@ -827,8 +828,9 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put(&sonypi_device.input_fifo, - (unsigned char *)&kp, sizeof(kp)); + kfifo_put_locked(&sonypi_device.input_fifo, + (unsigned char *)&kp, sizeof(kp), + &sonypi_device.input_fifo_lock); schedule_work(&sonypi_device.input_work); } } @@ -880,7 +882,8 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event, + sizeof(event), &sonypi_device.fifo_lock); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -929,7 +932,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c), + &sonypi_device.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -1313,8 +1317,7 @@ static int __devinit sonypi_probe(struct platform_device *dev) "http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n"); spin_lock_init(&sonypi_device.fifo_lock); - error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL, - &sonypi_device.fifo_lock); + error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); return error; @@ -1394,7 +1397,7 @@ static int 
__devinit sonypi_probe(struct platform_device *dev) spin_lock_init(&sonypi_device.input_fifo_lock); error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE, - GFP_KERNEL, &sonypi_device.input_fifo_lock); + GFP_KERNEL); if (error) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); goto err_inpdev_unregister; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 65072bdfc1bf..98f24e6d906e 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -55,7 +55,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, u32 rarray[16]; spin_lock_init(fifo_lock); - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock)) + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) @@ -86,7 +86,8 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_get_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock); return 0; } @@ -113,8 +114,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL, - &rdev_p->rscp->qpid_fifo_lock)) + GFP_KERNEL)) return -ENOMEM; for (i = 16; i < T3_MAX_NUM_QP; i++) @@ -177,33 +177,37 @@ tpt_err: /* * returns 0 if no resource available */ -static u32 cxio_hal_get_resource(struct kfifo *fifo) +static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) { u32 entry; - if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32))) + if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) return entry; else return 0; /* fifo emptry */ } -static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) +static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, + u32 entry) { - BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0); + BUG_ON( + kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + == 0); } u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(&rscp->tpt_fifo); + return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { - cxio_hal_put_resource(&rscp->tpt_fifo, stag); + cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo); + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, + &rscp->qpid_fifo_lock); PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } @@ -211,27 +215,27 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { PDBG("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(&rscp->cqid_fifo); + return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { - cxio_hal_put_resource(&rscp->cqid_fifo, cqid); + cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { - 
return cxio_hal_get_resource(&rscp->pdid_fifo); + return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { - cxio_hal_put_resource(&rscp->pdid_fifo, pdid); + cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index dacbbb839b9e..38bcedfd9fec 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,8 +800,8 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, - sizeof(int)) != sizeof(int)) { + if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; } @@ -811,7 +811,8 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } else { int size; @@ -820,8 +821,8 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr, - sizeof(int)) != sizeof(int)) { + if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); goto again; } @@ -831,7 +832,8 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int)); + kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } mchip_free_frame(); @@ -933,7 +935,8 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int)); + kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), + &meye.grabq_lock); mutex_unlock(&meye.lock); return 0; @@ -965,7 +968,8 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int)); + kfifo_get_locked(&meye.doneq, (unsigned char *)&unused, + sizeof(int), &meye.doneq_lock); } *i = meye.grab_buffer[*i].size; mutex_unlock(&meye.lock); @@ -1452,7 +1456,8 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int)); + kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index, + sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); return 0; @@ -1478,8 +1483,8 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) return -EINTR; } - if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr, - sizeof(int))) { + if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr, + sizeof(int), &meye.doneq_lock)) { mutex_unlock(&meye.lock); return -EBUSY; } @@ -1745,14 +1750,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev, } spin_lock_init(&meye.grabq_lock); - if 
(kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.grabq_lock)) { + if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, + GFP_KERNEL)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc1; } spin_lock_init(&meye.doneq_lock); - if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL, - &meye.doneq_lock)) { + if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, + GFP_KERNEL)) { printk(KERN_ERR "meye: fifo allocation failed\n"); goto outkfifoalloc2; } diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 403909287414..2cc7ecd8d123 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -883,7 +883,7 @@ static int lbs_init_adapter(struct lbs_private *priv) priv->resp_len[0] = priv->resp_len[1] = 0; /* Create the event FIFO */ - ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL); + ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL); if (ret) { lbs_pr_err("Out of memory allocating event FIFO buffer\n"); goto out; diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index f999fba0e25e..13dc7bedcfce 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -825,7 +825,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) /* kfifo */ spin_lock_init(&fujitsu_hotkey->fifo_lock); error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), - GFP_KERNEL, &fujitsu_hotkey->fifo_lock); + GFP_KERNEL); if (error) { printk(KERN_ERR "kfifo_alloc failed\n"); goto err_stop; @@ -1006,9 +1006,10 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put(&fujitsu_hotkey->fifo, + status = kfifo_put_locked(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, - sizeof(keycode)); + sizeof(keycode), + &fujitsu_hotkey->fifo_lock); if (status != sizeof(keycode)) { vdbg_printk(FUJLAPTOP_DBG_WARN, "Could not push keycode [0x%x]\n", @@ -1019,11 +1020,12 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } } else if (keycode == 0) { while ((status = - kfifo_get - (&fujitsu_hotkey->fifo, (unsigned char *) - &keycode_r, - sizeof - (keycode_r))) == sizeof(keycode_r)) { + kfifo_get_locked( + &fujitsu_hotkey->fifo, + (unsigned char *) &keycode_r, + sizeof(keycode_r), + &fujitsu_hotkey->fifo_lock)) + == sizeof(keycode_r)) { input_report_key(input, keycode_r, 0); input_sync(input); vdbg_printk(FUJLAPTOP_DBG_TRACE, diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 04625a048e74..1538a0a3c0af 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -300,8 +300,9 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp, - sizeof(kp)) == sizeof(kp)) { + while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, + sizeof(kp), &sony_laptop_input.fifo_lock) + == sizeof(kp)) { msleep(10); input_report_key(kp.dev, kp.key, 0); input_sync(kp.dev); @@ -362,8 +363,9 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put(&sony_laptop_input.fifo, - (unsigned char *)&kp, 
sizeof(kp)); + kfifo_put_locked(&sony_laptop_input.fifo, + (unsigned char *)&kp, sizeof(kp), + &sony_laptop_input.fifo_lock); if (!work_pending(&sony_laptop_release_key_work)) queue_work(sony_laptop_input.wq, @@ -386,8 +388,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) /* kfifo */ spin_lock_init(&sony_laptop_input.fifo_lock); error = - kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, - &sony_laptop_input.fifo_lock); + kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); goto err_dec_users; @@ -2129,7 +2130,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) { + (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c), + &sonypi_compat.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; ret++; @@ -2308,7 +2310,8 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event)); + kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event, + sizeof(event), &sonypi_compat.fifo_lock); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); } @@ -2319,8 +2322,7 @@ static int sonypi_compat_init(void) spin_lock_init(&sonypi_compat.fifo_lock); error = - kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL, - &sonypi_compat.fifo_lock); + kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n"); return error; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf0aa7e90be9..1bccbc1e588e 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2461,7 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) if (q->pool == NULL) return -ENOMEM; - kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL); + kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*)); for (i = 0; i < max; i++) { q->pool[i] = kzalloc(item_size, GFP_KERNEL); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index a83ee56a185e..41643c860d26 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -1128,7 +1128,7 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session) /* R2T xmit queue */ if (kfifo_alloc(&tcp_task->r2tqueue, - session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) { + session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) { iscsi_pool_free(&tcp_task->r2tpool); goto r2t_alloc_fail; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index b1b5e51ca8e3..db1b41c55fd3 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -58,8 +58,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, goto free_pool; spin_lock_init(&q->lock); - kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *), - &q->lock); + kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { __kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); @@ -164,7 +163,8 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_get_locked(&target->iu_queue.queue, (void *) &iue, + sizeof(void *), &target->iu_queue.lock); if (!iue) return iue; 
iue->target = target; @@ -176,7 +176,8 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *)); + kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue, + sizeof(void *), &iue->target->iu_queue.lock); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 2277428ef5d3..a76da201183b 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -495,7 +495,7 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci) /* fifo of pointers */ static inline int cq_new(struct kfifo *fifo, int size) { - return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL); + return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL); } static inline void cq_delete(struct kfifo *kfifo) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index bbe005cefcfb..61eef18218be 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) return 0; data = port->write_urb->transfer_buffer; - count = kfifo_get(port->write_fifo, data, port->bulk_out_size); + count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); usb_serial_debug_data(debug, &port->dev, __func__, count, data); /* set up our urb */ @@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty, return usb_serial_multi_urb_write(tty, port, buf, count); - count = kfifo_put(port->write_fifo, buf, count); + count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port); if (result >= 0) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 44b72d47fac2..636a4f23445e 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -939,8 +939,7 @@ int usb_serial_probe(struct usb_interface *interface, dev_err(&interface->dev, "No free urbs available\n"); goto probe_error; } - if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL, - &port->lock)) + if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL)) goto probe_error; buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); port->bulk_out_size = buffer_size; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c3f8d82efd34..e0f5c9d4197d 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -30,13 +30,12 @@ struct kfifo { unsigned int size; /* the size of the allocated buffer */ unsigned int in; /* data is added at offset (in % size) */ unsigned int out; /* data is extracted from off. (out % size) */ - spinlock_t *lock; /* protects concurrent modifications */ }; extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, - unsigned int size, spinlock_t *lock); + unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, - gfp_t gfp_mask, spinlock_t *lock); + gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); extern unsigned int __kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); @@ -58,58 +57,67 @@ static inline void __kfifo_reset(struct kfifo *fifo) */ static inline void kfifo_reset(struct kfifo *fifo) { - unsigned long flags; - - spin_lock_irqsave(fifo->lock, flags); - __kfifo_reset(fifo); +} + +/** + * __kfifo_len - returns the number of bytes available in the FIFO + * @fifo: the fifo to be used. 
+ */ +static inline unsigned int __kfifo_len(struct kfifo *fifo) +{ + register unsigned int out; - spin_unlock_irqrestore(fifo->lock, flags); + out = fifo->out; + smp_rmb(); + return fifo->in - out; } /** - * kfifo_put - puts some data into the FIFO + * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking * @fifo: the fifo to be used. - * @buffer: the data to be added. - * @len: the length of the data to be added. + * @from: the data to be added. + * @n: the length of the data to be added. + * @lock: pointer to the spinlock to use for locking. * - * This function copies at most @len bytes from the @buffer into + * This function copies at most @len bytes from the @from buffer into * the FIFO depending on the free space, and returns the number of * bytes copied. */ -static inline unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len) +static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, + const unsigned char *from, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; - spin_lock_irqsave(fifo->lock, flags); + spin_lock_irqsave(lock, flags); - ret = __kfifo_put(fifo, buffer, len); + ret = __kfifo_put(fifo, from, n); - spin_unlock_irqrestore(fifo->lock, flags); + spin_unlock_irqrestore(lock, flags); return ret; } /** - * kfifo_get - gets some data from the FIFO + * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking * @fifo: the fifo to be used. - * @buffer: where the data must be copied. - * @len: the size of the destination buffer. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @lock: pointer to the spinlock to use for locking. * * This function copies at most @len bytes from the FIFO into the - * @buffer and returns the number of copied bytes. + * @to buffer and returns the number of copied bytes. */ -static inline unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len) +static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, + unsigned char *to, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; - spin_lock_irqsave(fifo->lock, flags); + spin_lock_irqsave(lock, flags); - ret = __kfifo_get(fifo, buffer, len); + ret = __kfifo_get(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 @@ -118,36 +126,18 @@ static inline unsigned int kfifo_get(struct kfifo *fifo, if (fifo->in == fifo->out) fifo->in = fifo->out = 0; - spin_unlock_irqrestore(fifo->lock, flags); + spin_unlock_irqrestore(lock, flags); return ret; } -/** - * __kfifo_len - returns the number of bytes available in the FIFO, no locking version - * @fifo: the fifo to be used. - */ -static inline unsigned int __kfifo_len(struct kfifo *fifo) -{ - return fifo->in - fifo->out; -} - /** * kfifo_len - returns the number of bytes available in the FIFO * @fifo: the fifo to be used. 
*/ static inline unsigned int kfifo_len(struct kfifo *fifo) { - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(fifo->lock, flags); - - ret = __kfifo_len(fifo); - - spin_unlock_irqrestore(fifo->lock, flags); - - return ret; + return __kfifo_len(fifo); } #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 8da6bb9782bb..4950bdbe3477 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -28,11 +28,10 @@ #include static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, - unsigned int size, spinlock_t *lock) + unsigned int size) { fifo->buffer = buffer; fifo->size = size; - fifo->lock = lock; kfifo_reset(fifo); } @@ -42,16 +41,14 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. * @size: the size of the internal buffer, this have to be a power of 2. - * @lock: the lock to be used to protect the fifo buffer * */ -void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size, - spinlock_t *lock) +void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) { /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); - _kfifo_init(fifo, buffer, size, lock); + _kfifo_init(fifo, buffer, size); } EXPORT_SYMBOL(kfifo_init); @@ -60,7 +57,6 @@ EXPORT_SYMBOL(kfifo_init); * @fifo: the fifo to assign then new buffer * @size: the size of the buffer to be allocated, this have to be a power of 2. * @gfp_mask: get_free_pages mask, passed to kmalloc() - * @lock: the lock to be used to protect the fifo buffer * * This function dynamically allocates a new fifo internal buffer * @@ -68,8 +64,7 @@ EXPORT_SYMBOL(kfifo_init); * The buffer will be release with kfifo_free(). * Return 0 if no error, otherwise the an error code */ -int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, - spinlock_t *lock) +int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) { unsigned char *buffer; @@ -84,11 +79,11 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask, buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0, NULL); + _kfifo_init(fifo, 0, 0); return -ENOMEM; } - _kfifo_init(fifo, buffer, size, lock); + _kfifo_init(fifo, buffer, size); return 0; } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 6230ceb0823e..c6b50351aa78 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put(&dccpw.fifo, tbuf, len); + kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get(&dccpw.fifo, tbuf, len); + cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? 
-EFAULT : 0; out_free: @@ -156,7 +156,7 @@ static __init int dccpprobe_init(void) init_waitqueue_head(&dccpw.wait); spin_lock_init(&dccpw.lock); - if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock)) + if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; -- cgit v1.2.3 From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: cleanup namespace Change the name of the __kfifo_* functions to kfifo_*, because the prefix __kfifo should be reserved for internal functions only. Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 12 +++++------ drivers/infiniband/hw/cxgb3/cxio_resource.c | 10 ++++----- drivers/net/wireless/libertas/cmd.c | 4 ++-- drivers/net/wireless/libertas/main.c | 8 ++++---- drivers/platform/x86/sony-laptop.c | 2 +- drivers/scsi/libiscsi.c | 14 ++++++------- drivers/scsi/libiscsi_tcp.c | 20 +++++++++--------- drivers/scsi/libsrp.c | 2 +- drivers/usb/host/fhci.h | 6 +++--- drivers/usb/serial/generic.c | 4 ++-- include/linux/kfifo.h | 32 +++++++---------------------- kernel/kfifo.c | 12 +++++------ net/dccp/probe.c | 2 +- 13 files changed, 55 insertions(+), 73 deletions(-) (limited to 'kernel') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 935b30d80adf..61f5bfe74f38 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = __kfifo_get(&port->fifo_ul, dc->send_buf, + size = kfifo_get(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX); if (size == 0) { @@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc) } else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) { - if (__kfifo_len(&dc->port[port].fifo_ul)) { + if (kfifo_len(&dc->port[port].fifo_ul)) { DBG1("Enable interrupt (0x%04X) on port: %d", enable_ier, port); DBG1("Data in buffer [%d], enable transmit!
", - __kfifo_len(&dc->port[port].fifo_ul)); + kfifo_len(&dc->port[port].fifo_ul)); enable_transmit_ul(port, dc); } else { DBG1("No data in buffer..."); @@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); + rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { @@ -1720,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty) if (!port->port.count) goto exit; - room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul); + room = port->fifo_ul.size - kfifo_len(&port->fifo_ul); exit: mutex_unlock(&port->tty_sem); @@ -1877,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty) goto exit_in_buffer; } - rval = __kfifo_len(&port->fifo_ul); + rval = kfifo_len(&port->fifo_ul); exit_in_buffer: return rval; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 98f24e6d906e..d7d18fb02c93 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - __kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -71,19 +71,19 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - __kfifo_put(fifo, + kfifo_put(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - __kfifo_put(fifo, + kfifo_put(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - __kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); + kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) kfifo_get_locked(fifo, (unsigned char *) &entry, @@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - __kfifo_put(&rdev_p->rscp->qpid_fifo, + kfifo_put(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index ffed17f4f506..42611bea76a3 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv) priv->dnld_sent = DNLD_RES_RECEIVED; /* If nothing to do, go back to sleep (?) */ - if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) + if (!kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx]) priv->psstate = PS_STATE_SLEEP; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv) } /* Pending events or command responses? 
*/ - if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { + if (kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) { allowed = 0; lbs_deb_host("pending events or command responses\n"); } diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 2cc7ecd8d123..0622104f0a03 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -459,7 +459,7 @@ static int lbs_thread(void *data) else if (!list_empty(&priv->cmdpendingq) && !(priv->wakeup_dev_required)) shouldsleep = 0; /* We have a command to send */ - else if (__kfifo_len(&priv->event_fifo)) + else if (kfifo_len(&priv->event_fifo)) shouldsleep = 0; /* We have an event to process */ else shouldsleep = 1; /* No command */ @@ -511,9 +511,9 @@ static int lbs_thread(void *data) /* Process hardware events, e.g. card removed, link lost */ spin_lock_irq(&priv->driver_lock); - while (__kfifo_len(&priv->event_fifo)) { + while (kfifo_len(&priv->event_fifo)) { u32 event; - __kfifo_get(&priv->event_fifo, (unsigned char *) &event, + kfifo_get(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -1175,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - __kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 1538a0a3c0af..36e5dc6fc953 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -2107,7 +2107,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) spin_lock_irqsave(&sonypi_compat.fifo_lock, flags); if (atomic_inc_return(&sonypi_compat.open_count) == 1) - __kfifo_reset(&sonypi_compat.fifo); + kfifo_reset(&sonypi_compat.fifo); spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 1bccbc1e588e..5f0c46f43ee1 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - __kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); + kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!__kfifo_get(&session->cmdpool.queue, + if (!kfifo_get(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!__kfifo_get(&conn->session->cmdpool.queue, + if (!kfifo_get(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - __kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); + kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ 
spin_lock_bh(&session->lock); - if (!__kfifo_get(&session->cmdpool.queue, + if (!kfifo_get(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - __kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 41643c860d26..c0be926637b1 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - __kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - __kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task) return conn->session->tt->init_pdu(task, 0, task->data_count); } - BUG_ON(__kfifo_len(&tcp_task->r2tqueue)); + BUG_ON(kfifo_len(&tcp_task->r2tqueue)); tcp_task->exp_datasn = 0; /* Prepare PDU, optionally w/ immediate data */ @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - __kfifo_put(&tcp_task->r2tpool.queue, + kfifo_put(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - __kfifo_get(&tcp_task->r2tqueue, + kfifo_get(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index db1b41c55fd3..975e448cfcb9 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { - __kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); + kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); iue->sbuf = ring[i]; iue++; } diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index a76da201183b..96aa787f208f 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -505,19 +505,19 @@ static inline void cq_delete(struct kfifo *kfifo) static inline unsigned int cq_howmany(struct kfifo *kfifo) { - return __kfifo_len(kfifo) / sizeof(void *); + return kfifo_len(kfifo) / sizeof(void *); } static inline int cq_put(struct kfifo *kfifo, void *p) { - return __kfifo_put(kfifo, (void *)&p, sizeof(p)); + return kfifo_put(kfifo, (void *)&p, sizeof(p)); } static inline void *cq_get(struct kfifo *kfifo) { void *p = NULL; - __kfifo_get(kfifo, (void *)&p, sizeof(p)); + kfifo_get(kfifo, (void *)&p, sizeof(p)); return p; } diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 61eef18218be..d0a2e464cacd 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port) if (port->write_urb_busy) start_io = false; else { - start_io = (__kfifo_len(port->write_fifo) != 0); + 
start_io = (kfifo_len(port->write_fifo) != 0); port->write_urb_busy = start_io; } spin_unlock_irqrestore(&port->lock, flags); @@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty) (serial->type->max_in_flight_urbs - port->urbs_in_flight); } else if (serial->num_bulk_out) - room = port->write_fifo->size - __kfifo_len(port->write_fifo); + room = port->write_fifo->size - kfifo_len(port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); dbg("%s - returns %d", __func__, room); diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index e0f5c9d4197d..a893acda3964 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -37,34 +37,25 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); -extern unsigned int __kfifo_put(struct kfifo *fifo, +extern unsigned int kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len); -extern unsigned int __kfifo_get(struct kfifo *fifo, +extern unsigned int kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len); -/** - * __kfifo_reset - removes the entire FIFO contents, no locking version - * @fifo: the fifo to be emptied. - */ -static inline void __kfifo_reset(struct kfifo *fifo) -{ - fifo->in = fifo->out = 0; -} - /** * kfifo_reset - removes the entire FIFO contents * @fifo: the fifo to be emptied. */ static inline void kfifo_reset(struct kfifo *fifo) { - __kfifo_reset(fifo); + fifo->in = fifo->out = 0; } /** - * __kfifo_len - returns the number of bytes available in the FIFO + * kfifo_len - returns the number of used bytes in the FIFO * @fifo: the fifo to be used. */ -static inline unsigned int __kfifo_len(struct kfifo *fifo) +static inline unsigned int kfifo_len(struct kfifo *fifo) { register unsigned int out; @@ -92,7 +83,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = __kfifo_put(fifo, from, n); + ret = kfifo_put(fifo, from, n); spin_unlock_irqrestore(lock, flags); @@ -117,7 +108,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = __kfifo_get(fifo, to, n); + ret = kfifo_get(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 @@ -131,13 +122,4 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, return ret; } -/** - * kfifo_len - returns the number of bytes available in the FIFO - * @fifo: the fifo to be used. - */ -static inline unsigned int kfifo_len(struct kfifo *fifo) -{ - return __kfifo_len(fifo); -} - #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4950bdbe3477..963ffde4af1a 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -100,7 +100,7 @@ void kfifo_free(struct kfifo *fifo) EXPORT_SYMBOL(kfifo_free); /** - * __kfifo_put - puts some data into the FIFO, no locking version + * kfifo_put - puts some data into the FIFO, no locking version * @fifo: the fifo to be used. * @buffer: the data to be added. * @len: the length of the data to be added. @@ -112,7 +112,7 @@ EXPORT_SYMBOL(kfifo_free); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. 
*/ -unsigned int __kfifo_put(struct kfifo *fifo, +unsigned int kfifo_put(struct kfifo *fifo, const unsigned char *buffer, unsigned int len) { unsigned int l; @@ -144,10 +144,10 @@ unsigned int __kfifo_put(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(__kfifo_put); +EXPORT_SYMBOL(kfifo_put); /** - * __kfifo_get - gets some data from the FIFO, no locking version + * kfifo_get - gets some data from the FIFO, no locking version * @fifo: the fifo to be used. * @buffer: where the data must be copied. * @len: the size of the destination buffer. @@ -158,7 +158,7 @@ EXPORT_SYMBOL(__kfifo_put); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int __kfifo_get(struct kfifo *fifo, +unsigned int kfifo_get(struct kfifo *fifo, unsigned char *buffer, unsigned int len) { unsigned int l; @@ -190,4 +190,4 @@ unsigned int __kfifo_get(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(__kfifo_get); +EXPORT_SYMBOL(kfifo_get); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index c6b50351aa78..9ef36849edd7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, return -ENOMEM; error = wait_event_interruptible(dccpw.wait, - __kfifo_len(&dccpw.fifo) != 0); + kfifo_len(&dccpw.fifo) != 0); if (error) goto out_free; -- cgit v1.2.3 From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:28 -0800 Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into kfifo_out... Rename kfifo_put... into kfifo_in... to prevent misuse by old out-of-tree drivers; ditto for kfifo_get... -> kfifo_out... Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc annotations more readable. Add a mini "howto porting to the new API" in kfifo.h Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/nozomi.c | 4 +-- drivers/char/sonypi.c | 8 +++--- drivers/infiniband/hw/cxgb3/cxio_resource.c | 16 ++++++------ drivers/media/video/meye.c | 16 ++++++------ drivers/net/wireless/libertas/main.c | 5 ++-- drivers/platform/x86/fujitsu-laptop.c | 4 +-- drivers/platform/x86/sony-laptop.c | 8 +++--- drivers/scsi/libiscsi.c | 14 +++++------ drivers/scsi/libiscsi_tcp.c | 18 ++++++------- drivers/scsi/libsrp.c | 6 ++--- drivers/usb/host/fhci.h | 4 +-- drivers/usb/serial/generic.c | 4 +-- include/linux/kfifo.h | 39 +++++++++++++++++++++-------- kernel/kfifo.c | 32 +++++++++++------------ net/dccp/probe.c | 4 +-- 15 files changed, 101 insertions(+), 81 deletions(-) (limited to 'kernel') diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 61f5bfe74f38..9ef243429014 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc) struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ - size = kfifo_get(&port->fifo_ul, dc->send_buf, + size = kfifo_out(&port->fifo_ul, dc->send_buf, ul_size < SEND_BUF_MAX ?
ul_size : SEND_BUF_MAX); if (size == 0) { @@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, goto exit; } - rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count); + rval = kfifo_in(&port->fifo_ul, (unsigned char *)buffer, count); /* notify card */ if (unlikely(dc == NULL)) { diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index dbcb3bd192c7..0798754a607c 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work) { struct sonypi_keypress kp; - while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, + while (kfifo_out_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp), &sonypi_device.input_fifo_lock) == sizeof(kp)) { msleep(10); @@ -828,7 +828,7 @@ static void sonypi_report_input_event(u8 event) if (kp.dev) { input_report_key(kp.dev, kp.key, 1); input_sync(kp.dev); - kfifo_put_locked(&sonypi_device.input_fifo, + kfifo_in_locked(&sonypi_device.input_fifo, (unsigned char *)&kp, sizeof(kp), &sonypi_device.input_fifo_lock); schedule_work(&sonypi_device.input_work); @@ -882,7 +882,7 @@ found: acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event); #endif - kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event, + kfifo_in_locked(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event), &sonypi_device.fifo_lock); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -932,7 +932,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c), + (kfifo_out_locked(&sonypi_device.fifo, &c, sizeof(c), &sonypi_device.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index d7d18fb02c93..dcbf2606c438 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; random_bytes = random32(); @@ -71,22 +71,22 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo, random_bytes = random32(); } idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_put(fifo, + kfifo_in(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - kfifo_put(fifo, + kfifo_in(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - kfifo_put(fifo, (unsigned char *) &i, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get_locked(fifo, (unsigned char *) &entry, + kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), fifo_lock); return 0; } @@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - kfifo_put(&rdev_p->rscp->qpid_fifo, + kfifo_in(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } @@ -180,7 +180,7 @@ tpt_err: static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) { u32 entry; - if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) + if 
(kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) return entry; else return 0; /* fifo emptry */ @@ -190,7 +190,7 @@ static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, u32 entry) { BUG_ON( - kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) == 0); } diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 38bcedfd9fec..884a569d60a2 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -800,7 +800,7 @@ again: return IRQ_HANDLED; if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) { - if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr, sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); return IRQ_HANDLED; @@ -811,7 +811,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } else { @@ -821,7 +821,7 @@ again: mchip_free_frame(); goto again; } - if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr, + if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr, sizeof(int), &meye.grabq_lock) != sizeof(int)) { mchip_free_frame(); goto again; @@ -832,7 +832,7 @@ again: meye.grab_buffer[reqnr].state = MEYE_BUF_DONE; do_gettimeofday(&meye.grab_buffer[reqnr].timestamp); meye.grab_buffer[reqnr].sequence = sequence++; - kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr, + kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock); wake_up_interruptible(&meye.proc_list); } @@ -935,7 +935,7 @@ static int meyeioc_qbuf_capt(int *nb) mchip_cont_compression_start(); meye.grab_buffer[*nb].state = MEYE_BUF_USING; - kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), + kfifo_in_locked(&meye.grabq, (unsigned char *)nb, sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); @@ -968,7 +968,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i) /* fall through */ case MEYE_BUF_DONE: meye.grab_buffer[*i].state = MEYE_BUF_UNUSED; - kfifo_get_locked(&meye.doneq, (unsigned char *)&unused, + kfifo_out_locked(&meye.doneq, (unsigned char *)&unused, sizeof(int), &meye.doneq_lock); } *i = meye.grab_buffer[*i].size; @@ -1456,7 +1456,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) buf->flags |= V4L2_BUF_FLAG_QUEUED; buf->flags &= ~V4L2_BUF_FLAG_DONE; meye.grab_buffer[buf->index].state = MEYE_BUF_USING; - kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index, + kfifo_in_locked(&meye.grabq, (unsigned char *)&buf->index, sizeof(int), &meye.grabq_lock); mutex_unlock(&meye.lock); @@ -1483,7 +1483,7 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) return -EINTR; } - if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr, + if (!kfifo_out_locked(&meye.doneq, (unsigned char *)&reqnr, sizeof(int), &meye.doneq_lock)) { mutex_unlock(&meye.lock); return -EBUSY; diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 0622104f0a03..2bcfa745524a 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -513,7 +513,8 @@ static int lbs_thread(void *data) spin_lock_irq(&priv->driver_lock); while 
(kfifo_len(&priv->event_fifo)) { u32 event; - kfifo_get(&priv->event_fifo, (unsigned char *) &event, + + kfifo_out(&priv->event_fifo, (unsigned char *) &event, sizeof(event)); spin_unlock_irq(&priv->driver_lock); lbs_process_event(priv, event); @@ -1175,7 +1176,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event) if (priv->psstate == PS_STATE_SLEEP) priv->psstate = PS_STATE_AWAKE; - kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); + kfifo_in(&priv->event_fifo, (unsigned char *) &event, sizeof(u32)); wake_up_interruptible(&priv->waitq); diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 13dc7bedcfce..b66029bd75d0 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1006,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) vdbg_printk(FUJLAPTOP_DBG_TRACE, "Push keycode into ringbuffer [%d]\n", keycode); - status = kfifo_put_locked(&fujitsu_hotkey->fifo, + status = kfifo_in_locked(&fujitsu_hotkey->fifo, (unsigned char *)&keycode, sizeof(keycode), &fujitsu_hotkey->fifo_lock); @@ -1020,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } } else if (keycode == 0) { while ((status = - kfifo_get_locked( + kfifo_out_locked( &fujitsu_hotkey->fifo, (unsigned char *) &keycode_r, sizeof(keycode_r), diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 36e5dc6fc953..2896ca4cd9ab 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work) { struct sony_laptop_keypress kp; - while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, + while (kfifo_out_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp), &sony_laptop_input.fifo_lock) == sizeof(kp)) { msleep(10); @@ -363,7 +363,7 @@ static void sony_laptop_report_input_event(u8 event) /* we emit the scancode so we can always remap the key */ input_event(kp.dev, EV_MSC, MSC_SCAN, event); input_sync(kp.dev); - kfifo_put_locked(&sony_laptop_input.fifo, + kfifo_in_locked(&sony_laptop_input.fifo, (unsigned char *)&kp, sizeof(kp), &sony_laptop_input.fifo_lock); @@ -2130,7 +2130,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf, return ret; while (ret < count && - (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c), + (kfifo_out_locked(&sonypi_compat.fifo, &c, sizeof(c), &sonypi_compat.fifo_lock) == sizeof(c))) { if (put_user(c, buf++)) return -EFAULT; @@ -2310,7 +2310,7 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_compat_report_event(u8 event) { - kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event, + kfifo_in_locked(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event), &sonypi_compat.fifo_lock); kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_compat.fifo_proc_list); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 5f0c46f43ee1..c28a712fd4db 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task) if (conn->login_task == task) return; - kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*)); + kfifo_in(&session->cmdpool.queue, (void*)&task, sizeof(void*)); if (sc) { task->sc = NULL; @@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, 
BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); - if (!kfifo_get(&session->cmdpool.queue, + if (!kfifo_out(&session->cmdpool.queue, (void*)&task, sizeof(void*))) return NULL; } @@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, { struct iscsi_task *task; - if (!kfifo_get(&conn->session->cmdpool.queue, + if (!kfifo_out(&conn->session->cmdpool.queue, (void *) &task, sizeof(void *))) return NULL; @@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size) q->max = i; goto enomem; } - kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*)); + kfifo_in(&q->queue, (void*)&q->pool[i], sizeof(void*)); } if (items) { @@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, /* allocate login_task used for the login/text sequences */ spin_lock_bh(&session->lock); - if (!kfifo_get(&session->cmdpool.queue, + if (!kfifo_out(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*))) { spin_unlock_bh(&session->lock); @@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, return cls_conn; login_task_data_alloc_fail: - kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); login_task_alloc_fail: iscsi_destroy_conn(cls_conn); @@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); kfree(conn->persistent_address); - kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task, + kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, sizeof(void*)); if (session->leadconn == conn) session->leadconn = NULL; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index c0be926637b1..d51ffeca2ec9 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) return; /* flush task's r2t queues */ - while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) { + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n"); } r2t = tcp_task->r2t; if (r2t != NULL) { - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); tcp_task->r2t = NULL; } @@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); + rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); if (!rc) { iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. 
" "Target has sent more R2Ts than it " @@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) if (r2t->data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, "invalid R2T with zero data len\n"); - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) "invalid R2T with data len %u at offset %u " "and total length %d\n", r2t->data_length, r2t->data_offset, scsi_out(task->sc)->length); - kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t, + kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*)); return ISCSI_ERR_DATALEN; } @@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) r2t->sent = 0; tcp_task->exp_datasn = r2tsn + 1; - kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); + kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*)); conn->r2t_pdus_cnt++; iscsi_requeue_task(task); @@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t->data_length <= r2t->sent) { ISCSI_DBG_TCP(task->conn, " done with r2t %p\n", r2t); - kfifo_put(&tcp_task->r2tpool.queue, + kfifo_in(&tcp_task->r2tpool.queue, (void *)&tcp_task->r2t, sizeof(void *)); tcp_task->r2t = r2t = NULL; @@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) } if (r2t == NULL) { - kfifo_get(&tcp_task->r2tqueue, + kfifo_out(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)); r2t = tcp_task->r2t; } diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c index 975e448cfcb9..8424b8606efb 100644 --- a/drivers/scsi/libsrp.c +++ b/drivers/scsi/libsrp.c @@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max, kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *)); for (i = 0, iue = q->items; i < max; i++) { - kfifo_put(&q->queue, (void *) &iue, sizeof(void *)); + kfifo_in(&q->queue, (void *) &iue, sizeof(void *)); iue->sbuf = ring[i]; iue++; } @@ -163,7 +163,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target) { struct iu_entry *iue = NULL; - kfifo_get_locked(&target->iu_queue.queue, (void *) &iue, + kfifo_out_locked(&target->iu_queue.queue, (void *) &iue, sizeof(void *), &target->iu_queue.lock); if (!iue) return iue; @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get); void srp_iu_put(struct iu_entry *iue) { - kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue, + kfifo_in_locked(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *), &iue->target->iu_queue.lock); } EXPORT_SYMBOL_GPL(srp_iu_put); diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h index 96aa787f208f..72dae1c5ab38 100644 --- a/drivers/usb/host/fhci.h +++ b/drivers/usb/host/fhci.h @@ -510,14 +510,14 @@ static inline unsigned int cq_howmany(struct kfifo *kfifo) static inline int cq_put(struct kfifo *kfifo, void *p) { - return kfifo_put(kfifo, (void *)&p, sizeof(p)); + return kfifo_in(kfifo, (void *)&p, sizeof(p)); } static inline void *cq_get(struct kfifo *kfifo) { void *p = NULL; - kfifo_get(kfifo, (void *)&p, sizeof(p)); + kfifo_out(kfifo, (void *)&p, sizeof(p)); return p; } diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index d0a2e464cacd..b0f1183755c9 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct 
usb_serial_port *port) return 0; data = port->write_urb->transfer_buffer; - count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); + count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock); usb_serial_debug_data(debug, &port->dev, __func__, count, data); /* set up our urb */ @@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty, return usb_serial_multi_urb_write(tty, port, buf, count); - count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock); + count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port); if (result >= 0) diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index a893acda3964..1b59c4a0e85f 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -19,6 +19,25 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ + +/* + * Howto porting drivers to the new generic fifo API: + * + * - Modify the declaration of the "struct kfifo *" object into a + * in-place "struct kfifo" object + * - Init the in-place object with kfifo_alloc() or kfifo_init() + * Note: The address of the in-place "struct kfifo" object must be + * passed as the first argument to this functions + * - Replace the use of __kfifo_put into kfifo_in and __kfifo_get + * into kfifo_out + * - Replace the use of kfifo_put into kfifo_in_locked and kfifo_get + * into kfifo_out_locked + * Note: the spinlock pointer formerly passed to kfifo_init/kfifo_alloc + * must be passed now to the kfifo_in_locked and kfifo_out_locked + * as the last parameter. + * - All formerly name __kfifo_* functions has been renamed into kfifo_* + */ + #ifndef _LINUX_KFIFO_H #define _LINUX_KFIFO_H @@ -37,10 +56,10 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); -extern unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len); -extern unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len); +extern __must_check unsigned int kfifo_in(struct kfifo *fifo, + const unsigned char *from, unsigned int len); +extern __must_check unsigned int kfifo_out(struct kfifo *fifo, + unsigned char *to, unsigned int len); /** * kfifo_reset - removes the entire FIFO contents @@ -65,7 +84,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) } /** - * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking + * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking * @fifo: the fifo to be used. * @from: the data to be added. * @n: the length of the data to be added. @@ -75,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) * the FIFO depending on the free space, and returns the number of * bytes copied. 
*/ -static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, +static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo, const unsigned char *from, unsigned int n, spinlock_t *lock) { unsigned long flags; @@ -83,7 +102,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = kfifo_put(fifo, from, n); + ret = kfifo_in(fifo, from, n); spin_unlock_irqrestore(lock, flags); @@ -91,7 +110,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, } /** - * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking + * kfifo_out_locked - gets some data from the FIFO using a spinlock for locking * @fifo: the fifo to be used. * @to: where the data must be copied. * @n: the size of the destination buffer. @@ -100,7 +119,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo, * This function copies at most @len bytes from the FIFO into the * @to buffer and returns the number of copied bytes. */ -static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, +static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, unsigned char *to, unsigned int n, spinlock_t *lock) { unsigned long flags; @@ -108,7 +127,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo, spin_lock_irqsave(lock, flags); - ret = kfifo_get(fifo, to, n); + ret = kfifo_out(fifo, to, n); /* * optimization: if the FIFO is empty, set the indices to 0 diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 963ffde4af1a..d659442e73f2 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -100,20 +100,20 @@ void kfifo_free(struct kfifo *fifo) EXPORT_SYMBOL(kfifo_free); /** - * kfifo_put - puts some data into the FIFO, no locking version + * kfifo_in - puts some data into the FIFO * @fifo: the fifo to be used. - * @buffer: the data to be added. + * @from: the data to be added. * @len: the length of the data to be added. * - * This function copies at most @len bytes from the @buffer into + * This function copies at most @len bytes from the @from buffer into * the FIFO depending on the free space, and returns the number of * bytes copied. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_put(struct kfifo *fifo, - const unsigned char *buffer, unsigned int len) +unsigned int kfifo_in(struct kfifo *fifo, + const unsigned char *from, unsigned int len) { unsigned int l; @@ -128,10 +128,10 @@ unsigned int kfifo_put(struct kfifo *fifo, /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); - memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); + memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l); /* then put the rest (if any) at the beginning of the buffer */ - memcpy(fifo->buffer, buffer + l, len - l); + memcpy(fifo->buffer, from + l, len - l); /* * Ensure that we add the bytes to the kfifo -before- @@ -144,22 +144,22 @@ unsigned int kfifo_put(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(kfifo_put); +EXPORT_SYMBOL(kfifo_in); /** - * kfifo_get - gets some data from the FIFO, no locking version + * kfifo_out - gets some data from the FIFO * @fifo: the fifo to be used. - * @buffer: where the data must be copied. + * @to: where the data must be copied. * @len: the size of the destination buffer. 
* * This function copies at most @len bytes from the FIFO into the - * @buffer and returns the number of copied bytes. + * @to buffer and returns the number of copied bytes. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_get(struct kfifo *fifo, - unsigned char *buffer, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, + unsigned char *to, unsigned int len) { unsigned int l; @@ -174,10 +174,10 @@ unsigned int kfifo_get(struct kfifo *fifo, /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); - memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l); /* then get the rest (if any) from the beginning of the buffer */ - memcpy(buffer + l, fifo->buffer, len - l); + memcpy(to + l, fifo->buffer, len - l); /* * Ensure that we remove the bytes from the kfifo -before- @@ -190,4 +190,4 @@ unsigned int kfifo_get(struct kfifo *fifo, return len; } -EXPORT_SYMBOL(kfifo_get); +EXPORT_SYMBOL(kfifo_out); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 9ef36849edd7..a1362dc8abb0 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -67,7 +67,7 @@ static void printl(const char *fmt, ...) len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); va_end(args); - kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); wake_up(&dccpw.wait); } @@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, if (error) goto out_free; - cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); + cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock); error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; out_free: -- cgit v1.2.3 From a121f24accac1600bf5b6fb1e12eeabdfed7cb1a Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:31 -0800 Subject: kfifo: add kfifo_skip, kfifo_from_user and kfifo_to_user Add kfifo_reset_out() for safe, lockless discarding of the fifo output Add kfifo_skip() to skip a number of output bytes Add kfifo_from_user() to copy user space data into the fifo Add kfifo_to_user() to copy fifo data to user space Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 47 +++++++++++++++++ kernel/kfifo.c | 139 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 170 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index dd53eed3e2af..d3230fb08bc7 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -124,6 +124,16 @@ static inline void kfifo_reset(struct kfifo *fifo) fifo->in = fifo->out = 0; } +/** + * kfifo_reset_out - skip FIFO contents + * @fifo: the fifo to be emptied. + */ +static inline void kfifo_reset_out(struct kfifo *fifo) +{ + smp_mb(); + fifo->out = fifo->in; +} + /** * kfifo_size - returns the size of the fifo in bytes * @fifo: the fifo to be used.
@@ -231,4 +241,41 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, return ret; } +extern void kfifo_skip(struct kfifo *fifo, unsigned int len); + +extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int n); + +extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int n); + +/** + * __kfifo_add_out internal helper function for updating the out offset + */ +static inline void __kfifo_add_out(struct kfifo *fifo, + unsigned int off) +{ + smp_mb(); + fifo->out += off; +} + +/** + * __kfifo_add_in internal helper function for updating the in offset + */ +static inline void __kfifo_add_in(struct kfifo *fifo, + unsigned int off) +{ + smp_wmb(); + fifo->in += off; +} + +/** + * __kfifo_off internal helper function for calculating the index of a + * given offeset + */ +static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off) +{ + return off & (fifo->size - 1); +} + #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index d659442e73f2..2a78425ef67f 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -26,6 +26,7 @@ #include #include #include +#include static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) @@ -99,6 +100,21 @@ void kfifo_free(struct kfifo *fifo) } EXPORT_SYMBOL(kfifo_free); +/** + * kfifo_skip - skip output data + * @fifo: the fifo to be used. + * @len: number of bytes to skip + */ +void kfifo_skip(struct kfifo *fifo, unsigned int len) +{ + if (len < kfifo_len(fifo)) { + __kfifo_add_out(fifo, len); + return; + } + kfifo_reset_out(fifo); +} +EXPORT_SYMBOL(kfifo_skip); + /** * kfifo_in - puts some data into the FIFO * @fifo: the fifo to be used. @@ -115,6 +131,7 @@ EXPORT_SYMBOL(kfifo_free); unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, unsigned int len) { + unsigned int off; unsigned int l; len = min(len, fifo->size - fifo->in + fifo->out); @@ -126,21 +143,16 @@ unsigned int kfifo_in(struct kfifo *fifo, smp_mb(); + off = __kfifo_off(fifo, fifo->in); + /* first put the data starting from fifo->in to buffer end */ - l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); - memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l); + l = min(len, fifo->size - off); + memcpy(fifo->buffer + off, from, l); /* then put the rest (if any) at the beginning of the buffer */ memcpy(fifo->buffer, from + l, len - l); - /* - * Ensure that we add the bytes to the kfifo -before- - * we update the fifo->in index. - */ - - smp_wmb(); - - fifo->in += len; + __kfifo_add_in(fifo, len); return len; } @@ -161,6 +173,7 @@ EXPORT_SYMBOL(kfifo_in); unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) { + unsigned int off; unsigned int l; len = min(len, fifo->in - fifo->out); @@ -172,22 +185,116 @@ unsigned int kfifo_out(struct kfifo *fifo, smp_rmb(); + off = __kfifo_off(fifo, fifo->out); + /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); - memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l); + l = min(len, fifo->size - off); + memcpy(to, fifo->buffer + off, l); /* then get the rest (if any) from the beginning of the buffer */ memcpy(to + l, fifo->buffer, len - l); + __kfifo_add_out(fifo, len); + + return len; +} +EXPORT_SYMBOL(kfifo_out); + +/** + * kfifo_from_user - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. 
+ * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from into the + * FIFO depending and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len) +{ + unsigned int off; + unsigned int l; + int ret; + + len = min(len, fifo->size - fifo->in + fifo->out); + /* - * Ensure that we remove the bytes from the kfifo -before- - * we update the fifo->out index. + * Ensure that we sample the fifo->out index -before- we + * start putting bytes into the kfifo. */ smp_mb(); - fifo->out += len; + off = __kfifo_off(fifo, fifo->in); + + /* first put the data starting from fifo->in to buffer end */ + l = min(len, fifo->size - off); + ret = copy_from_user(fifo->buffer + off, from, l); + + if (unlikely(ret)) + return l - ret; + + /* then put the rest (if any) at the beginning of the buffer */ + ret = copy_from_user(fifo->buffer, from + l, len - l); + + if (unlikely(ret)) + return len - ret; + + __kfifo_add_in(fifo, len); return len; } -EXPORT_SYMBOL(kfifo_out); +EXPORT_SYMBOL(kfifo_from_user); + +/** + * kfifo_to_user - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most @len bytes from the FIFO into the + * @to buffer and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len) +{ + unsigned int off; + unsigned int l; + int ret; + + len = min(len, fifo->in - fifo->out); + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. 
+ */ + + smp_rmb(); + + off = __kfifo_off(fifo, fifo->out); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - off); + ret = copy_to_user(to, fifo->buffer + off, l); + + if (unlikely(ret)) + return l - ret; + + /* then get the rest (if any) from the beginning of the buffer */ + ret = copy_to_user(to + l, fifo->buffer, len - l); + + if (unlikely(ret)) + return len - ret; + + __kfifo_add_out(fifo, len); + + return len; +} +EXPORT_SYMBOL(kfifo_to_user); + -- cgit v1.2.3 From 86d4880313603810901f639ccb5c88ff13d4ad3c Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Mon, 21 Dec 2009 14:37:32 -0800 Subject: kfifo: add record handling functions Add kfifo_in_rec() - puts some record data into the FIFO Add kfifo_out_rec() - gets some record data from the FIFO Add kfifo_from_user_rec() - puts some data from user space into the FIFO Add kfifo_to_user_rec() - gets data from the FIFO and write it to user space Add kfifo_peek_rec() - gets the size of the next FIFO record field Add kfifo_skip_rec() - skip the next fifo out record Add kfifo_avail_rec() - determinate the number of bytes available in a record FIFO Signed-off-by: Stefani Seibold Acked-by: Greg Kroah-Hartman Acked-by: Mauro Carvalho Chehab Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/kfifo.c | 286 +++++++++++++++++++++++++++++-------------- 2 files changed, 523 insertions(+), 93 deletions(-) (limited to 'kernel') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index d3230fb08bc7..486e8ad3bb50 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -278,4 +278,334 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off) return off & (fifo->size - 1); } +/** + * __kfifo_peek_n internal helper function for determinate the length of + * the next record in the fifo + */ +static inline unsigned int __kfifo_peek_n(struct kfifo *fifo, + unsigned int recsize) +{ +#define __KFIFO_GET(fifo, off, shift) \ + ((fifo)->buffer[__kfifo_off((fifo), (fifo)->out+(off))] << (shift)) + + unsigned int l; + + l = __KFIFO_GET(fifo, 0, 0); + + if (--recsize) + l |= __KFIFO_GET(fifo, 1, 8); + + return l; +#undef __KFIFO_GET +} + +/** + * __kfifo_poke_n internal helper function for storing the length of + * the next record into the fifo + */ +static inline void __kfifo_poke_n(struct kfifo *fifo, + unsigned int recsize, unsigned int n) +{ +#define __KFIFO_PUT(fifo, off, val, shift) \ + ( \ + (fifo)->buffer[__kfifo_off((fifo), (fifo)->in+(off))] = \ + (unsigned char)((val) >> (shift)) \ + ) + + __KFIFO_PUT(fifo, 0, n, 0); + + if (--recsize) + __KFIFO_PUT(fifo, 1, n, 8); +#undef __KFIFO_PUT +} + +/** + * __kfifo_in_... 
internal functions for put date into the fifo + * do not call it directly, use kfifo_in_rec() instead + */ +extern unsigned int __kfifo_in_n(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize); + +extern unsigned int __kfifo_in_generic(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize); + +static inline unsigned int __kfifo_in_rec(struct kfifo *fifo, + const void *from, unsigned int n, unsigned int recsize) +{ + unsigned int ret; + + ret = __kfifo_in_n(fifo, from, n, recsize); + + if (likely(ret == 0)) { + if (recsize) + __kfifo_poke_n(fifo, recsize, n); + __kfifo_add_in(fifo, n + recsize); + } + return ret; +} + +/** + * kfifo_in_rec - puts some record data into the FIFO + * @fifo: the fifo to be used. + * @from: the data to be added. + * @n: the length of the data to be added. + * @recsize: size of record field + * + * This function copies @n bytes from the @from into the FIFO and returns + * the number of bytes which cannot be copied. + * A returned value greater than the @n value means that the record doesn't + * fit into the buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo, + void *from, unsigned int n, unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_in_generic(fifo, from, n, recsize); + return __kfifo_in_rec(fifo, from, n, recsize); +} + +/** + * __kfifo_out_... internal functions for get date from the fifo + * do not call it directly, use kfifo_out_rec() instead + */ +extern unsigned int __kfifo_out_n(struct kfifo *fifo, + void *to, unsigned int reclen, unsigned int recsize); + +extern unsigned int __kfifo_out_generic(struct kfifo *fifo, + void *to, unsigned int n, + unsigned int recsize, unsigned int *total); + +static inline unsigned int __kfifo_out_rec(struct kfifo *fifo, + void *to, unsigned int n, unsigned int recsize, + unsigned int *total) +{ + unsigned int l; + + if (!recsize) { + l = n; + if (total) + *total = l; + } else { + l = __kfifo_peek_n(fifo, recsize); + if (total) + *total = l; + if (n < l) + return l; + } + + return __kfifo_out_n(fifo, to, l, recsize); +} + +/** + * kfifo_out_rec - gets some record data from the FIFO + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @recsize: size of record field + * @total: pointer where the total number of to copied bytes should stored + * + * This function copies at most @n bytes from the FIFO to @to and returns the + * number of bytes which cannot be copied. + * A returned value greater than the @n value means that the record doesn't + * fit into the @to buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo, + void *to, unsigned int n, unsigned int recsize, + unsigned int *total) + +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_out_generic(fifo, to, n, recsize, total); + return __kfifo_out_rec(fifo, to, n, recsize, total); +} + +/** + * __kfifo_from_user_... internal functions for transfer from user space into + * the fifo. 
do not call it directly, use kfifo_from_user_rec() instead + */ +extern unsigned int __kfifo_from_user_n(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize); + +extern unsigned int __kfifo_from_user_generic(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize); + +static inline unsigned int __kfifo_from_user_rec(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize) +{ + unsigned int ret; + + ret = __kfifo_from_user_n(fifo, from, n, recsize); + + if (likely(ret == 0)) { + if (recsize) + __kfifo_poke_n(fifo, recsize, n); + __kfifo_add_in(fifo, n + recsize); + } + return ret; +} + +/** + * kfifo_from_user_rec - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. + * @n: the length of the data to be added. + * @recsize: size of record field + * + * This function copies @n bytes from the @from into the + * FIFO and returns the number of bytes which cannot be copied. + * + * If the returned value is equal or less the @n value, the copy_from_user() + * functions has failed. Otherwise the record doesn't fit into the buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_from_user_generic(fifo, from, n, recsize); + return __kfifo_from_user_rec(fifo, from, n, recsize); +} + +/** + * __kfifo_to_user_... internal functions for transfer fifo data into user space + * do not call it directly, use kfifo_to_user_rec() instead + */ +extern unsigned int __kfifo_to_user_n(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int reclen, + unsigned int recsize); + +extern unsigned int __kfifo_to_user_generic(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int recsize, + unsigned int *total); + +static inline unsigned int __kfifo_to_user_rec(struct kfifo *fifo, + void __user *to, unsigned int n, + unsigned int recsize, unsigned int *total) +{ + unsigned int l; + + if (!recsize) { + l = n; + if (total) + *total = l; + } else { + l = __kfifo_peek_n(fifo, recsize); + if (total) + *total = l; + if (n < l) + return l; + } + + return __kfifo_to_user_n(fifo, to, n, l, recsize); +} + +/** + * kfifo_to_user_rec - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @n: the size of the destination buffer. + * @recsize: size of record field + * @total: pointer where the total number of to copied bytes should stored + * + * This function copies at most @n bytes from the FIFO to the @to. + * In case of an error, the function returns the number of bytes which cannot + * be copied. + * If the returned value is equal or less the @n value, the copy_to_user() + * functions has failed. Otherwise the record doesn't fit into the @to buffer. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. 
+ */ +static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned int recsize, + unsigned int *total) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_to_user_generic(fifo, to, n, recsize, total); + return __kfifo_to_user_rec(fifo, to, n, recsize, total); +} + +/** + * __kfifo_peek_... internal functions for peek into the next fifo record + * do not call it directly, use kfifo_peek_rec() instead + */ +extern unsigned int __kfifo_peek_generic(struct kfifo *fifo, + unsigned int recsize); + +/** + * kfifo_peek_rec - gets the size of the next FIFO record data + * @fifo: the fifo to be used. + * @recsize: size of record field + * + * This function returns the size of the next FIFO record in number of bytes + */ +static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo, + unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + return __kfifo_peek_generic(fifo, recsize); + if (!recsize) + return kfifo_len(fifo); + return __kfifo_peek_n(fifo, recsize); +} + +/** + * __kfifo_skip_... internal functions for skip the next fifo record + * do not call it directly, use kfifo_skip_rec() instead + */ +extern void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize); + +static inline void __kfifo_skip_rec(struct kfifo *fifo, + unsigned int recsize) +{ + unsigned int l; + + if (recsize) { + l = __kfifo_peek_n(fifo, recsize); + + if (l + recsize <= kfifo_len(fifo)) { + __kfifo_add_out(fifo, l + recsize); + return; + } + } + kfifo_reset_out(fifo); +} + +/** + * kfifo_skip_rec - skip the next fifo out record + * @fifo: the fifo to be used. + * @recsize: size of record field + * + * This function skips the next FIFO record + */ +static inline void kfifo_skip_rec(struct kfifo *fifo, + unsigned int recsize) +{ + if (!__builtin_constant_p(recsize)) + __kfifo_skip_generic(fifo, recsize); + else + __kfifo_skip_rec(fifo, recsize); +} + +/** + * kfifo_avail_rec - returns the number of bytes available in a record FIFO + * @fifo: the fifo to be used. + * @recsize: size of record field + */ +static inline __must_check unsigned int kfifo_avail_rec(struct kfifo *fifo, + unsigned int recsize) +{ + unsigned int l = kfifo_size(fifo) - kfifo_len(fifo); + + return (l > recsize) ? l - recsize : 0; +} + #endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 2a78425ef67f..e92d519f93b1 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -115,27 +115,11 @@ void kfifo_skip(struct kfifo *fifo, unsigned int len) } EXPORT_SYMBOL(kfifo_skip); -/** - * kfifo_in - puts some data into the FIFO - * @fifo: the fifo to be used. - * @from: the data to be added. - * @len: the length of the data to be added. - * - * This function copies at most @len bytes from the @from buffer into - * the FIFO depending on the free space, and returns the number of - * bytes copied. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_in(struct kfifo *fifo, - const unsigned char *from, unsigned int len) +static inline void __kfifo_in_data(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; - len = min(len, fifo->size - fifo->in + fifo->out); - /* * Ensure that we sample the fifo->out index -before- we * start putting bytes into the kfifo. 
@@ -143,7 +127,7 @@ unsigned int kfifo_in(struct kfifo *fifo, smp_mb(); - off = __kfifo_off(fifo, fifo->in); + off = __kfifo_off(fifo, fifo->in + off); /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); @@ -151,33 +135,13 @@ unsigned int kfifo_in(struct kfifo *fifo, /* then put the rest (if any) at the beginning of the buffer */ memcpy(fifo->buffer, from + l, len - l); - - __kfifo_add_in(fifo, len); - - return len; } -EXPORT_SYMBOL(kfifo_in); -/** - * kfifo_out - gets some data from the FIFO - * @fifo: the fifo to be used. - * @to: where the data must be copied. - * @len: the size of the destination buffer. - * - * This function copies at most @len bytes from the FIFO into the - * @to buffer and returns the number of copied bytes. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_out(struct kfifo *fifo, - unsigned char *to, unsigned int len) +static inline void __kfifo_out_data(struct kfifo *fifo, + void *to, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; - len = min(len, fifo->in - fifo->out); - /* * Ensure that we sample the fifo->in index -before- we * start removing bytes from the kfifo. @@ -185,7 +149,7 @@ unsigned int kfifo_out(struct kfifo *fifo, smp_rmb(); - off = __kfifo_off(fifo, fifo->out); + off = __kfifo_off(fifo, fifo->out + off); /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - off); @@ -193,34 +157,14 @@ unsigned int kfifo_out(struct kfifo *fifo, /* then get the rest (if any) from the beginning of the buffer */ memcpy(to + l, fifo->buffer, len - l); - - __kfifo_add_out(fifo, len); - - return len; } -EXPORT_SYMBOL(kfifo_out); -/** - * kfifo_from_user - puts some data from user space into the FIFO - * @fifo: the fifo to be used. - * @from: pointer to the data to be added. - * @len: the length of the data to be added. - * - * This function copies at most @len bytes from the @from into the - * FIFO depending and returns the number of copied bytes. - * - * Note that with only one concurrent reader and one concurrent - * writer, you don't need extra locking to use these functions. - */ -unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int len) +static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off) { - unsigned int off; unsigned int l; int ret; - len = min(len, fifo->size - fifo->in + fifo->out); - /* * Ensure that we sample the fifo->out index -before- we * start putting bytes into the kfifo. @@ -228,29 +172,101 @@ unsigned int kfifo_from_user(struct kfifo *fifo, smp_mb(); - off = __kfifo_off(fifo, fifo->in); + off = __kfifo_off(fifo, fifo->in + off); /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); ret = copy_from_user(fifo->buffer + off, from, l); if (unlikely(ret)) - return l - ret; + return ret + len - l; /* then put the rest (if any) at the beginning of the buffer */ - ret = copy_from_user(fifo->buffer, from + l, len - l); + return copy_from_user(fifo->buffer, from + l, len - l); +} + +static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int off) +{ + unsigned int l; + int ret; + + /* + * Ensure that we sample the fifo->in index -before- we + * start removing bytes from the kfifo. 
+ */ + + smp_rmb(); + + off = __kfifo_off(fifo, fifo->out + off); + + /* first get the data from fifo->out until the end of the buffer */ + l = min(len, fifo->size - off); + ret = copy_to_user(to, fifo->buffer + off, l); if (unlikely(ret)) - return len - ret; + return ret + len - l; - __kfifo_add_in(fifo, len); + /* then get the rest (if any) from the beginning of the buffer */ + return copy_to_user(to + l, fifo->buffer, len - l); +} +unsigned int __kfifo_in_n(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int recsize) +{ + if (kfifo_avail(fifo) < len + recsize) + return len + 1; + + __kfifo_in_data(fifo, from, len, recsize); + return 0; +} +EXPORT_SYMBOL(__kfifo_in_n); + +/** + * kfifo_in - puts some data into the FIFO + * @fifo: the fifo to be used. + * @from: the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from buffer into + * the FIFO depending on the free space, and returns the number of + * bytes copied. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, + unsigned int len) +{ + len = min(kfifo_avail(fifo), len); + + __kfifo_in_data(fifo, from, len, 0); + __kfifo_add_in(fifo, len); return len; } -EXPORT_SYMBOL(kfifo_from_user); +EXPORT_SYMBOL(kfifo_in); + +unsigned int __kfifo_in_generic(struct kfifo *fifo, + const void *from, unsigned int len, unsigned int recsize) +{ + return __kfifo_in_rec(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_in_generic); + +unsigned int __kfifo_out_n(struct kfifo *fifo, + void *to, unsigned int len, unsigned int recsize) +{ + if (kfifo_len(fifo) < len + recsize) + return len; + + __kfifo_out_data(fifo, to, len, recsize); + __kfifo_add_out(fifo, len + recsize); + return 0; +} +EXPORT_SYMBOL(__kfifo_out_n); /** - * kfifo_to_user - gets data from the FIFO and write it to user space + * kfifo_out - gets some data from the FIFO * @fifo: the fifo to be used. * @to: where the data must be copied. * @len: the size of the destination buffer. @@ -261,40 +277,124 @@ EXPORT_SYMBOL(kfifo_from_user); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) { - unsigned int off; - unsigned int l; - int ret; + len = min(kfifo_len(fifo), len); - len = min(len, fifo->in - fifo->out); + __kfifo_out_data(fifo, to, len, 0); + __kfifo_add_out(fifo, len); - /* - * Ensure that we sample the fifo->in index -before- we - * start removing bytes from the kfifo. 
- */ + return len; +} +EXPORT_SYMBOL(kfifo_out); - smp_rmb(); +unsigned int __kfifo_out_generic(struct kfifo *fifo, + void *to, unsigned int len, unsigned int recsize, + unsigned int *total) +{ + return __kfifo_out_rec(fifo, to, len, recsize, total); +} +EXPORT_SYMBOL(__kfifo_out_generic); - off = __kfifo_off(fifo, fifo->out); +unsigned int __kfifo_from_user_n(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int recsize) +{ + if (kfifo_avail(fifo) < len + recsize) + return len + 1; - /* first get the data from fifo->out until the end of the buffer */ - l = min(len, fifo->size - off); - ret = copy_to_user(to, fifo->buffer + off, l); + return __kfifo_from_user_data(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_from_user_n); - if (unlikely(ret)) - return l - ret; +/** + * kfifo_from_user - puts some data from user space into the FIFO + * @fifo: the fifo to be used. + * @from: pointer to the data to be added. + * @len: the length of the data to be added. + * + * This function copies at most @len bytes from the @from into the + * FIFO depending and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. + */ +unsigned int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len) +{ + len = min(kfifo_avail(fifo), len); + len -= __kfifo_from_user_data(fifo, from, len, 0); + __kfifo_add_in(fifo, len); + return len; +} +EXPORT_SYMBOL(kfifo_from_user); - /* then get the rest (if any) from the beginning of the buffer */ - ret = copy_to_user(to + l, fifo->buffer, len - l); +unsigned int __kfifo_from_user_generic(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int recsize) +{ + return __kfifo_from_user_rec(fifo, from, len, recsize); +} +EXPORT_SYMBOL(__kfifo_from_user_generic); - if (unlikely(ret)) - return len - ret; +unsigned int __kfifo_to_user_n(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int reclen, + unsigned int recsize) +{ + unsigned int ret; - __kfifo_add_out(fifo, len); + if (kfifo_len(fifo) < reclen + recsize) + return len; + ret = __kfifo_to_user_data(fifo, to, reclen, recsize); + + if (likely(ret == 0)) + __kfifo_add_out(fifo, reclen + recsize); + + return ret; +} +EXPORT_SYMBOL(__kfifo_to_user_n); + +/** + * kfifo_to_user - gets data from the FIFO and write it to user space + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * + * This function copies at most @len bytes from the FIFO into the + * @to buffer and returns the number of copied bytes. + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these functions. 
+ */ +unsigned int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len) +{ + len = min(kfifo_len(fifo), len); + len -= __kfifo_to_user_data(fifo, to, len, 0); + __kfifo_add_out(fifo, len); return len; } EXPORT_SYMBOL(kfifo_to_user); +unsigned int __kfifo_to_user_generic(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int recsize, + unsigned int *total) +{ + return __kfifo_to_user_rec(fifo, to, len, recsize, total); +} +EXPORT_SYMBOL(__kfifo_to_user_generic); + +unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize) +{ + if (recsize == 0) + return kfifo_avail(fifo); + + return __kfifo_peek_n(fifo, recsize); +} +EXPORT_SYMBOL(__kfifo_peek_generic); + +void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize) +{ + __kfifo_skip_rec(fifo, recsize); +} +EXPORT_SYMBOL(__kfifo_skip_generic); + -- cgit v1.2.3 From 0c69774e6ce94364cfaa8bdeb18061edc414bc5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Dec 2009 15:43:19 +0100 Subject: sched: Revert 738d2be, simplify set_task_cpu() Effectively reverts 738d2be4301007f054541c5c4bf7fb6a361c9b3a. As demonstrated by Eric, we really need to call __set_task_cpu() early in the fork() path to properly initialize the various task state -- specifically the cgroup state through set_task_rq(). [ we could probably fix this by explicitly calling __set_task_cpu() from sched_fork(), but lets try that for the next cycle and simply revert to the old behaviour for now. ] Reported-by: Eric Paris Tested-by: Eric Paris , Signed-off-by: Peter Zijlstra Cc: efault@gmx.de LKML-Reference: <1261492999.4937.36.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/sched.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 87f1f47beffe..c535cc4f6428 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2045,11 +2045,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); - if (task_cpu(p) == new_cpu) - return; - - p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); + if (task_cpu(p) != new_cpu) { + p->se.nr_migrations++; + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); + } __set_task_cpu(p, new_cpu); } -- cgit v1.2.3 From 4440095c8268c1a5e11577097d2be429cec036ca Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Dec 2009 21:00:20 +0100 Subject: SYSCTL: Print binary sysctl warnings (nearly) only once When printing legacy sysctls print the warning message for each of them only once. This way there is a guarantee the syslog won't be flooded for any sane program. The original attempt at this made the tables non const and stored the flag inline. Linus suggested using a separate hash table for this, this is based on a code snippet from him. The hash implies this is not exact and can sometimes not print a new sysctl due to a hash collision, but in practice this should not be a problem I used a FNV32 hash over the binary string with a 32byte bitmap. This gives relatively little collisions when all the predefined binary sysctls are hashed: size 256 bucket length number 0: [25] 1: [67] 2: [88] 3: [47] 4: [22] 5: [6] 6: [1] The worst case is a single collision of 6 hash values. 
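Concretely, the warn-once scheme can be sketched in plain C as follows. This is only an illustration of the approach described above, not the kernel's code: the identifiers (fnv32_hash, warn_once_bitmap) and the exact fold from 32 hash bits down to 8 are assumptions for the example.

	#include <stdint.h>
	#include <stdio.h>

	#define WARN_ONCE_HASH_BITS	8
	#define WARN_ONCE_HASH_SIZE	(1 << WARN_ONCE_HASH_BITS)	/* 256 buckets */

	/* 256 bits == the 32-byte bitmap mentioned above */
	static unsigned char warn_once_bitmap[WARN_ONCE_HASH_SIZE / 8];

	/* FNV-1 32-bit hash over the raw bytes of the binary sysctl name */
	static uint32_t fnv32_hash(const int *name, int nlen)
	{
		const unsigned char *p = (const unsigned char *)name;
		size_t len = (size_t)nlen * sizeof(*name);
		uint32_t h = 2166136261u;		/* FNV offset basis */

		while (len--)
			h = (h * 16777619u) ^ *p++;	/* FNV prime */
		return h;
	}

	static void warn_once(const int *name, int nlen)
	{
		uint32_t h = fnv32_hash(name, nlen);

		/* fold the 32-bit hash down to WARN_ONCE_HASH_BITS bits */
		h = (h >> WARN_ONCE_HASH_BITS) ^ (h & (WARN_ONCE_HASH_SIZE - 1));
		h &= WARN_ONCE_HASH_SIZE - 1;

		if (warn_once_bitmap[h / 8] & (1u << (h % 8)))
			return;			/* this bucket already warned */
		warn_once_bitmap[h / 8] |= 1u << (h % 8);

		fprintf(stderr, "warning: deprecated binary sysctl used "
			"(first name component %d)\n", nlen > 0 ? name[0] : -1);
	}

A collision only suppresses the message for a different sysctl that happens to land in the same bucket, which is exactly the "not exact" caveat above.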
Signed-off-by: Andi Kleen --- kernel/sysctl_binary.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 112533d5fc08..8f5d16e0707a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1417,6 +1417,35 @@ static void deprecated_sysctl_warning(const int *name, int nlen) return; } +#define WARN_ONCE_HASH_BITS 8 +#define WARN_ONCE_HASH_SIZE (1< Date: Mon, 21 Dec 2009 13:02:24 +0100 Subject: kprobes: Fix distinct type warning Every time I see this: kernel/kprobes.c: In function 'register_kretprobe': kernel/kprobes.c:1038: warning: comparison of distinct pointer types lacks a cast I'm wondering if something changed in common code and we need to do something for s390. Apparently that's not the case. Let's get rid of this annoying warning. Signed-off-by: Heiko Carstens Acked-by: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu LKML-Reference: <20091221120224.GA4471@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e5342a344c43..b7df302a0204 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { #ifdef CONFIG_PREEMPT - rp->maxactive = max(10, 2 * num_possible_cpus()); + rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus()); #else rp->maxactive = num_possible_cpus(); #endif -- cgit v1.2.3 From 40892367bc893f3abf6f5ca8ac2ed1c98ba26a77 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Dec 2009 12:01:17 -0800 Subject: tracing: Kconfig spelling fixes and cleanups Fix filename reference (ftrace-implementation.txt -> ftrace-design.txt). Fix spelling, punctuation, grammar. Fix help text indentation and line lengths to reduce need for horizontal scrolling or larger window sizes. 
Signed-off-by: Randy Dunlap Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20091221120117.3fb49cdc.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 112 +++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d006554888dc..6c22d8a2f289 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,17 +12,17 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_TRACER bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_GRAPH_FP_TEST bool @@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_DYNAMIC_FTRACE bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_FTRACE_MCOUNT_RECORD bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config HAVE_HW_BRANCH_TRACER bool @@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER config HAVE_SYSCALL_TRACEPOINTS bool help - See Documentation/trace/ftrace-implementation.txt + See Documentation/trace/ftrace-design.txt config TRACER_MAX_TRACE bool @@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP # This allows those options to appear when no other tracer is selected. But the # options do not appear when something else selects it. We need the two options # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the -# hidding of the automatic options. +# hiding of the automatic options. config TRACING bool @@ -119,7 +119,7 @@ menuconfig FTRACE bool "Tracers" default y if DEBUG_KERNEL help - Enable the kernel tracing infrastructure. + Enable the kernel tracing infrastructure. if FTRACE @@ -133,7 +133,7 @@ config FUNCTION_TRACER help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation - instruction to the beginning of every kernel function, which NOP + instruction at the beginning of every kernel function, which NOP sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very @@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER and its entry. Its first purpose is to trace the duration of functions and draw a call graph for each thread with some information like - the return value. This is done by setting the current return + the return value. This is done by setting the current return address on the current task structure into a stack of calls. @@ -173,7 +173,7 @@ config IRQSOFF_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. This option and the preempt-off timing option can be used together or separately.) 
@@ -186,7 +186,7 @@ config PREEMPT_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP help - This option measures the time spent in preemption off critical + This option measures the time spent in preemption-off critical sections, with microsecond accuracy. The default measurement method is a maximum search, which is @@ -195,7 +195,7 @@ config PREEMPT_TRACER echo 0 > /sys/kernel/debug/tracing/tracing_max_latency - (Note that kernel size and overhead increases with this option + (Note that kernel size and overhead increase with this option enabled. This option and the irqs-off timing option can be used together or separately.) @@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS depends on !GENERIC_TRACER select TRACING help - This tracer hooks to various trace points in the kernel + This tracer hooks to various trace points in the kernel, allowing the user to pick and choose which trace point they want to trace. It also includes the sched_switch tracer plugin. @@ -265,19 +265,19 @@ choice The likely/unlikely profiler only looks at the conditions that are annotated with a likely or unlikely macro. - The "all branch" profiler will profile every if statement in the + The "all branch" profiler will profile every if-statement in the kernel. This profiler will also enable the likely/unlikely - profiler as well. + profiler. - Either of the above profilers add a bit of overhead to the system. - If unsure choose "No branch profiling". + Either of the above profilers adds a bit of overhead to the system. + If unsure, choose "No branch profiling". config BRANCH_PROFILE_NONE bool "No branch profiling" help - No branch profiling. Branch profiling adds a bit of overhead. - Only enable it if you want to analyse the branching behavior. - Otherwise keep it disabled. + No branch profiling. Branch profiling adds a bit of overhead. + Only enable it if you want to analyse the branching behavior. + Otherwise keep it disabled. config PROFILE_ANNOTATED_BRANCHES bool "Trace likely/unlikely profiler" @@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES /sys/kernel/debug/tracing/profile_annotated_branch - Note: this will add a significant overhead, only turn this + Note: this will add a significant overhead; only turn this on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES @@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES This configuration, when enabled, will impose a great overhead on the system. This should only be enabled when the system - is to be analyzed + is to be analyzed in much detail. endchoice config TRACING_BRANCHES @@ -335,7 +335,7 @@ config POWER_TRACER depends on X86 select GENERIC_TRACER help - This tracer helps developers to analyze and optimize the kernels + This tracer helps developers to analyze and optimize the kernel's power management decisions, specifically the C-state and P-state behavior. @@ -391,14 +391,14 @@ config HW_BRANCH_TRACER select GENERIC_TRACER help This tracer records all branches on the system in a circular - buffer giving access to the last N branches for each cpu. + buffer, giving access to the last N branches for each cpu. config KMEMTRACE bool "Trace SLAB allocations" select GENERIC_TRACER help kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. 
Collected data is then fed to the userspace application in order to analyse allocation hotspots, internal fragmentation and so on, making it possible to see how well an allocator performs, as well as debug @@ -417,15 +417,15 @@ config WORKQUEUE_TRACER bool "Trace workqueues" select GENERIC_TRACER help - The workqueue tracer provides some statistical informations + The workqueue tracer provides some statistical information about each cpu workqueue thread such as the number of the works inserted and executed since their creation. It can help - to evaluate the amount of work each of them have to perform. + to evaluate the amount of work each of them has to perform. For example it can help a developer to decide whether he should - choose a per cpu workqueue instead of a singlethreaded one. + choose a per-cpu workqueue instead of a singlethreaded one. config BLK_DEV_IO_TRACE - bool "Support for tracing block io actions" + bool "Support for tracing block IO actions" depends on SYSFS depends on BLOCK select RELAY @@ -456,15 +456,15 @@ config KPROBE_EVENT select TRACING default y help - This allows the user to add tracing events (similar to tracepoints) on the fly - via the ftrace interface. See Documentation/trace/kprobetrace.txt - for more details. + This allows the user to add tracing events (similar to tracepoints) + on the fly via the ftrace interface. See + Documentation/trace/kprobetrace.txt for more details. Those events can be inserted wherever kprobes can probe, and record various register and memory values. - This option is also required by perf-probe subcommand of perf tools. If - you want to use perf tools, this option is strongly recommended. + This option is also required by perf-probe subcommand of perf tools. + If you want to use perf tools, this option is strongly recommended. config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" @@ -472,32 +472,32 @@ config DYNAMIC_FTRACE depends on HAVE_DYNAMIC_FTRACE default y help - This option will modify all the calls to ftrace dynamically - (will patch them out of the binary image and replaces them - with a No-Op instruction) as they are called. A table is - created to dynamically enable them again. + This option will modify all the calls to ftrace dynamically + (will patch them out of the binary image and replace them + with a No-Op instruction) as they are called. A table is + created to dynamically enable them again. - This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise - has native performance as long as no tracing is active. + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but + otherwise has native performance as long as no tracing is active. - The changes to the code are done by a kernel thread that - wakes up once a second and checks to see if any ftrace calls - were made. If so, it runs stop_machine (stops all CPUS) - and modifies the code to jump over the call to ftrace. + The changes to the code are done by a kernel thread that + wakes up once a second and checks to see if any ftrace calls + were made. If so, it runs stop_machine (stops all CPUS) + and modifies the code to jump over the call to ftrace. config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. A file is created - in debugfs called function_profile_enabled which defaults to zero. - When a 1 is echoed into this file profiling begins, and when a - zero is entered, profiling stops. 
A file in the trace_stats - directory called functions, that show the list of functions that - have been hit and their counters. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A "functions" file is created in + the trace_stats directory; this file shows the list of functions that + have been hit and their counters. - If in doubt, say N + If in doubt, say N. config FTRACE_MCOUNT_RECORD def_bool y @@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK tristate "Ring buffer benchmark stress tester" depends on RING_BUFFER help - This option creates a test to stress the ring buffer and bench mark it. - It creates its own ring buffer such that it will not interfer with + This option creates a test to stress the ring buffer and benchmark it. + It creates its own ring buffer such that it will not interfere with any other users of the ring buffer (such as ftrace). It then creates a producer and consumer that will run for 10 seconds and sleep for 10 seconds. Each interval it will print out the number of events @@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK It does not disable interrupts or raise its priority, so it may be affected by processes that are running. - If unsure, say N + If unsure, say N. endif # FTRACE -- cgit v1.2.3 From 88f7a890d74137ab0d126a5d65679cd620f1a289 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:22:22 +0800 Subject: ksym_tracer: Fix to make the tracer work ksym tracer doesn't work: # echo tasklist_lock:rw- > ksym_trace_filter -bash: echo: write error: No such device It's because we pass to perf_event_create_kernel_counter() a cpu number which is not present. 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF19E.1010201@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/hw_breakpoint.c | 10 +++++++--- kernel/trace/trace_ksym.c | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel')
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 366eedf949c0..48fb0bb6992a 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@ #include #include #include +#include #include #include
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, if (!cpu_events) return ERR_PTR(-ENOMEM); - for_each_possible_cpu(cpu) { + get_online_cpus(); + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, goto fail; } } + put_online_cpus(); return cpu_events; fail: - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } + put_online_cpus(); + free_percpu(cpu_events); - /* return the error if any */ return ERR_PTR(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index faf37fa4408c..340b6ff193e0 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) entry->attr.bp_addr = addr; entry->attr.bp_len = HW_BREAKPOINT_LEN_4; - ret = -EAGAIN; entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, ksym_hbp_handler);
-- cgit v1.2.3
From 3d13ec2efdb5843ad91e57b60d50b44d922cf063 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:23:19 +0800 Subject: ksym_tracer: Fix to allow writing newline to ksym_trace_filter It used to work, but now doesn't: # echo > ksym_filter bash: echo: write error: Invalid argument It's caused by d954fbf0ff6b5fdfb32350e85a2f15d3db976506 ("tracing: Fix wrong usage of strstrip in trace_ksyms"). Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF1D7.5040400@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel')
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 340b6ff193e0..160a8d8b37a2 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c
@@ -299,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, * 2: echo 0 > ksym_trace_filter * 3: echo "*:---" > ksym_trace_filter */ - if (!buf[0] || !strcmp(buf, "0") || - !strcmp(buf, "*:---")) { + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { __ksym_trace_reset(); ret = 0; goto out;
-- cgit v1.2.3
From e6d9491bf8ba6728cc86aeabbc688d20ec0563b5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:23:40 +0800 Subject: ksym_tracer: Fix race when incrementing count We are inside an RCU read-side critical section but not holding the write lock, so count++ is not atomic. Use atomic64_t instead.
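A minimal sketch of the change the patch below makes, assuming the 2009-era <asm/atomic.h> header (current kernels use <linux/atomic.h>): readers walking an RCU-protected list may hit the same entry concurrently on several CPUs, so a plain counter++ can lose updates, while atomic64_inc() cannot.

    #include <linux/types.h>
    #include <asm/atomic.h>         /* <linux/atomic.h> on current kernels */

    struct hit_stat {
            atomic64_t counter;     /* 64 bits: no overflow clamp needed */
    };

    /* Called under rcu_read_lock(); may run concurrently on many CPUs. */
    static void record_hit(struct hit_stat *s)
    {
            atomic64_inc(&s->counter);      /* was: s->counter++ (racy) */
    }

    static u64 read_hits(struct hit_stat *s)
    {
            return (u64)atomic64_read(&s->counter);
    }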
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF1EC.9010608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel')
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 160a8d8b37a2..67d79f709fc5 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c
@@ -32,6 +32,8 @@ #include #include +#include + /* * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. */
@@ -44,7 +46,7 @@ struct trace_ksym { struct perf_event **ksym_hbp; struct perf_event_attr attr; #ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; + atomic64_t counter; #endif struct hlist_node ksym_hlist; };
@@ -69,9 +71,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) rcu_read_lock(); hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->attr.bp_addr == hbp_hit_addr) && - (entry->counter <= MAX_UL_INT)) { - entry->counter++; + if (entry->attr.bp_addr == hbp_hit_addr) { + atomic64_inc(&entry->counter); break; } }
@@ -501,7 +502,8 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) seq_printf(m, " %-36s", fn_name); else seq_printf(m, " %-36s", ""); - seq_printf(m, " %15lu\n", entry->counter); + seq_printf(m, " %15llu\n", + (unsigned long long)atomic64_read(&entry->counter)); return 0; }
-- cgit v1.2.3
From 53ab668064edaeef99c0ee22799483d45f4c81f6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 30 Dec 2009 14:24:03 +0800 Subject: ksym_tracer: Remove trace_stat trace_stat is problematic. Don't use it; use a seq_file instead. This fixes a race: reading the stat file is not protected by any lock, which can lead to a use-after-free.
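The replacement below is the stock single_open() seq_file pattern; a minimal sketch, with example_* as placeholder names: the whole report is produced in one show callback under the lock (here it would be RCU) that protects the data, so nothing can be freed mid-read.

    #include <linux/fs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "header\n");
            /* walk the protected structure, seq_printf() each entry */
            return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
            return single_open(file, example_show, NULL);
    }

    static const struct file_operations example_fops = {
            .open           = example_open,
            .read           = seq_read,
            .llseek         = seq_lseek,
            .release        = single_release,
    };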
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <4B3AF203.40200@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_ksym.c | 127 ++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 77 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 67d79f709fc5..94103cdcf9d8 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -26,7 +26,6 @@ #include #include "trace_output.h" -#include "trace_stat.h" #include "trace.h" #include @@ -444,103 +443,77 @@ struct tracer ksym_tracer __read_mostly = .print_line = ksym_trace_output }; -__init static int init_ksym_trace(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - ksym_filter_entry_count = 0; - - entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, - NULL, &ksym_tracing_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ksym_trace_filter' file\n"); - - return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); - - #ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_tracer_stat_headers(struct seq_file *m) +static int ksym_profile_show(struct seq_file *m, void *v) { + struct hlist_node *node; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + seq_puts(m, " Access Type "); seq_puts(m, " Symbol Counter\n"); seq_puts(m, " ----------- "); seq_puts(m, " ------ -------\n"); - return 0; -} -static int ksym_tracer_stat_show(struct seq_file *m, void *v) -{ - struct hlist_node *stat = v; - struct trace_ksym *entry; - int access_type = 0; - char fn_name[KSYM_NAME_LEN]; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + access_type = entry->attr.bp_type; - access_type = entry->attr.bp_type; + switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; + case HW_BREAKPOINT_W: + seq_puts(m, " W "); + break; + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: + seq_puts(m, " RW "); + break; + default: + seq_puts(m, " NA "); + } - switch (access_type) { - case HW_BREAKPOINT_R: - seq_puts(m, " R "); - break; - case HW_BREAKPOINT_W: - seq_puts(m, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - seq_puts(m, " RW "); - break; - default: - seq_puts(m, " NA "); + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) + seq_printf(m, " %-36s", fn_name); + else + seq_printf(m, " %-36s", ""); + seq_printf(m, " %15llu\n", + (unsigned long long)atomic64_read(&entry->counter)); } - - if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) - seq_printf(m, " %-36s", fn_name); - else - seq_printf(m, " %-36s", ""); - seq_printf(m, " %15llu\n", - (unsigned long long)atomic64_read(&entry->counter)); + rcu_read_unlock(); return 0; } -static void *ksym_tracer_stat_start(struct tracer_stat *trace) +static int ksym_profile_open(struct inode *node, struct file *file) { - return ksym_filter_head.first; -} - -static void * -ksym_tracer_stat_next(void *v, int idx) -{ - struct hlist_node *stat = v; - - return stat->next; + return single_open(file, ksym_profile_show, NULL); } -static struct tracer_stat ksym_tracer_stats = { - .name = "ksym_tracer", - .stat_start = ksym_tracer_stat_start, - .stat_next = ksym_tracer_stat_next, - .stat_headers = ksym_tracer_stat_headers, - .stat_show = ksym_tracer_stat_show +static const struct file_operations ksym_profile_fops = { + .open = 
ksym_profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; +#endif /* CONFIG_PROFILE_KSYM_TRACER */ -__init static int ksym_tracer_stat_init(void) +__init static int init_ksym_trace(void) { - int ret; + struct dentry *d_tracer; - ret = register_stat_tracer(&ksym_tracer_stats); - if (ret) { - printk(KERN_WARNING "Warning: could not register " - "ksym tracer stats\n"); - return 1; - } + d_tracer = tracing_init_dentry(); - return 0; + trace_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + trace_create_file("ksym_profile", 0444, d_tracer, + NULL, &ksym_profile_fops); +#endif + + return register_tracer(&ksym_tracer); } -fs_initcall(ksym_tracer_stat_init); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ +device_initcall(init_ksym_trace);
-- cgit v1.2.3
From 79b408210885b9f7f0b067b07a09d68f4da3a700 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:19 +0800 Subject: tracing/kprobe: Show sign of fields in trace_kprobe format files The format files of trace_kprobe do not show the sign of the fields. The other format files include the signedness of each field; this patch makes the trace_kprobe format files consistent with the others. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D27.5040009@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel')
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ecab06547a5..83f1e6ef7063 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c
@@ -1182,10 +1182,11 @@ static int __probe_event_show_format(struct trace_seq *s, #undef SHOW_FIELD #define SHOW_FIELD(type, item, name) \ do { \ - ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ - "offset:%u;\tsize:%u;\n", name, \ + ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ + "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type)); \ + (unsigned int)sizeof(type), \ + is_signed_type(type)); \ if (!ret) \ return 0; \ } while (0)
-- cgit v1.2.3
From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:38 +0800 Subject: tracing: Fix sign fields in ftrace_define_fields_##call() Add is_signed_type() call to trace_define_field() in ftrace macros. The code previously just passed in 0 (false), disregarding whether or not the field was actually a signed type.
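Both patches lean on is_signed_type(); a sketch of the idea behind it (the exact in-tree definition is paraphrased from memory, so treat it as an assumption): casting -1 into the type yields a value below 1 only for signed types.

    #define is_signed_type(type)    (((type)(-1)) < (type)1)

    /*
     * is_signed_type(int)           -> 1
     * is_signed_type(unsigned long) -> 0
     * Passing this to trace_define_field() makes the "signed:%d;"
     * attribute in a format file reflect the real field type.
     */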
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 7 ++++--- kernel/trace/trace_export.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel')
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..c6fe03e902ca 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h
@@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret;
@@ -422,8 +423,8 @@ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1)
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret;
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret;
-- cgit v1.2.3
From 05cbaa2853cdfc255fdd04e65a82bfe9208c4e52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Dec 2009 16:00:35 +0100 Subject: perf: Fix NULL deref in inheritance code Liming found a NULL deref when a task has a perf context but no counters when it forks. This can occur in two cases: a race during construction, where the fork hits after installing the context but before the first counter gets inserted; or, more reproducibly, a fork after the last counter is closed (which leaves the context around). Reported-by: Wang Liming Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras CC: LKML-Reference: <1262185684.7135.222.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel')
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 03cc061398d1..58ed1dae5875 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c
@@ -5148,7 +5148,7 @@ int perf_event_init_task(struct task_struct *child) GFP_KERNEL); if (!child_ctx) { ret = -ENOMEM; - goto exit; + break; } __perf_event_init_context(child_ctx, child);
@@ -5164,7 +5164,7 @@ int perf_event_init_task(struct task_struct *child) } } - if (inherited_all) { + if (child_ctx && inherited_all) { /* * Mark the child context as a clone of the parent * context, or of whatever the parent is a clone of.
@@ -5184,7 +5184,6 @@ int perf_event_init_task(struct task_struct *child) get_ctx(child_ctx->parent_ctx); } -exit: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx);
-- cgit v1.2.3
From 10b465aaf9536ee5a16652fa0700740183d48ec9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 19 Dec 2009 14:43:01 +0000 Subject: modules: Skip empty sections when exporting section notes Commit 35dead4 "modules: don't export section names of empty sections via sysfs" changed the set of sections that have attributes, but did not change the iteration over these attributes in add_notes_attrs(). This can lead to add_notes_attrs() creating attributes with the wrong names or with null name pointers. Introduce a sect_empty() function and use it in both add_sect_attrs() and add_notes_attrs(). Reported-by: Martin Michlmayr Signed-off-by: Ben Hutchings Tested-by: Martin Michlmayr Cc: stable@kernel.org Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- kernel/module.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel')
diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6a..f82386bd9ee9 100644 --- a/kernel/module.c +++ b/kernel/module.c
@@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, * J. Corbet */ #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) + +static inline bool sect_empty(const Elf_Shdr *sect) +{ + return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; +} + struct module_sect_attr { struct module_attribute mattr;
@@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC - && sechdrs[i].sh_size) + if (!sect_empty(&sechdrs[i])) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, sattr = &sect_attrs->attrs[0]; gattr = &sect_attrs->grp.attrs[0]; for (i = 0; i < nsect; i++) { - if (! (sechdrs[i].sh_flags & SHF_ALLOC)) - continue; - if (!sechdrs[i].sh_size) + if (sect_empty(&sechdrs[i])) continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, /* Count notes sections and allocate structures. */ notes = 0; for (i = 0; i < nsect; i++) - if ((sechdrs[i].sh_flags & SHF_ALLOC) && + if (!sect_empty(&sechdrs[i]) && (sechdrs[i].sh_type == SHT_NOTE)) ++notes;
@@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, notes_attrs->notes = notes; nattr = &notes_attrs->attrs[0]; for (loaded = i = 0; i < nsect; ++i) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + if (sect_empty(&sechdrs[i])) continue; if (sechdrs[i].sh_type == SHT_NOTE) { nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
-- cgit v1.2.3
From 8767ba2796a1c894e6d9524584a26a8224f0543d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 8 Jan 2010 14:42:38 -0800 Subject: kmod: fix resource leak in call_usermodehelper_pipe() Fix resource (write-pipe file) leak in call_usermodehelper_pipe(). When call_usermodehelper_exec() fails, the write-pipe file has already been opened, yet call_usermodehelper_pipe() just returns an error. Since it is hard for the caller to determine whether the error occurred when opening the pipe or when executing the helper, the caller cannot close the pipe itself.
I found this resource leak while testing coredump. You can watch the resources leak as below: $ echo "|nocommand" > /proc/sys/kernel/core_pattern $ ulimit -c unlimited $ while [ 1 ]; do ./segv; done &> /dev/null & $ cat /proc/meminfo (<- repeat it) where segv.c is: //----- int main () { char *p = 0; *p = 1; } //----- This patch closes the write-pipe file if call_usermodehelper_exec() fails. Signed-off-by: Masami Hiramatsu Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel')
diff --git a/kernel/kmod.c b/kernel/kmod.c index 25b103190364..bf0e231d9702 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, return -ENOMEM; ret = call_usermodehelper_stdinpipe(sub_info, filp); - if (ret < 0) - goto out; + if (ret < 0) { + call_usermodehelper_freeinfo(sub_info); + return ret; + } - return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); + ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); + if (ret < 0) /* Failed to execute helper, close pipe */ + filp_close(*filp, NULL); - out: - call_usermodehelper_freeinfo(sub_info); return ret; } EXPORT_SYMBOL(call_usermodehelper_pipe);
-- cgit v1.2.3
From bd4f490a079730aadfaf9a728303ea0135c01945 Mon Sep 17 00:00:00 2001 From: Dave Anderson Date: Fri, 8 Jan 2010 14:42:50 -0800 Subject: cgroups: fix 2.6.32 regression causing BUG_ON() in cgroup_diput() The LTP cgroup test suite generates a "kernel BUG at kernel/cgroup.c:790!" here in cgroup_diput(): /* * if we're getting rid of the cgroup, refcount should ensure * that there are no pidlists left. */ BUG_ON(!list_empty(&cgrp->pidlists)); The cgroup pidlist rework in 2.6.32 generates the BUG_ON, which is caused when pidlist_array_load() calls cgroup_pidlist_find(): (1) if a matching cgroup_pidlist is found, it down_write's the mutex of the pre-existing cgroup_pidlist, and increments its use_count. (2) if no matching cgroup_pidlist is found, then a new one is allocated, it down_write's its mutex, and the use_count is set to 0. (3) the matching, or new, cgroup_pidlist is returned to pidlist_array_load(), which increments its use_count -- regardless of whether it is new or pre-existing -- and up_write's the mutex. So if a matching list is ever encountered by cgroup_pidlist_find() during the life of a cgroup directory, it results in an inflated use_count value, preventing it from ever getting released by cgroup_release_pid_array(). Then if the directory is subsequently removed, cgroup_diput() hits the BUG_ON() when it finds that the directory's cgroup is still populated with a pidlist. The patch simply removes the use_count increment when a matching pidlist is found by cgroup_pidlist_find(), because it gets bumped by the calling pidlist_array_load() function while still protected by the list's mutex.
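A sketch of the find-or-create refcounting convention the fix below restores, with pidlist_lookup_locked() and pidlist_alloc_locked() as hypothetical stand-ins for the real lookup and allocation paths: the lookup returns the entry write-locked but leaves use_count alone, so the single caller bumps the count exactly once, whether the entry was found or freshly allocated.

    #include <linux/rwsem.h>

    struct pidlist {
            struct rw_semaphore mutex;
            int use_count;          /* references held by open files */
    };

    /* Hypothetical: return existing entry write-locked, refcount untouched. */
    static struct pidlist *pidlist_lookup_locked(void *key);
    /* Hypothetical: allocate new entry write-locked, use_count == 0. */
    static struct pidlist *pidlist_alloc_locked(void *key);

    static struct pidlist *get_pidlist(void *key)
    {
            struct pidlist *l;

            l = pidlist_lookup_locked(key);
            if (!l)
                    l = pidlist_alloc_locked(key);
            if (l) {
                    l->use_count++;         /* the one and only increment */
                    up_write(&l->mutex);
            }
            return l;
    }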
Signed-off-by: Dave Anderson Reviewed-by: Li Zefan Acked-by: Ben Blum Cc: Paul Menage Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel')
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0249f4be9b5c..1fbcc748044a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, /* make sure l doesn't vanish out from under us */ down_write(&l->mutex); mutex_unlock(&cgrp->pidlist_mutex); - l->use_count++; return l; } }
-- cgit v1.2.3
From b45c6e76bc2c72f6426c14bed64fdcbc9bf37cb0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 8 Jan 2010 14:42:52 -0800 Subject: kernel/signal.c: fix kernel information leak with print-fatal-signals=1 When print-fatal-signals is enabled it's possible to dump any memory reachable by the kernel to the log by simply jumping to that address from user space. Or crash the system if there's some hardware with read side effects. The fatal signals handler will dump 16 bytes at the execution address, which is fully controlled by ring 3. In addition, when something jumps to an unmapped address there will be up to 16 additional useless page faults, which may be slow (and are at least not very efficient). Fortunately this option is off by default and only present on i386. But fix it by checking for kernel addresses and also stopping when there's a page fault. Signed-off-by: Andi Kleen Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel')
diff --git a/kernel/signal.c b/kernel/signal.c index d09692b40376..934ae5e687b9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c
@@ -979,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) for (i = 0; i < 16; i++) { unsigned char insn; - __get_user(insn, (unsigned char *)(regs->ip + i)); + if (get_user(insn, (unsigned char *)(regs->ip + i))) + break; printk("%02x ", insn); } }
-- cgit v1.2.3
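The fix works because get_user(), unlike __get_user(), validates the address range (so kernel addresses are rejected) and returns non-zero on a faulting access; checking that return value stops the dump at the first bad byte. A sketch modeled on the patched loop (the surrounding function is illustrative, not from the patch):

    #include <linux/kernel.h>
    #include <linux/uaccess.h>

    static void dump_user_code_bytes(unsigned long ip)
    {
            unsigned char insn;
            int i;

            for (i = 0; i < 16; i++) {
                    /* get_user() range-checks ip+i and reports faults;
                     * __get_user() would do neither. */
                    if (get_user(insn, (unsigned char __user *)(ip + i)))
                            break;  /* kernel address or unmapped page */
                    printk("%02x ", insn);
            }
            printk("\n");
    }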