12 files changed, 71 insertions, 259 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f60afe742599..dfa96956dae0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -40,7 +40,6 @@ obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
-obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3fe21e19c96e..1a3c23936d43 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1,6 +1,4 @@
 /*
- *  kernel/cgroup.c
- *
  *  Generic process-grouping system.
  *
  *  Based originally on the cpuset system, extracted by Paul Menage
@@ -2200,7 +2198,8 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss)
 {
 	struct cgroup_subsys_state *css;
 	struct list_head *l;
-	printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name);
+
+	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
 	/* Create the top cgroup state for this subsystem */
 	ss->root = &rootnode;
@@ -2273,7 +2272,7 @@ int __init cgroup_init_early(void)
 		BUG_ON(!ss->create);
 		BUG_ON(!ss->destroy);
 		if (ss->subsys_id != i) {
-			printk(KERN_ERR "Subsys %s id == %d\n",
+			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
 			       ss->name, ss->subsys_id);
 			BUG();
 		}
@@ -2605,7 +2604,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
 	if (IS_ERR(dentry)) {
 		printk(KERN_INFO
-		       "Couldn't allocate dentry for %s: %ld\n", nodename,
+		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
 		       PTR_ERR(dentry));
 		ret = PTR_ERR(dentry);
 		goto out_release;
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
deleted file mode 100644
index 731e47e7f164..000000000000
--- a/kernel/cpu_acct.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * kernel/cpu_acct.c - CPU accounting cgroup subsystem
- *
- * Copyright (C) Google Inc, 2006
- *
- * Developed by Paul Menage (menage@google.com) and Balbir Singh
- * (balbir@in.ibm.com)
- *
- */
-
-/*
- * Example cgroup subsystem for reporting total CPU usage of tasks in a
- * cgroup, along with percentage load over a time interval
- */
-
-#include <linux/module.h>
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/rcupdate.h>
-
-#include <asm/div64.h>
-
-struct cpuacct {
-	struct cgroup_subsys_state css;
-	spinlock_t lock;
-	/* total time used by this class */
-	cputime64_t time;
-
-	/* time when next load calculation occurs */
-	u64 next_interval_check;
-
-	/* time used in current period */
-	cputime64_t current_interval_time;
-
-	/* time used in last period */
-	cputime64_t last_interval_time;
-};
-
-struct cgroup_subsys cpuacct_subsys;
-
-static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
-{
-	return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
-			    struct cpuacct, css);
-}
-
-static inline struct cpuacct *task_ca(struct task_struct *task)
-{
-	return container_of(task_subsys_state(task, cpuacct_subsys_id),
-			    struct cpuacct, css);
-}
-
-#define INTERVAL (HZ * 10)
-
-static inline u64 next_interval_boundary(u64 now)
-{
-	/* calculate the next interval boundary beyond the
-	 * current time */
-	do_div(now, INTERVAL);
-	return (now + 1) * INTERVAL;
-}
-
-static struct cgroup_subsys_state *cpuacct_create(
-	struct cgroup_subsys *ss, struct cgroup *cont)
-{
-	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-
-	if (!ca)
-		return ERR_PTR(-ENOMEM);
-	spin_lock_init(&ca->lock);
-	ca->next_interval_check = next_interval_boundary(get_jiffies_64());
-	return &ca->css;
-}
-
-static void cpuacct_destroy(struct cgroup_subsys *ss,
-			    struct cgroup *cont)
-{
-	kfree(cgroup_ca(cont));
-}
-
-/* Lazily update the load calculation if necessary. Called with ca locked */
-static void cpuusage_update(struct cpuacct *ca)
-{
-	u64 now = get_jiffies_64();
-
-	/* If we're not due for an update, return */
-	if (ca->next_interval_check > now)
-		return;
-
-	if (ca->next_interval_check <= (now - INTERVAL)) {
-		/* If it's been more than an interval since the last
-		 * check, then catch up - the last interval must have
-		 * been zero load */
-		ca->last_interval_time = 0;
-		ca->next_interval_check = next_interval_boundary(now);
-	} else {
-		/* If a steal takes the last interval time negative,
-		 * then we just ignore it */
-		if ((s64)ca->current_interval_time > 0)
-			ca->last_interval_time = ca->current_interval_time;
-		else
-			ca->last_interval_time = 0;
-		ca->next_interval_check += INTERVAL;
-	}
-	ca->current_interval_time = 0;
-}
-
-static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
-{
-	struct cpuacct *ca = cgroup_ca(cont);
-	u64 time;
-
-	spin_lock_irq(&ca->lock);
-	cpuusage_update(ca);
-	time = cputime64_to_jiffies64(ca->time);
-	spin_unlock_irq(&ca->lock);
-
-	/* Convert 64-bit jiffies to seconds */
-	time *= 1000;
-	do_div(time, HZ);
-	return time;
-}
-
-static u64 load_read(struct cgroup *cont, struct cftype *cft)
-{
-	struct cpuacct *ca = cgroup_ca(cont);
-	u64 time;
-
-	/* Find the time used in the previous interval */
-	spin_lock_irq(&ca->lock);
-	cpuusage_update(ca);
-	time = cputime64_to_jiffies64(ca->last_interval_time);
-	spin_unlock_irq(&ca->lock);
-
-	/* Convert time to a percentage, to give the load in the
-	 * previous period */
-	time *= 100;
-	do_div(time, INTERVAL);
-
-	return time;
-}
-
-static struct cftype files[] = {
-	{
-		.name = "usage",
-		.read_uint = cpuusage_read,
-	},
-	{
-		.name = "load",
-		.read_uint = load_read,
-	}
-};
-
-static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-void cpuacct_charge(struct task_struct *task, cputime_t cputime)
-{
-
-	struct cpuacct *ca;
-	unsigned long flags;
-
-	if (!cpuacct_subsys.active)
-		return;
-	rcu_read_lock();
-	ca = task_ca(task);
-	if (ca) {
-		spin_lock_irqsave(&ca->lock, flags);
-		cpuusage_update(ca);
-		ca->time = cputime64_add(ca->time, cputime);
-		ca->current_interval_time =
-			cputime64_add(ca->current_interval_time, cputime);
-		spin_unlock_irqrestore(&ca->lock, flags);
-	}
-	rcu_read_unlock();
-}
-
-struct cgroup_subsys cpuacct_subsys = {
-	.name = "cpuacct",
-	.create = cpuacct_create,
-	.destroy = cpuacct_destroy,
-	.populate = cpuacct_populate,
-	.subsys_id = cpuacct_subsys_id,
-};
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e391cbb1f566..dc335ad27525 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -178,9 +178,11 @@ fastcall unsigned int __do_IRQ(unsigned int irq)
 		 */
 		if (desc->chip->ack)
 			desc->chip->ack(irq);
-		action_ret = handle_IRQ_event(irq, desc->action);
-		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+		if (likely(!(desc->status & IRQ_DISABLED))) {
+			action_ret = handle_IRQ_event(irq, desc->action);
+			if (!noirqdebug)
+				note_interrupt(irq, desc, action_ret);
+		}
 		desc->chip->end(irq);
 		return 1;
 	}
diff --git a/kernel/marker.c b/kernel/marker.c
index ccb48d9a3657..5323cfaedbce 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -28,7 +28,7 @@ extern struct marker __start___markers[];
 extern struct marker __stop___markers[];
 
 /*
- * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
  * and module markers, the hash table and deferred_sync.
  */
 static DEFINE_MUTEX(markers_mutex);
@@ -257,7 +257,6 @@ static void disable_marker(struct marker *elem)
  * @refcount: number of references left to the given probe_module (out)
  *
  * Updates the probe callback corresponding to a range of markers.
- * Must be called with markers_mutex held.
  */
 void marker_update_probe_range(struct marker *begin,
 	struct marker *end, struct module *probe_module,
@@ -266,6 +265,7 @@ void marker_update_probe_range(struct marker *begin,
 	struct marker *iter;
 	struct marker_entry *mark_entry;
 
+	mutex_lock(&markers_mutex);
 	for (iter = begin; iter < end; iter++) {
 		mark_entry = get_marker(iter->name);
 		if (mark_entry && mark_entry->refcount) {
@@ -281,6 +281,7 @@ void marker_update_probe_range(struct marker *begin,
 			disable_marker(iter);
 		}
 	}
+	mutex_unlock(&markers_mutex);
 }
 
 /*
@@ -293,7 +294,6 @@ static void marker_update_probes(struct module *probe_module)
 {
 	int refcount = 0;
 
-	mutex_lock(&markers_mutex);
 	/* Core kernel markers */
 	marker_update_probe_range(__start___markers,
 			__stop___markers, probe_module, &refcount);
@@ -303,7 +303,6 @@ static void marker_update_probes(struct module *probe_module)
 		synchronize_sched();
 		deferred_sync = 0;
 	}
-	mutex_unlock(&markers_mutex);
 }
 
 /**
@@ -320,7 +319,7 @@ int marker_probe_register(const char *name, const char *format,
 			marker_probe_func *probe, void *private)
 {
 	struct marker_entry *entry;
-	int ret = 0, need_update = 0;
+	int ret = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
@@ -335,11 +334,11 @@ int marker_probe_register(const char *name, const char *format,
 	ret = add_marker(name, format, probe, private);
 	if (ret)
 		goto end;
-	need_update = 1;
+	mutex_unlock(&markers_mutex);
+	marker_update_probes(NULL);
+	return ret;
 end:
 	mutex_unlock(&markers_mutex);
-	if (need_update)
-		marker_update_probes(NULL);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_probe_register);
@@ -355,7 +354,6 @@ void *marker_probe_unregister(const char *name)
 	struct module *probe_module;
 	struct marker_entry *entry;
 	void *private;
-	int need_update = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
@@ -368,11 +366,11 @@ void *marker_probe_unregister(const char *name)
 	probe_module = __module_text_address((unsigned long)entry->probe);
 	private = remove_marker(name);
 	deferred_sync = 1;
-	need_update = 1;
+	mutex_unlock(&markers_mutex);
+	marker_update_probes(probe_module);
+	return private;
 end:
 	mutex_unlock(&markers_mutex);
-	if (need_update)
-		marker_update_probes(probe_module);
 	return private;
 }
 EXPORT_SYMBOL_GPL(marker_probe_unregister);
@@ -392,7 +390,6 @@ void *marker_probe_unregister_private_data(void *private)
 	struct marker_entry *entry;
 	int found = 0;
 	unsigned int i;
-	int need_update = 0;
 
 	mutex_lock(&markers_mutex);
 	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
@@ -414,11 +411,11 @@ iter_end:
 	probe_module = __module_text_address((unsigned long)entry->probe);
 	private = remove_marker(entry->name);
 	deferred_sync = 1;
-	need_update = 1;
+	mutex_unlock(&markers_mutex);
+	marker_update_probes(probe_module);
+	return private;
 end:
 	mutex_unlock(&markers_mutex);
-	if (need_update)
-		marker_update_probes(probe_module);
 	return private;
 }
 EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
@@ -434,7 +431,7 @@ EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
 int marker_arm(const char *name)
 {
 	struct marker_entry *entry;
-	int ret = 0, need_update = 0;
+	int ret = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
@@ -447,11 +444,9 @@ int marker_arm(const char *name)
 	 */
 	if (entry->refcount++)
 		goto end;
-	need_update = 1;
 end:
 	mutex_unlock(&markers_mutex);
-	if (need_update)
-		marker_update_probes(NULL);
+	marker_update_probes(NULL);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_arm);
@@ -467,7 +462,7 @@ EXPORT_SYMBOL_GPL(marker_arm);
 int marker_disarm(const char *name)
 {
 	struct marker_entry *entry;
-	int ret = 0, need_update = 0;
+	int ret = 0;
 
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
@@ -486,11 +481,9 @@ int marker_disarm(const char *name)
 		ret = -EPERM;
 		goto end;
 	}
-	need_update = 1;
 end:
 	mutex_unlock(&markers_mutex);
-	if (need_update)
-		marker_update_probes(NULL);
+	marker_update_probes(NULL);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(marker_disarm);
diff --git a/kernel/params.c b/kernel/params.c
index 16f269e9ddc9..2a4c51487e72 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -592,19 +592,16 @@ static void __init param_sysfs_builtin(void)
 
 	for (i=0; i < __stop___param - __start___param; i++) {
 		char *dot;
-		size_t kplen;
+		size_t max_name_len;
 
 		kp = &__start___param[i];
-		kplen = strlen(kp->name);
+		max_name_len =
+			min_t(size_t, MAX_KBUILD_MODNAME, strlen(kp->name));
 
-		/* We do not handle args without periods. */
-		if (kplen > MAX_KBUILD_MODNAME) {
-			DEBUGP("kernel parameter name is too long: %s\n", kp->name);
-			continue;
-		}
-		dot = memchr(kp->name, '.', kplen);
+		dot = memchr(kp->name, '.', max_name_len);
 		if (!dot) {
-			DEBUGP("couldn't find period in %s\n", kp->name);
+			DEBUGP("couldn't find period in first %d characters "
+			       "of %s\n", MAX_KBUILD_MODNAME, kp->name);
 			continue;
 		}
 		name_len = dot - kp->name;
diff --git a/kernel/pid.c b/kernel/pid.c
index d1db36b94674..f815455431bf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -537,6 +537,7 @@ err_alloc:
 	return NULL;
 }
 
+#ifdef CONFIG_PID_NS
 static struct pid_namespace *create_pid_namespace(int level)
 {
 	struct pid_namespace *ns;
@@ -621,6 +622,7 @@ void free_pid_ns(struct kref *kref)
 	if (parent != NULL)
 		put_pid_ns(parent);
 }
+#endif /* CONFIG_PID_NS */
 
 void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 8b15f777010a..05b64790fe83 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -456,7 +456,17 @@ static int software_resume(void)
 	int error;
 	unsigned int flags;
 
-	mutex_lock(&pm_mutex);
+	/*
+	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
+	 * is configured into the kernel. Since the regular hibernate
+	 * trigger path is via sysfs which takes a buffer mutex before
+	 * calling hibernate functions (which take pm_mutex) this can
+	 * cause lockdep to complain about a possible ABBA deadlock
+	 * which cannot happen since we're in the boot code here and
+	 * sysfs can't be invoked yet. Therefore, we use a subclass
+	 * here to avoid lockdep complaining.
+	 */
+	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
 	if (!swsusp_resume_device) {
 		if (!strlen(resume_file)) {
 			mutex_unlock(&pm_mutex);
diff --git a/kernel/resource.c b/kernel/resource.c
index a358142ff48f..2eb553d9b517 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -277,7 +277,7 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
 	int ret = -1;
 	res.start = (u64) start_pfn << PAGE_SHIFT;
 	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
-	res.flags = IORESOURCE_MEM;
+	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
 		pfn = (unsigned long)(res.start >> PAGE_SHIFT);
diff --git a/kernel/sched.c b/kernel/sched.c
index b18f231a4875..4fb3532dd7e8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -52,7 +52,6 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
-#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -3338,13 +3337,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
-	struct rq *rq = this_rq();
 
 	p->utime = cputime_add(p->utime, cputime);
 
-	if (p != rq->idle)
-		cpuacct_charge(p, cputime);
-
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
@@ -3408,10 +3403,9 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle) {
+	else if (p != rq->idle)
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-		cpuacct_charge(p, cputime);
-	} else if (atomic_read(&rq->nr_iowait) > 0)
+	else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3447,10 +3441,8 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
 			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else {
+	} else
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
-		cpuacct_charge(p, -tmp);
-	}
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3a1744fed2b6..0deed82a6156 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2620,6 +2620,10 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args)
 	int name[CTL_MAXNAME];
 	int i;
 
+	/* Check args->nlen. */
+	if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
+		return -ENOTDIR;
+
 	/* Read in the sysctl name for better debug message logging */
 	for (i = 0; i < args->nlen; i++)
 		if (get_user(name[i], args->name + i))
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 354e74bc17c1..07e86a828073 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -398,31 +398,31 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 
 	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
 	file = fget_light(fd, &fput_needed);
-	if (file) {
-		size = nla_total_size(sizeof(struct cgroupstats));
+	if (!file)
+		return 0;
 
-		rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
-					size);
-		if (rc < 0)
-			goto err;
+	size = nla_total_size(sizeof(struct cgroupstats));
 
-		na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
-					sizeof(struct cgroupstats));
-		stats = nla_data(na);
-		memset(stats, 0, sizeof(*stats));
+	rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
+				size);
+	if (rc < 0)
+		goto err;
 
-		rc = cgroupstats_build(stats, file->f_dentry);
-		if (rc < 0)
-			goto err;
+	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+				sizeof(struct cgroupstats));
+	stats = nla_data(na);
+	memset(stats, 0, sizeof(*stats));
 
-		fput_light(file, fput_needed);
-		return send_reply(rep_skb, info->snd_pid);
+	rc = cgroupstats_build(stats, file->f_dentry);
+	if (rc < 0) {
+		nlmsg_free(rep_skb);
+		goto err;
 	}
 
+	rc = send_reply(rep_skb, info->snd_pid);
+
 err:
-	if (file)
-		fput_light(file, fput_needed);
-	nlmsg_free(rep_skb);
+	fput_light(file, fput_needed);
 	return rc;
 }