Merge remote-tracking branch 'linus/master' into staging/for_v3.8

* linus/master: (1428 commits) futex: avoid wake_futex() for a PI futex_q watchdog: using u64 in get_sample_period() writeback: put unused inodes to LRU after writeback completion mm: vmscan: check for fatal signals iff the process was throttled Revert "mm: remove __GFP_NO_KSWAPD" proc: check vma->vm_file before dereferencing UAPI: strip the _UAPI prefix from header guards during header installation include/linux/bug.h: fix sparse warning related to BUILD_BUG_ON_INVALID Linux 3.7-rc7 powerpc/eeh: Do not invalidate PE properly ALSA: hda - Fix build without CONFIG_PM of/address: sparc: Declare of_iomap as an extern function for sparc again PM / QoS: fix wrong error-checking condition bnx2x: remove redundant warning log vxlan: fix command usage in its doc 8139cp: revert "set ring address before enabling receiver" MPI: Fix compilation on MIPS with GCC 4.4 and newer MIPS: Fix crash that occurs when function tracing is enabled MIPS: Merge overlapping bootmem ranges jbd: Fix lock ordering bug in journal_unmap_buffer() ...
author: Mauro Carvalho Chehab <mchehab@redhat.com> 2012-11-28 10:22:38 +0100
committer: Mauro Carvalho Chehab <mchehab@redhat.com> 2012-11-28 10:22:38 +0100
commit: d033a308d81ec83908760a15a841c2bd0d5e0ea3 (patch)
tree: 647abd67987fe4d7fee80105fc823edf3f53b4f6 /kernel
parent: [media] dma-mapping: fix dma_common_get_sgtable() conditional compilation (diff)
parent: Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git... (diff)
download: linux-d033a308d81ec83908760a15a841c2bd0d5e0ea3.tar.xz
linux-d033a308d81ec83908760a15a841c2bd0d5e0ea3.zip
13 files changed, 293 insertions, 294 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0dfeca4324ee..86e3285ae7e5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -174,10 +174,8 @@ signing_key.priv signing_key.x509: x509.genkey
 	@echo "###"
 	@echo "### If this takes a long time, you might wish to run rngd in the"
 	@echo "### background to keep the supply of entropy topped up.  It"
-	@echo "### needs to be run as root, and should use a hardware random"
-	@echo "### number generator if one is available, eg:"
-	@echo "###"
-	@echo "###     rngd -r /dev/hwrandom"
+	@echo "### needs to be run as root, and uses a hardware random"
+	@echo "### number generator if one is available."
 	@echo "###"
 	openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
 		-x509 -config x509.genkey \
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3b39aa..f24f724620dd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1962,9 +1962,8 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
 	 * it here; it will be freed under RCU.
 	 */
-	put_css_set(oldcg);
-
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	put_css_set(oldcg);
 }
 
 /**
@@ -4815,31 +4814,20 @@ static const struct file_operations proc_cgroupstats_operations = {
  *
  * A pointer to the shared css_set was automatically copied in
  * fork.c by dup_task_struct().  However, we ignore that copy, since
- * it was not made under the protection of RCU, cgroup_mutex or
- * threadgroup_change_begin(), so it might no longer be a valid
- * cgroup pointer.  cgroup_attach_task() might have already changed
- * current->cgroups, allowing the previously referenced cgroup
- * group to be removed and freed.
- *
- * Outside the pointer validity we also need to process the css_set
- * inheritance between threadgoup_change_begin() and
- * threadgoup_change_end(), this way there is no leak in any process
- * wide migration performed by cgroup_attach_proc() that could otherwise
- * miss a thread because it is too early or too late in the fork stage.
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  cgroup_attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced cgroup group to be removed and freed.
  *
  * At the point that cgroup_fork() is called, 'current' is the parent
  * task, and the passed argument 'child' points to the child task.
  */
 void cgroup_fork(struct task_struct *child)
 {
-	/*
-	 * We don't need to task_lock() current because current->cgroups
-	 * can't be changed concurrently here. The parent obviously hasn't
-	 * exited and called cgroup_exit(), and we are synchronized against
-	 * cgroup migration through threadgroup_change_begin().
-	 */
+	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
+	task_unlock(current);
 	INIT_LIST_HEAD(&child->cg_list);
 }
 
@@ -4895,19 +4883,10 @@ void cgroup_post_fork(struct task_struct *child)
 	 */
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
-		if (list_empty(&child->cg_list)) {
-			/*
-			 * It's safe to use child->cgroups without task_lock()
-			 * here because we are protected through
-			 * threadgroup_change_begin() against concurrent
-			 * css_set change in cgroup_task_migrate(). Also
-			 * the task can't exit at that point until
-			 * wake_up_new_task() is called, so we are protected
-			 * against cgroup_exit() setting child->cgroup to
-			 * init_css_set.
-			 */
+		task_lock(child);
+		if (list_empty(&child->cg_list))
 			list_add(&child->cg_list, &child->cgroups->tasks);
-		}
+		task_unlock(child);
 		write_unlock(&css_set_lock);
 	}
 }
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 98256bc71ee1..5cc4e7e42e68 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -78,15 +78,23 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
+/* Have a copy of original instruction */
+#define UPROBE_COPY_INSN	0
+/* Dont run handlers when first register/ last unregister in progress*/
+#define UPROBE_RUN_HANDLER	1
+/* Can skip singlestep */
+#define UPROBE_SKIP_SSTEP	2
+
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
 	struct rw_semaphore	consumer_rwsem;
+	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
 	struct inode		*inode;		/* Also hold a ref to inode */
 	loff_t			offset;
-	int			flags;
+	unsigned long		flags;
 	struct arch_uprobe	arch;
 };
 
@@ -100,17 +108,12 @@ struct uprobe {
  */
 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 {
-	if (!vma->vm_file)
-		return false;
-
-	if (!is_register)
-		return true;
+	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
 
-	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
-				== (VM_READ|VM_EXEC))
-		return true;
+	if (is_register)
+		flags |= VM_WRITE;
 
-	return false;
+	return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
 }
 
 static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
@@ -193,19 +196,44 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
 	return *insn == UPROBE_SWBP_INSN;
 }
 
+static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
+{
+	void *kaddr = kmap_atomic(page);
+	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
+	kunmap_atomic(kaddr);
+}
+
+static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
+{
+	uprobe_opcode_t old_opcode;
+	bool is_swbp;
+
+	copy_opcode(page, vaddr, &old_opcode);
+	is_swbp = is_swbp_insn(&old_opcode);
+
+	if (is_swbp_insn(new_opcode)) {
+		if (is_swbp)		/* register: already installed? */
+			return 0;
+	} else {
+		if (!is_swbp)		/* unregister: was it changed by us? */
+			return 0;
+	}
+
+	return 1;
+}
+
 /*
  * NOTE:
  * Expect the breakpoint instruction to be the smallest size instruction for
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
- * supported by that architecture then we need to modify read_opcode /
+ * supported by that architecture then we need to modify is_swbp_at_addr and
  * write_opcode accordingly. This would never be a problem for archs that
  * have fixed length instructions.
  */
 
 /*
  * write_opcode - write the opcode at a given virtual address.
- * @auprobe: arch breakpointing information.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
@@ -216,8 +244,8 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
  * For mm @mm, write the opcode at @vaddr.
  * Return 0 (success) or a negative errno.
  */
-static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
-			unsigned long vaddr, uprobe_opcode_t opcode)
+static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
+			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
 	void *vaddr_old, *vaddr_new;
@@ -226,10 +254,14 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 
 retry:
 	/* Read the page with vaddr into memory */
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
+	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
 	if (ret <= 0)
 		return ret;
 
+	ret = verify_opcode(old_page, vaddr, &opcode);
+	if (ret <= 0)
+		goto put_old;
+
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
@@ -264,63 +296,6 @@ put_old:
 }
 
 /**
- * read_opcode - read the opcode at a given virtual address.
- * @mm: the probed process address space.
- * @vaddr: the virtual address to read the opcode.
- * @opcode: location to store the read opcode.
- *
- * Called with mm->mmap_sem held (for read and with a reference to
- * mm.
- *
- * For mm @mm, read the opcode at @vaddr and store it in @opcode.
- * Return 0 (success) or a negative errno.
- */
-static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
-{
-	struct page *page;
-	void *vaddr_new;
-	int ret;
-
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
-	if (ret <= 0)
-		return ret;
-
-	vaddr_new = kmap_atomic(page);
-	vaddr &= ~PAGE_MASK;
-	memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
-	kunmap_atomic(vaddr_new);
-
-	put_page(page);
-
-	return 0;
-}
-
-static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
-{
-	uprobe_opcode_t opcode;
-	int result;
-
-	if (current->mm == mm) {
-		pagefault_disable();
-		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
-								sizeof(opcode));
-		pagefault_enable();
-
-		if (likely(result == 0))
-			goto out;
-	}
-
-	result = read_opcode(mm, vaddr, &opcode);
-	if (result)
-		return result;
-out:
-	if (is_swbp_insn(&opcode))
-		return 1;
-
-	return 0;
-}
-
-/**
  * set_swbp - store breakpoint at a given address.
  * @auprobe: arch specific probepoint information.
  * @mm: the probed process address space.
@@ -331,18 +306,7 @@ out:
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	int result;
-	/*
-	 * See the comment near uprobes_hash().
-	 */
-	result = is_swbp_at_addr(mm, vaddr);
-	if (result == 1)
-		return 0;
-
-	if (result)
-		return result;
-
-	return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
+	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -357,16 +321,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	int result;
-
-	result = is_swbp_at_addr(mm, vaddr);
-	if (!result)
-		return -EINVAL;
-
-	if (result != 1)
-		return result;
-
-	return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -473,7 +428,7 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 	spin_unlock(&uprobes_treelock);
 
 	/* For now assume that the instruction need not be single-stepped */
-	uprobe->flags |= UPROBE_SKIP_SSTEP;
+	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
 
 	return u;
 }
@@ -495,6 +450,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->consumer_rwsem);
+	mutex_init(&uprobe->copy_mutex);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -515,7 +471,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 
-	if (!(uprobe->flags & UPROBE_RUN_HANDLER))
+	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
 		return;
 
 	down_read(&uprobe->consumer_rwsem);
@@ -621,29 +577,43 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp)
 	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
-/*
- * How mm->uprobes_state.count gets updated
- * uprobe_mmap() increments the count if
- * 	- it successfully adds a breakpoint.
- * 	- it cannot add a breakpoint, but sees that there is a underlying
- * 	  breakpoint (via a is_swbp_at_addr()).
- *
- * uprobe_munmap() decrements the count if
- * 	- it sees a underlying breakpoint, (via is_swbp_at_addr)
- * 	  (Subsequent uprobe_unregister wouldnt find the breakpoint
- * 	  unless a uprobe_mmap kicks in, since the old vma would be
- * 	  dropped just after uprobe_munmap.)
- *
- * uprobe_register increments the count if:
- * 	- it successfully adds a breakpoint.
- *
- * uprobe_unregister decrements the count if:
- * 	- it sees a underlying breakpoint and removes successfully.
- * 	  (via is_swbp_at_addr)
- * 	  (Subsequent uprobe_munmap wouldnt find the breakpoint
- * 	  since there is no underlying breakpoint after the
- * 	  breakpoint removal.)
- */
+static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
+				struct mm_struct *mm, unsigned long vaddr)
+{
+	int ret = 0;
+
+	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
+		return ret;
+
+	mutex_lock(&uprobe->copy_mutex);
+	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
+		goto out;
+
+	ret = copy_insn(uprobe, file);
+	if (ret)
+		goto out;
+
+	ret = -ENOTSUPP;
+	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+		goto out;
+
+	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
+	if (ret)
+		goto out;
+
+	/* write_opcode() assumes we don't cross page boundary */
+	BUG_ON((uprobe->offset & ~PAGE_MASK) +
+			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
+	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
+	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
+
+ out:
+	mutex_unlock(&uprobe->copy_mutex);
+
+	return ret;
+}
+
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long vaddr)
@@ -661,24 +631,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	if (!uprobe->consumers)
 		return 0;
 
-	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-		ret = copy_insn(uprobe, vma->vm_file);
-		if (ret)
-			return ret;
-
-		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-			return -ENOTSUPP;
-
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
-		if (ret)
-			return ret;
-
-		/* write_opcode() assumes we don't cross page boundary */
-		BUG_ON((uprobe->offset & ~PAGE_MASK) +
-				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-
-		uprobe->flags |= UPROBE_COPY_INSN;
-	}
+	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
+	if (ret)
+		return ret;
 
 	/*
 	 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
@@ -697,15 +652,15 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	return ret;
 }
 
-static void
+static int
 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
 	/* can happen if uprobe_register() fails */
 	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
-		return;
+		return 0;
 
 	set_bit(MMF_RECALC_UPROBES, &mm->flags);
-	set_orig_insn(&uprobe->arch, mm, vaddr);
+	return set_orig_insn(&uprobe->arch, mm, vaddr);
 }
 
 /*
@@ -820,7 +775,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		struct mm_struct *mm = info->mm;
 		struct vm_area_struct *vma;
 
-		if (err)
+		if (err && is_register)
 			goto free;
 
 		down_write(&mm->mmap_sem);
@@ -836,7 +791,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		if (is_register)
 			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
 		else
-			remove_breakpoint(uprobe, mm, info->vaddr);
+			err |= remove_breakpoint(uprobe, mm, info->vaddr);
 
  unlock:
 		up_write(&mm->mmap_sem);
@@ -893,13 +848,15 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	mutex_lock(uprobes_hash(inode));
 	uprobe = alloc_uprobe(inode, offset);
 
-	if (uprobe && !consumer_add(uprobe, uc)) {
+	if (!uprobe) {
+		ret = -ENOMEM;
+	} else if (!consumer_add(uprobe, uc)) {
 		ret = __uprobe_register(uprobe);
 		if (ret) {
 			uprobe->consumers = NULL;
 			__uprobe_unregister(uprobe);
 		} else {
-			uprobe->flags |= UPROBE_RUN_HANDLER;
+			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
 		}
 	}
 
@@ -932,7 +889,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	if (consumer_del(uprobe, uc)) {
 		if (!uprobe->consumers) {
 			__uprobe_unregister(uprobe);
-			uprobe->flags &= ~UPROBE_RUN_HANDLER;
+			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
 		}
 	}
 
@@ -1393,10 +1350,11 @@ bool uprobe_deny_signal(void)
  */
 static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 {
-	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-		return true;
-
-	uprobe->flags &= ~UPROBE_SKIP_SSTEP;
+	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
+		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
+			return true;
+		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
+	}
 	return false;
 }
 
@@ -1419,6 +1377,30 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 	clear_bit(MMF_HAS_UPROBES, &mm->flags);
 }
 
+static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+{
+	struct page *page;
+	uprobe_opcode_t opcode;
+	int result;
+
+	pagefault_disable();
+	result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
+							sizeof(opcode));
+	pagefault_enable();
+
+	if (likely(result == 0))
+		goto out;
+
+	result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+	if (result < 0)
+		return result;
+
+	copy_opcode(page, vaddr, &opcode);
+	put_page(page);
+ out:
+	return is_swbp_insn(&opcode);
+}
+
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 {
 	struct mm_struct *mm = current->mm;
@@ -1489,38 +1471,41 @@ static void handle_swbp(struct pt_regs *regs)
 		}
 		return;
 	}
+	/*
+	 * TODO: move copy_insn/etc into _register and remove this hack.
+	 * After we hit the bp, _unregister + _register can install the
+	 * new and not-yet-analyzed uprobe at the same address, restart.
+	 */
+	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
+	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
+		goto restart;
 
 	utask = current->utask;
 	if (!utask) {
 		utask = add_utask();
 		/* Cannot allocate; re-execute the instruction. */
 		if (!utask)
-			goto cleanup_ret;
+			goto restart;
 	}
-	utask->active_uprobe = uprobe;
+
 	handler_chain(uprobe, regs);
-	if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs))
-		goto cleanup_ret;
+	if (can_skip_sstep(uprobe, regs))
+		goto out;
 
-	utask->state = UTASK_SSTEP;
 	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
 		arch_uprobe_enable_step(&uprobe->arch);
+		utask->active_uprobe = uprobe;
+		utask->state = UTASK_SSTEP;
 		return;
 	}
 
-cleanup_ret:
-	if (utask) {
-		utask->active_uprobe = NULL;
-		utask->state = UTASK_RUNNING;
-	}
-	if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
-
-		/*
-		 * cannot singlestep; cannot skip instruction;
-		 * re-execute the instruction.
-		 */
-		instruction_pointer_set(regs, bp_vaddr);
-
+restart:
+	/*
+	 * cannot singlestep; cannot skip instruction;
+	 * re-execute the instruction.
+	 */
+	instruction_pointer_set(regs, bp_vaddr);
+out:
 	put_uprobe(uprobe);
 }
 
@@ -1552,13 +1537,12 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 }
 
 /*
- * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag.  (and on
- * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and
- * allows the thread to return from interrupt.
+ * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
+ * allows the thread to return from interrupt. After that handle_swbp()
+ * sets utask->active_uprobe.
  *
- * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and
- * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from
- * interrupt.
+ * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
+ * and allows the thread to return from interrupt.
  *
  * While returning to userspace, thread notices the TIF_UPROBE flag and calls
  * uprobe_notify_resume().
@@ -1567,11 +1551,13 @@ void uprobe_notify_resume(struct pt_regs *regs)
 {
 	struct uprobe_task *utask;
 
+	clear_thread_flag(TIF_UPROBE);
+
 	utask = current->utask;
-	if (!utask || utask->state == UTASK_BP_HIT)
-		handle_swbp(regs);
-	else
+	if (utask && utask->active_uprobe)
 		handle_singlestep(utask, regs);
+	else
+		handle_swbp(regs);
 }
 
 /*
@@ -1580,17 +1566,10 @@ void uprobe_notify_resume(struct pt_regs *regs)
  */
 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
-	struct uprobe_task *utask;
-
 	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
 		return 0;
 
-	utask = current->utask;
-	if (utask)
-		utask->state = UTASK_BP_HIT;
-
 	set_thread_flag(TIF_UPROBE);
-
 	return 1;
 }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 3717e7b306e0..19eb089ca003 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -716,7 +716,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				struct futex_pi_state **ps,
 				struct task_struct *task, int set_waiters)
 {
-	int lock_taken, ret, ownerdied = 0;
+	int lock_taken, ret, force_take = 0;
 	u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
@@ -755,17 +755,15 @@ retry:
 	newval = curval | FUTEX_WAITERS;
 
 	/*
-	 * There are two cases, where a futex might have no owner (the
-	 * owner TID is 0): OWNER_DIED. We take over the futex in this
-	 * case. We also do an unconditional take over, when the owner
-	 * of the futex died.
-	 *
-	 * This is safe as we are protected by the hash bucket lock !
+	 * Should we force take the futex? See below.
 	 */
-	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-		/* Keep the OWNER_DIED bit */
+	if (unlikely(force_take)) {
+		/*
+		 * Keep the OWNER_DIED and the WAITERS bit and set the
+		 * new TID value.
+		 */
 		newval = (curval & ~FUTEX_TID_MASK) | vpid;
-		ownerdied = 0;
+		force_take = 0;
 		lock_taken = 1;
 	}
 
@@ -775,7 +773,7 @@ retry:
 		goto retry;
 
 	/*
-	 * We took the lock due to owner died take over.
+	 * We took the lock due to forced take over.
 	 */
 	if (unlikely(lock_taken))
 		return 1;
@@ -790,20 +788,25 @@ retry:
 		switch (ret) {
 		case -ESRCH:
 			/*
-			 * No owner found for this futex. Check if the
-			 * OWNER_DIED bit is set to figure out whether
-			 * this is a robust futex or not.
+			 * We failed to find an owner for this
+			 * futex. So we have no pi_state to block
+			 * on. This can happen in two cases:
+			 *
+			 * 1) The owner died
+			 * 2) A stale FUTEX_WAITERS bit
+			 *
+			 * Re-read the futex value.
 			 */
 			if (get_futex_value_locked(&curval, uaddr))
 				return -EFAULT;
 
 			/*
-			 * We simply start over in case of a robust
-			 * futex. The code above will take the futex
-			 * and return happy.
+			 * If the owner died or we have a stale
+			 * WAITERS bit the owner TID in the user space
+			 * futex is 0.
 			 */
-			if (curval & FUTEX_OWNER_DIED) {
-				ownerdied = 1;
+			if (!(curval & FUTEX_TID_MASK)) {
+				force_take = 1;
 				goto retry;
 			}
 		default:
@@ -840,6 +843,9 @@ static void wake_futex(struct futex_q *q)
 {
 	struct task_struct *p = q->task;
 
+	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
+		return;
+
 	/*
 	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
 	 * a non-futex wake up happens on another CPU then the task
@@ -1075,6 +1081,10 @@ retry_private:
 
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key1)) {
+			if (this->pi_state || this->rt_waiter) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
 			wake_futex(this);
 			if (++ret >= nr_wake)
 				break;
@@ -1087,6 +1097,10 @@ retry_private:
 		op_ret = 0;
 		plist_for_each_entry_safe(this, next, head, list) {
 			if (match_futex (&this->key, &key2)) {
+				if (this->pi_state || this->rt_waiter) {
+					ret = -EINVAL;
+					goto out_unlock;
+				}
 				wake_futex(this);
 				if (++op_ret >= nr_wake2)
 					break;
@@ -1095,6 +1109,7 @@ retry_private:
 		ret += op_ret;
 	}
 
+out_unlock:
 	double_unlock_hb(hb1, hb2);
 out_put_keys:
 	put_futex_key(&key2);
@@ -1384,9 +1399,13 @@ retry_private:
 		/*
 		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
 		 * be paired with each other and no other futex ops.
+		 *
+		 * We should never be requeueing a futex_q with a pi_state,
+		 * which is awaiting a futex_unlock_pi().
 		 */
 		if ((requeue_pi && !this->rt_waiter) ||
-		    (!requeue_pi && this->rt_waiter)) {
+		    (!requeue_pi && this->rt_waiter) ||
+		    this->pi_state) {
 			ret = -EINVAL;
 			break;
 		}
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 6114a13419bd..24f9247b7d02 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -11,5 +11,4 @@
 
 extern struct key *modsign_keyring;
 
-extern int mod_verify_sig(const void *mod, unsigned long modlen,
-			  const void *sig, unsigned long siglen);
+extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
diff --git a/kernel/module.c b/kernel/module.c
index 0e2da8695f8e..6e48c3a43599 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	src = (void *)info->hdr + symsect->sh_offset;
 	nsrc = symsect->sh_size / sizeof(*src);
 
+	/* strtab always starts with a nul, so offset 0 is the empty string. */
+	strtab_size = 1;
+
 	/* Compute total space required for the core symbols' strtab. */
-	for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
-		if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
-			strtab_size += strlen(&info->strtab[src->st_name]) + 1;
+	for (ndst = i = 0; i < nsrc; i++) {
+		if (i == 0 ||
+		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
 			ndst++;
 		}
+	}
 
 	/* Append room for core symbols at end of core part. */
 	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
@@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
 	mod->core_symtab = dst = mod->module_core + info->symoffs;
 	mod->core_strtab = s = mod->module_core + info->stroffs;
 	src = mod->symtab;
-	*dst = *src;
 	*s++ = 0;
-	for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
-		if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
-			continue;
-
-		dst[ndst] = *src;
-		dst[ndst++].st_name = s - mod->core_strtab;
-		s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
+	for (ndst = i = 0; i < mod->num_symtab; i++) {
+		if (i == 0 ||
+		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+			dst[ndst] = src[i];
+			dst[ndst++].st_name = s - mod->core_strtab;
+			s += strlcpy(s, &mod->strtab[src[i].st_name],
+				     KSYM_NAME_LEN) + 1;
+		}
 	}
 	mod->core_num_syms = ndst;
 }
@@ -2421,25 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,
 
 #ifdef CONFIG_MODULE_SIG
 static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *len)
+			    const void *mod, unsigned long *_len)
 {
 	int err = -ENOKEY;
-	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	const void *p = mod, *end = mod + *len;
-
-	/* Poor man's memmem. */
-	while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) {
-		if (p + markerlen > end)
-			break;
-
-		if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) {
-			const void *sig = p + markerlen;
-			/* Truncate module up to signature. */
-			*len = p - mod;
-			err = mod_verify_sig(mod, *len, sig, end - sig);
-			break;
-		}
-		p++;
+	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	unsigned long len = *_len;
+
+	if (len > markerlen &&
+	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+		/* We truncate the module to discard the signature */
+		*_len -= markerlen;
+		err = mod_verify_sig(mod, _len);
 	}
 
 	if (!err) {
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index 6b09f6983ac0..ea1b1df5dbb0 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -183,27 +183,33 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
 /*
  * Verify the signature on a module.
  */
-int mod_verify_sig(const void *mod, unsigned long modlen,
-		   const void *sig, unsigned long siglen)
+int mod_verify_sig(const void *mod, unsigned long *_modlen)
 {
 	struct public_key_signature *pks;
 	struct module_signature ms;
 	struct key *key;
-	size_t sig_len;
+	const void *sig;
+	size_t modlen = *_modlen, sig_len;
 	int ret;
 
-	pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen);
+	pr_devel("==>%s(,%zu)\n", __func__, modlen);
 
-	if (siglen <= sizeof(ms))
+	if (modlen <= sizeof(ms))
 		return -EBADMSG;
 
-	memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms));
-	siglen -= sizeof(ms);
+	memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
+	modlen -= sizeof(ms);
 
 	sig_len = be32_to_cpu(ms.sig_len);
-	if (sig_len >= siglen ||
-	    siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len)
+	if (sig_len >= modlen)
 		return -EBADMSG;
+	modlen -= sig_len;
+	if ((size_t)ms.signer_len + ms.key_id_len >= modlen)
+		return -EBADMSG;
+	modlen -= (size_t)ms.signer_len + ms.key_id_len;
+
+	*_modlen = modlen;
+	sig = mod + modlen;
 
 	/* For the moment, only support RSA and X.509 identifiers */
 	if (ms.algo != PKEY_ALGO_RSA ||
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 478bad2745e3..7b07cc0dfb75 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -71,12 +71,22 @@ err_alloc:
 	return NULL;
 }
 
+/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
+#define MAX_PID_NS_LEVEL 32
+
 static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
 {
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
-	int i, err = -ENOMEM;
+	int i;
+	int err;
+
+	if (level > MAX_PID_NS_LEVEL) {
+		err = -EINVAL;
+		goto out;
+	}
 
+	err = -ENOMEM;
 	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
 		goto out;
@@ -133,19 +143,26 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
 	return create_pid_namespace(old_ns);
 }
 
-void free_pid_ns(struct kref *kref)
+static void free_pid_ns(struct kref *kref)
 {
-	struct pid_namespace *ns, *parent;
+	struct pid_namespace *ns;
 
 	ns = container_of(kref, struct pid_namespace, kref);
-
-	parent = ns->parent;
 	destroy_pid_namespace(ns);
+}
+
+void put_pid_ns(struct pid_namespace *ns)
+{
+	struct pid_namespace *parent;
 
-	if (parent != NULL)
-		put_pid_ns(parent);
+	while (ns != &init_pid_ns) {
+		parent = ns->parent;
+		if (!kref_put(&ns->kref, free_pid_ns))
+			break;
+		ns = parent;
+	}
 }
-EXPORT_SYMBOL_GPL(free_pid_ns);
+EXPORT_SYMBOL_GPL(put_pid_ns);
 
 void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
diff --git a/kernel/printk.c b/kernel/printk.c
index 66a2ea37b576..2d607f4d1797 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1890,7 +1890,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-	case CPU_DYING:
 	case CPU_DOWN_FAILED:
 	case CPU_UP_CANCELED:
 		console_lock();
diff --git a/kernel/sys.c b/kernel/sys.c
index c5cb5b99cb81..e6e0ece5f6a0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1265,15 +1265,16 @@ DECLARE_RWSEM(uts_sem);
  * Work around broken programs that cannot handle "Linux 3.0".
  * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
  */
-static int override_release(char __user *release, int len)
+static int override_release(char __user *release, size_t len)
 {
 	int ret = 0;
-	char buf[65];
 
 	if (current->personality & UNAME26) {
-		char *rest = UTS_RELEASE;
+		const char *rest = UTS_RELEASE;
+		char buf[65] = { 0 };
 		int ndots = 0;
 		unsigned v;
+		size_t copy;
 
 		while (*rest) {
 			if (*rest == '.' && ++ndots >= 3)
@@ -1283,8 +1284,9 @@ static int override_release(char __user *release, int len)
 			rest++;
 		}
 		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
-		snprintf(buf, len, "2.6.%u%s", v, rest);
-		ret = copy_to_user(release, buf, len);
+		copy = clamp_t(size_t, len, 1, sizeof(buf));
+		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
+		ret = copy_to_user(release, buf, copy + 1);
 	}
 	return ret;
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b32ed0e385a5..b979426d16c6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1567,6 +1567,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 
 		put_online_cpus();
 	} else {
+		/* Make sure this CPU has been intitialized */
+		if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
+			goto out;
+
 		cpu_buffer = buffer->buffers[cpu_id];
 
 		if (nr_pages == cpu_buffer->nr_pages)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9d4c8d5a1f53..dd4b80a9f1a9 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -116,7 +116,7 @@ static unsigned long get_timestamp(int this_cpu)
 	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
-static unsigned long get_sample_period(void)
+static u64 get_sample_period(void)
 {
 	/*
 	 * convert watchdog_thresh from seconds to ns
@@ -125,7 +125,7 @@ static unsigned long get_sample_period(void)
 	 * and hard thresholds) to increment before the
 	 * hardlockup detector generates a warning
 	 */
-	return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
+	return get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
 }
 
 /* Commands for resetting the watchdog */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d951daa0ca9a..042d221d33cc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2982,7 +2982,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
 
 	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
 	local_irq_restore(flags);
-	return true;
+	return ret;
 }
 EXPORT_SYMBOL(cancel_delayed_work);
author	Mauro Carvalho Chehab <mchehab@redhat.com>	2012-11-28 10:22:38 +0100
committer	Mauro Carvalho Chehab <mchehab@redhat.com>	2012-11-28 10:22:38 +0100
commit	d033a308d81ec83908760a15a841c2bd0d5e0ea3 (patch)
tree	647abd67987fe4d7fee80105fc823edf3f53b4f6 /kernel
parent	[media] dma-mapping: fix dma_common_get_sgtable() conditional compilation (diff)
parent	Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git... (diff)
download	linux-d033a308d81ec83908760a15a841c2bd0d5e0ea3.tar.xz linux-d033a308d81ec83908760a15a841c2bd0d5e0ea3.zip