summaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-04-01 21:30:01 +0200
committerTejun Heo <tj@kernel.org>2012-04-01 21:55:00 +0200
commit959d851caa48829eb85cb85aa949fd6b4c5d5bc6 (patch)
tree3ba9c94ec346275fb44c4f0d1cd2537cdff8d811 /kernel/fork.c
parentblkcg: change a spin_lock() to spin_lock_irq() (diff)
parentcgroup: make css->refcnt clearing on cgroup removal optional (diff)
downloadlinux-959d851caa48829eb85cb85aa949fd6b4c5d5bc6.tar.xz
linux-959d851caa48829eb85cb85aa949fd6b4c5d5bc6.zip
Merge branch 'for-3.5' of ../cgroup into block/for-3.5/core-merged
cgroup/for-3.5 contains the following changes which blk-cgroup needs to proceed with the on-going cleanup. * Dynamic addition and removal of cftypes to make config/stat file handling modular for policies. * cgroup removal update to not wait for css references to drain to fix blkcg removal hang caused by cfq caching cfqgs. Pull in cgroup/for-3.5 into block/for-3.5/core. This causes the following conflicts in block/blk-cgroup.c. * 761b3ef50e "cgroup: remove cgroup_subsys argument from callbacks" conflicts with blkiocg_pre_destroy() addition and blkiocg_attach() removal. Resolved by removing @subsys from all subsys methods. * 676f7c8f84 "cgroup: relocate cftype and cgroup_subsys definitions in controllers" conflicts with ->pre_destroy() and ->attach() updates and removal of modular config. Resolved by dropping forward declarations of the methods and applying updates to the relocated blkio_subsys. * 4baf6e3325 "cgroup: convert all non-memcg controllers to the new cftype interface" builds upon the previous item. Resolved by adding ->base_cftypes to the relocated blkio_subsys. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c101
1 files changed, 74 insertions, 27 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index a1b632713e43..08eb8584e2a8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
+#include <linux/signalfd.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -192,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
+ security_task_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
@@ -354,7 +356,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
- if (security_vm_enough_memory(len))
+ if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
goto fail_nomem;
charge = len;
}
@@ -510,6 +512,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
return NULL;
}
+static void check_mm(struct mm_struct *mm)
+{
+ int i;
+
+ for (i = 0; i < NR_MM_COUNTERS; i++) {
+ long x = atomic_long_read(&mm->rss_stat.count[i]);
+
+ if (unlikely(x))
+ printk(KERN_ALERT "BUG: Bad rss-counter state "
+ "mm:%p idx:%d val:%ld\n", mm, i, x);
+ }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ VM_BUG_ON(mm->pmd_huge_pte);
+#endif
+}
+
/*
* Allocate and initialize an mm_struct.
*/
@@ -537,9 +556,7 @@ void __mmdrop(struct mm_struct *mm)
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- VM_BUG_ON(mm->pmd_huge_pte);
-#endif
+ check_mm(mm);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -667,6 +684,38 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
return mm;
}
+static void complete_vfork_done(struct task_struct *tsk)
+{
+ struct completion *vfork;
+
+ task_lock(tsk);
+ vfork = tsk->vfork_done;
+ if (likely(vfork)) {
+ tsk->vfork_done = NULL;
+ complete(vfork);
+ }
+ task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+ struct completion *vfork)
+{
+ int killed;
+
+ freezer_do_not_count();
+ killed = wait_for_completion_killable(vfork);
+ freezer_count();
+
+ if (killed) {
+ task_lock(child);
+ child->vfork_done = NULL;
+ task_unlock(child);
+ }
+
+ put_task_struct(child);
+ return killed;
+}
+
/* Please note the differences between mmput and mm_release.
* mmput is called whenever we stop holding onto a mm_struct,
* error success whatever.
@@ -682,8 +731,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
*/
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
- struct completion *vfork_done = tsk->vfork_done;
-
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
if (unlikely(tsk->robust_list)) {
@@ -703,17 +750,15 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
- /* notify parent sleeping on vfork() */
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
+ if (tsk->vfork_done)
+ complete_vfork_done(tsk);
/*
* If we're exiting normally, clear a user-space tid field if
* requested. We leave this alone when dying by signal, to leave
* the value intact in a core dump, and to save the unnecessary
- * trouble otherwise. Userland only wants this done for a sys_exit.
+ * trouble, say, a killed vfork parent shouldn't touch this mm.
+ * Userland only wants this done for a sys_exit.
*/
if (tsk->clear_child_tid) {
if (!(tsk->flags & PF_SIGNALED) &&
@@ -934,8 +979,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_sighand(struct sighand_struct *sighand)
{
- if (atomic_dec_and_test(&sighand->count))
+ if (atomic_dec_and_test(&sighand->count)) {
+ signalfd_cleanup(sighand);
kmem_cache_free(sighand_cachep, sighand);
+ }
}
@@ -1003,6 +1050,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
+ sig->has_child_subreaper = current->signal->has_child_subreaper ||
+ current->signal->is_child_subreaper;
+
mutex_init(&sig->cred_guard_mutex);
return 0;
@@ -1014,7 +1064,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
new_flags |= PF_FORKNOEXEC;
- new_flags |= PF_STARTING;
p->flags = new_flags;
}
@@ -1191,6 +1240,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_CPUSETS
p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+ seqcount_init(&p->mems_allowed_seq);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
@@ -1309,7 +1359,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
clear_all_latency_tracing(p);
/* ok, now we should be set up.. */
- p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+ if (clone_flags & CLONE_THREAD)
+ p->exit_signal = -1;
+ else if (clone_flags & CLONE_PARENT)
+ p->exit_signal = current->group_leader->exit_signal;
+ else
+ p->exit_signal = (clone_flags & CSIGNAL);
+
p->pdeath_signal = 0;
p->exit_state = 0;
@@ -1544,16 +1600,9 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
+ get_task_struct(p);
}
- /*
- * We set PF_STARTING at creation in case tracing wants to
- * use this to distinguish a fully live task from one that
- * hasn't finished SIGSTOP raising yet. Now we clear it
- * and set the child going.
- */
- p->flags &= ~PF_STARTING;
-
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */
@@ -1561,10 +1610,8 @@ long do_fork(unsigned long clone_flags,
ptrace_event(trace, nr);
if (clone_flags & CLONE_VFORK) {
- freezer_do_not_count();
- wait_for_completion(&vfork);
- freezer_count();
- ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+ if (!wait_for_vfork_done(p, &vfork))
+ ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
}
} else {
nr = PTR_ERR(p);