From e5991371ee0d1c0ce19e133c6f9075b49c5b4ae8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 6 Jan 2009 14:39:22 -0800 Subject: mm: remove cgroup_mm_owner_callbacks cgroup_mm_owner_callbacks() was brought in to support the memrlimit controller, but sneaked into mainline ahead of it. That controller has now been shelved, and the mm_owner_changed() args were inadequate for it anyway (they needed an mm pointer instead of a task pointer). Remove the dead code, and restore mm_update_next_owner() locking to how it was before: taking mmap_sem there does nothing for memcontrol.c, now the only user of mm->owner. Signed-off-by: Hugh Dickins Cc: Paul Menage Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index c9e5a1c14e08..f923724ab3c9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -642,35 +642,31 @@ retry: /* * We found no owner yet mm_users > 1: this implies that we are * most likely racing with swapoff (try_to_unuse()) or /proc or - * ptrace or page migration (get_task_mm()). Mark owner as NULL, - * so that subsystems can understand the callback and take action. + * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ - down_write(&mm->mmap_sem); - cgroup_mm_owner_callbacks(mm->owner, NULL); mm->owner = NULL; - up_write(&mm->mmap_sem); return; assign_new_owner: BUG_ON(c == p); get_task_struct(c); - read_unlock(&tasklist_lock); - down_write(&mm->mmap_sem); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); - up_write(&mm->mmap_sem); put_task_struct(c); goto retry; } - cgroup_mm_owner_callbacks(mm->owner, c); mm->owner = c; task_unlock(c); - up_write(&mm->mmap_sem); put_task_struct(c); } #endif /* CONFIG_MM_OWNER */ -- cgit v1.2.3 From 901608d9045146aec6f14a7777ea4b1501c379f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Jan 2009 14:40:29 -0800 Subject: mm: introduce get_mm_hiwater_xxx(), fix taskstats->hiwater_xxx accounting xacct_add_tsk() relies on do_exit()->update_hiwater_xxx() and uses mm->hiwater_xxx directly, this leads to 2 problems: - taskstats_user_cmd() can call fill_pid()->xacct_add_tsk() at any moment before the task exits, so we should check the current values of rss/vm anyway. - do_exit()->update_hiwater_xxx() calls are racy. An exiting thread can be preempted right before mm->hiwater_xxx = new_val, and another thread can use A_LOT of memory and exit in between. When the first thread resumes it can be the last thread in the thread group, in that case we report the wrong hiwater_xxx values which do not take A_LOT into account. Introduce get_mm_hiwater_rss() and get_mm_hiwater_vm() helpers and change xacct_add_tsk() to use them. The first helper will also be used by rusage->ru_maxrss accounting. Kill do_exit()->update_hiwater_xxx() calls. Unless we are going to decrease rss/vm there is no point to update mm->hiwater_xxx, and nobody can look at this mm_struct when exit_mmap() actually unmaps the memory. Signed-off-by: Oleg Nesterov Acked-by: Hugh Dickins Reviewed-by: KOSAKI Motohiro Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ kernel/exit.c | 5 +---- kernel/tsacct.c | 4 ++-- mm/mmap.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 38a3f4b15394..ea415136ac9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +#define get_mm_hiwater_rss(mm) max((mm)->hiwater_rss, get_mm_rss(mm)) +#define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm) + extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); diff --git a/kernel/exit.c b/kernel/exit.c index f923724ab3c9..c7740fa3252c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1051,10 +1051,7 @@ NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - if (tsk->mm) { - update_hiwater_rss(tsk->mm); - update_hiwater_vm(tsk->mm); - } + group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 2dc06ab35716..43f891b05a4b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ - stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB; - stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; + stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB; + stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } stats->read_char = p->ioac.rchar; diff --git a/mm/mmap.c b/mm/mmap.c index e4507b23e620..1f97d8aa9b05 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2102,7 +2102,7 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); - /* Don't update_hiwater_rss(mm) here, do_exit already did */ + /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); -- cgit v1.2.3