summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoman Gushchin <guro@fb.com>2017-07-11 00:49:05 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-11 01:32:32 +0200
commit422580c3cea7faaca67f6199375b79565d3d8ebd (patch)
treeede7430559b5fe88ea08315a0cdf9bd3e99a4397
parentuserfaultfd: non-cooperative: add madvise() event for MADV_FREE request (diff)
downloadlinux-422580c3cea7faaca67f6199375b79565d3d8ebd.tar.xz
linux-422580c3cea7faaca67f6199375b79565d3d8ebd.zip
mm/oom_kill.c: add tracepoints for oom reaper-related events
During the debugging of the problem described in https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa in https://lkml.org/lkml/2017/5/19/383 , I've found that the existing debug output is not really useful to understand issues related to the oom reaper. So, I assume, that adding some tracepoints might help with debugging of similar issues. Trace the following events: 1) a process is marked as an oom victim, 2) a process is added to the oom reaper list, 3) the oom reaper starts reaping process's mm, 4) the oom reaper finished reaping, 5) the oom reaper skips reaping. How it works in practice? Below is an example which show how the problem mentioned above can be found: one process is added twice to the oom_reaper list: $ cd /sys/kernel/debug/tracing $ echo "oom:mark_victim" > set_event $ echo "oom:wake_reaper" >> set_event $ echo "oom:skip_task_reaping" >> set_event $ echo "oom:start_task_reaping" >> set_event $ echo "oom:finish_task_reaping" >> set_event $ cat trace_pipe allocate-502 [001] .... 91.836405: mark_victim: pid=502 allocate-502 [001] .N.. 91.837356: wake_reaper: pid=502 allocate-502 [000] .N.. 91.871149: wake_reaper: pid=502 oom_reaper-23 [000] .... 91.871177: start_task_reaping: pid=502 oom_reaper-23 [000] .N.. 91.879511: finish_task_reaping: pid=502 oom_reaper-23 [000] .... 91.879580: skip_task_reaping: pid=502 Link: http://lkml.kernel.org/r/20170530185231.GA13412@castle Signed-off-by: Roman Gushchin <guro@fb.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/trace/events/oom.h80
-rw-r--r--mm/oom_kill.c7
2 files changed, 87 insertions, 0 deletions
diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb27221a..c3c19d47ae5e 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
__entry->wmark_check)
);
+TRACE_EVENT(mark_victim,
+ TP_PROTO(int pid),
+
+ TP_ARGS(pid),
+
+ TP_STRUCT__entry(
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ ),
+
+ TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+ TP_PROTO(int pid),
+
+ TP_ARGS(pid),
+
+ TP_STRUCT__entry(
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ ),
+
+ TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+ TP_PROTO(int pid),
+
+ TP_ARGS(pid),
+
+ TP_STRUCT__entry(
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ ),
+
+ TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+ TP_PROTO(int pid),
+
+ TP_ARGS(pid),
+
+ TP_STRUCT__entry(
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ ),
+
+ TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+ TP_PROTO(int pid),
+
+ TP_ARGS(pid),
+
+ TP_STRUCT__entry(
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = pid;
+ ),
+
+ TP_printk("pid=%d", __entry->pid)
+);
+
#ifdef CONFIG_COMPACTION
TRACE_EVENT(compact_retry,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0e2c925e7826..9e8b4f030c1c 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
if (!down_read_trylock(&mm->mmap_sem)) {
ret = false;
+ trace_skip_task_reaping(tsk->pid);
goto unlock_oom;
}
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
*/
if (!mmget_not_zero(mm)) {
up_read(&mm->mmap_sem);
+ trace_skip_task_reaping(tsk->pid);
goto unlock_oom;
}
+ trace_start_task_reaping(tsk->pid);
+
/*
* Tell all users of get_user/copy_from_user etc... that the content
* is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
* put the oom_reaper out of the way.
*/
mmput_async(mm);
+ trace_finish_task_reaping(tsk->pid);
unlock_oom:
mutex_unlock(&oom_lock);
return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
tsk->oom_reaper_list = oom_reaper_list;
oom_reaper_list = tsk;
spin_unlock(&oom_reaper_lock);
+ trace_wake_reaper(tsk->pid);
wake_up(&oom_reaper_wait);
}
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
*/
__thaw_task(tsk);
atomic_inc(&oom_victims);
+ trace_mark_victim(tsk->pid);
}
/**