From aeafd623f866c429307e3a4a39998f5f06b4f00e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Dec 2016 14:18:51 +0100 Subject: perf tools: Move headers check into bash script To make it nicer and easily maintainable. Also moving the check into fixdep sub make, so its output is not scattered around the build output. Removing extra $$ from mman*.h checks. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481030331-31944-5-git-send-email-jolsa@kernel.org [ Use /bin/sh, and 'function check() {' -> 'check () {' to make it work with busybox, in Alpine Linux, for instance ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 94 +-------------------------------------------- tools/perf/check-headers.sh | 59 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 93 deletions(-) create mode 100755 tools/perf/check-headers.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8f1c258b151a..e9ec531131ca 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -201,6 +201,7 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) $(goals) all: sub-make sub-make: fixdep + @./check-headers.sh $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals) else # force_fixdep @@ -404,99 +405,6 @@ export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE - @(test -f ../../include/uapi/linux/perf_event.h && ( \ - (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/hash.h && ( \ - (diff -B ../include/linux/hash.h ../../include/linux/hash.h >/dev/null) \ - || echo "Warning: tools/include/linux/hash.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/hw_breakpoint.h && ( \ - (diff -B ../include/uapi/linux/hw_breakpoint.h ../../include/uapi/linux/hw_breakpoint.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/hw_breakpoint.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ - (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/required-features.h && ( \ - (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ - (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/lib/memset_64.S && ( \ - (diff -B -I "^EXPORT_SYMBOL" -I "^#include " ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ - || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/perf_regs.h ../../arch/arm64/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/perf_regs.h ../../arch/powerpc/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/perf_regs.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/perf_regs.h ../../arch/x86/include/uapi/asm/perf_regs.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/perf_regs.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm.h ../../arch/x86/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/kvm_perf.h ../../arch/x86/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/svm.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/svm.h ../../arch/x86/include/uapi/asm/svm.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/svm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/x86/include/uapi/asm/vmx.h && ( \ - (diff -B ../arch/x86/include/uapi/asm/vmx.h ../../arch/x86/include/uapi/asm/vmx.h >/dev/null) \ - || echo "Warning: tools/arch/x86/include/uapi/asm/vmx.h differs from kernel" >&2 )) || true - @(test -f ../../arch/powerpc/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/powerpc/include/uapi/asm/kvm.h ../../arch/powerpc/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/powerpc/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm.h ../../arch/s390/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/kvm_perf.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/kvm_perf.h ../../arch/s390/include/uapi/asm/kvm_perf.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/kvm_perf.h differs from kernel" >&2 )) || true - @(test -f ../../arch/s390/include/uapi/asm/sie.h && ( \ - (diff -B ../arch/s390/include/uapi/asm/sie.h ../../arch/s390/include/uapi/asm/sie.h >/dev/null) \ - || echo "Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm/include/uapi/asm/kvm.h ../../arch/arm/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../arch/arm64/include/uapi/asm/kvm.h && ( \ - (diff -B ../arch/arm64/include/uapi/asm/kvm.h ../../arch/arm64/include/uapi/asm/kvm.h >/dev/null) \ - || echo "Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/arch_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/arch_hweight.h ../../include/asm-generic/bitops/arch_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/arch_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/const_hweight.h && ( \ - (diff -B ../include/asm-generic/bitops/const_hweight.h ../../include/asm-generic/bitops/const_hweight.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/const_hweight.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/__fls.h && ( \ - (diff -B ../include/asm-generic/bitops/__fls.h ../../include/asm-generic/bitops/__fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/__fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls.h && ( \ - (diff -B ../include/asm-generic/bitops/fls.h ../../include/asm-generic/bitops/fls.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls.h differs from kernel" >&2 )) || true - @(test -f ../../include/asm-generic/bitops/fls64.h && ( \ - (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \ - || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true - @(test -f ../../include/linux/coresight-pmu.h && ( \ - (diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \ - || echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman-common.h && ( \ - (diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/asm-generic/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true - @(test -f ../../include/uapi/linux/mman.h && ( \ - (diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \ - || echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true $(Q)$(MAKE) $(build)=perf $(JEVENTS_IN): FORCE diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh new file mode 100755 index 000000000000..c747bfd7f14d --- /dev/null +++ b/tools/perf/check-headers.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +HEADERS=' +include/uapi/linux/perf_event.h +include/linux/hash.h +include/uapi/linux/hw_breakpoint.h +arch/x86/include/asm/disabled-features.h +arch/x86/include/asm/required-features.h +arch/x86/include/asm/cpufeatures.h +arch/arm/include/uapi/asm/perf_regs.h +arch/arm64/include/uapi/asm/perf_regs.h +arch/powerpc/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/perf_regs.h +arch/x86/include/uapi/asm/kvm.h +arch/x86/include/uapi/asm/kvm_perf.h +arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/vmx.h +arch/powerpc/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm.h +arch/s390/include/uapi/asm/kvm_perf.h +arch/s390/include/uapi/asm/sie.h +arch/arm/include/uapi/asm/kvm.h +arch/arm64/include/uapi/asm/kvm.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h +include/asm-generic/bitops/__fls.h +include/asm-generic/bitops/fls.h +include/asm-generic/bitops/fls64.h +include/linux/coresight-pmu.h +include/uapi/asm-generic/mman-common.h +' + +check () { + file=$1 + opts= + + shift + while [ -n "$*" ]; do + opts="$opts \"$1\"" + shift + done + + cmd="diff $opts ../$file ../../$file > /dev/null" + + test -f ../../$file && + eval $cmd || echo "Warning: $file differs from kernel" >&2 +} + + +# simple diff check +for i in $HEADERS; do + check $i -B +done + +# diff with extra ignore lines +check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " +check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" +check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" -- cgit v1.2.3 From 96039c7c52e03b7d6dd773664e74b79e3ae9856a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:50 +0900 Subject: perf sched timehist: Split is_idle_sample() The is_idle_sample() function actually does more than determining whether sample come from idle task. Split the callchain part into save_task_callchain() to make it clearer. Also checking prev_pid from trace data looks preferred than just checking sample->pid since it's possible, although rare, to have invalid 0 pid/tid on scheduling an exiting task. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-2-namhyung@kernel.org [ Remove some needless () in some return statements ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1a3f1be93372..966eddce1609 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1939,39 +1939,40 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_run_time += r->dt_run; } -static bool is_idle_sample(struct perf_sched *sched, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine) +static bool is_idle_sample(struct perf_sample *sample, + struct perf_evsel *evsel) { - struct thread *thread; - struct callchain_cursor *cursor = &callchain_cursor; - /* pid 0 == swapper == idle task */ - if (sample->pid == 0) - return true; + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) + return perf_evsel__intval(evsel, sample, "prev_pid") == 0; - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { - if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) - return true; - } + return sample->pid == 0; +} + +static void save_task_callchain(struct perf_sched *sched, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct callchain_cursor *cursor = &callchain_cursor; + struct thread *thread; /* want main thread for process - has maps */ thread = machine__findnew_thread(machine, sample->pid, sample->pid); if (thread == NULL) { pr_debug("Failed to get thread for pid %d.\n", sample->pid); - return false; + return; } if (!symbol_conf.use_callchain || sample->callchain == NULL) - return false; + return; if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); - return false; + return; } callchain_cursor_commit(cursor); @@ -1994,8 +1995,6 @@ static bool is_idle_sample(struct perf_sched *sched, callchain_cursor_advance(cursor); } - - return false; } /* @@ -2111,7 +2110,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, { struct thread *thread; - if (is_idle_sample(sched, sample, evsel, machine)) { + if (is_idle_sample(sample, evsel)) { thread = get_idle_thread(sample->cpu); if (thread == NULL) pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); @@ -2124,6 +2123,8 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, pr_debug("Failed to get thread for tid %d. skipping sample.\n", sample->tid); } + + save_task_callchain(sched, sample, evsel, machine); } return thread; -- cgit v1.2.3 From 3bc2fa9cb829ccf6527e7117d9af769d93ee6d39 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:51 +0900 Subject: perf sched timehist: Introduce struct idle_time_data The struct idle_time_data is to keep idle stats with callchains entering to the idle task. The normal thread_runtime calculation is done transparently since it extends the struct thread_runtime. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-3-namhyung@kernel.org [ Align struct field names ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 966eddce1609..e108b0f6a246 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -230,6 +230,15 @@ struct evsel_runtime { u32 ncpu; /* highest cpu slot allocated */ }; +/* per cpu idle time data */ +struct idle_thread_runtime { + struct thread_runtime tr; + struct thread *last_thread; + struct rb_root sorted_root; + struct callchain_root callchain; + struct callchain_cursor cursor; +}; + /* track idle times per cpu */ static struct thread **idle_threads; static int idle_max_cpu; @@ -1997,13 +2006,31 @@ static void save_task_callchain(struct perf_sched *sched, } } +static int init_idle_thread(struct thread *thread) +{ + struct idle_thread_runtime *itr; + + thread__set_comm(thread, idle_comm, 0); + + itr = zalloc(sizeof(*itr)); + if (itr == NULL) + return -ENOMEM; + + init_stats(&itr->tr.run_stats); + callchain_init(&itr->callchain); + callchain_cursor_reset(&itr->cursor); + thread__set_priv(thread, itr); + + return 0; +} + /* * Track idle stats per cpu by maintaining a local thread * struct for the idle task on each cpu. */ static int init_idle_threads(int ncpu) { - int i; + int i, ret; idle_threads = zalloc(ncpu * sizeof(struct thread *)); if (!idle_threads) @@ -2017,7 +2044,9 @@ static int init_idle_threads(int ncpu) if (idle_threads[i] == NULL) return -ENOMEM; - thread__set_comm(idle_threads[i], idle_comm, 0); + ret = init_idle_thread(idle_threads[i]); + if (ret < 0) + return ret; } return 0; @@ -2064,8 +2093,8 @@ static struct thread *get_idle_thread(int cpu) idle_threads[cpu] = thread__new(0, 0); if (idle_threads[cpu]) { - idle_threads[cpu]->tid = 0; - thread__set_comm(idle_threads[cpu], idle_comm, 0); + if (init_idle_thread(idle_threads[cpu]) < 0) + return NULL; } } -- cgit v1.2.3 From 699b5b920db04a6ff5c03a519e4c182aeb350952 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:52 +0900 Subject: perf sched timehist: Save callchain when entering idle In order to investigate the idleness reason, it is necessary to keep the callchains when entering idle. This can be identified by the sched:sched_switch event having the next_pid field as 0. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-4-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-1-namhyung@kernel.org [ Merged fix from Namhyung, see second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e108b0f6a246..dc83b803ca54 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -200,6 +200,7 @@ struct perf_sched { /* options for timehist command */ bool summary; bool summary_only; + bool idle_hist; bool show_callchain; unsigned int max_stack; bool show_cpu_visual; @@ -2101,6 +2102,15 @@ static struct thread *get_idle_thread(int cpu) return idle_threads[cpu]; } +static void save_idle_callchain(struct idle_thread_runtime *itr, + struct perf_sample *sample) +{ + if (!symbol_conf.use_callchain || sample->callchain == NULL) + return; + + callchain_cursor__copy(&itr->cursor, &callchain_cursor); +} + /* * handle runtime stats saved per thread */ @@ -2154,6 +2164,26 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } save_task_callchain(sched, sample, evsel, machine); + if (sched->idle_hist) { + struct thread *idle; + struct idle_thread_runtime *itr; + + idle = get_idle_thread(sample->cpu); + if (idle == NULL) { + pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); + return NULL; + } + + itr = thread__priv(idle); + if (itr == NULL) + return NULL; + + itr->last_thread = thread; + + /* copy task callchain when entering to idle */ + if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + save_idle_callchain(itr, sample); + } } return thread; -- cgit v1.2.3 From a4b2b6f56e0cfe729cf89318d44b6a875b31d95a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:53 +0900 Subject: perf sched timehist: Skip non-idle events when necessary Sometimes it only focuses on idle-related events like upcoming idle-hist feature. In this case we don't want to see other event to reduce noise. Signed-off-by: Namhyung Kim Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index dc83b803ca54..c8e7848e71a7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2190,7 +2190,9 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, } static bool timehist_skip_sample(struct perf_sched *sched, - struct thread *thread) + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) { bool rc = false; @@ -2199,10 +2201,19 @@ static bool timehist_skip_sample(struct perf_sched *sched, sched->skipped_samples++; } + if (sched->idle_hist) { + if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + rc = true; + else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && + perf_evsel__intval(evsel, sample, "next_pid") != 0) + rc = true; + } + return rc; } static void timehist_print_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct machine *machine, struct thread *awakened) @@ -2215,8 +2226,8 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, return; /* show wakeup unless both awakee and awaker are filtered */ - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, awakened)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, awakened, evsel, sample)) { return; } @@ -2261,7 +2272,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, /* show wakeups if requested */ if (sched->show_wakeups && !perf_time__skip_sample(&sched->ptime, sample->time)) - timehist_print_wakeup_event(sched, sample, machine, thread); + timehist_print_wakeup_event(sched, evsel, sample, machine, thread); return 0; } @@ -2288,8 +2299,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, if (thread == NULL) return; - if (timehist_skip_sample(sched, thread) && - timehist_skip_sample(sched, migrated)) { + if (timehist_skip_sample(sched, thread, evsel, sample) && + timehist_skip_sample(sched, migrated, evsel, sample)) { return; } @@ -2374,7 +2385,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - if (timehist_skip_sample(sched, thread)) + if (timehist_skip_sample(sched, thread, evsel, sample)) goto out; tr = thread__get_runtime(thread); -- cgit v1.2.3 From 07235f84ece6b66f43334881806aad3467cf3d84 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:54 +0900 Subject: perf sched timehist: Add -I/--idle-hist option The --idle-hist option is to analyze system idle state so which process makes cpu to go idle. If this option is specified, non-idle events will be skipped and processes switching to/from idle will be shown. This option is mostly useful when used with --summary(-only) option. In the idle-time summary view, idle time is accounted to previous thread which is run before idle task. The example output looks like following: Idle-time summary comm parent sched-out idle-time min-idle avg-idle max-idle stddev migrations (count) (msec) (msec) (msec) (msec) % -------------------------------------------------------------------------------------------- rcu_preempt[7] 2 95 550.872 0.011 5.798 23.146 7.63 0 migration/1[16] 2 1 15.558 15.558 15.558 15.558 0.00 0 khugepaged[39] 2 1 3.062 3.062 3.062 3.062 0.00 0 kworker/0:1H[124] 2 2 4.728 0.611 2.364 4.116 74.12 0 systemd-journal[167] 1 1 4.510 4.510 4.510 4.510 0.00 0 kworker/u16:3[558] 2 13 74.737 0.080 5.749 12.960 21.96 0 irq/34-iwlwifi[628] 2 21 118.403 0.032 5.638 23.990 24.00 0 kworker/u17:0[673] 2 1 3.523 3.523 3.523 3.523 0.00 0 dbus-daemon[722] 1 1 6.743 6.743 6.743 6.743 0.00 0 ifplugd[741] 1 1 58.826 58.826 58.826 58.826 0.00 0 wpa_supplicant[1490] 1 1 13.302 13.302 13.302 13.302 0.00 0 wpa_actiond[1492] 1 2 4.064 0.168 2.032 3.896 91.72 0 dockerd[1500] 1 1 0.055 0.055 0.055 0.055 0.00 0 ... Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-6-namhyung@kernel.org Link: http://lkml.kernel.org/r/20161213080632.19099-2-namhyung@kernel.org [ Merged fix sent by Namhyumg, as posted in the second Link: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 +++ tools/perf/builtin-sched.c | 46 +++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 7775b1eb2bee..76173969ab80 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-I:: +--idle-hist:: + Show idle-related events only. + --time:: Only analyze samples within given time window: ,. Times have the format seconds.microseconds. If start is not given (i.e., time diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c8e7848e71a7..0b14265432df 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2421,7 +2421,36 @@ static int timehist_sched_change_event(struct perf_tool *tool, t = ptime->end; } - timehist_update_runtime_stats(tr, t, tprev); + if (!sched->idle_hist || thread->tid == 0) { + timehist_update_runtime_stats(tr, t, tprev); + + if (sched->idle_hist) { + struct idle_thread_runtime *itr = (void *)tr; + struct thread_runtime *last_tr; + + BUG_ON(thread->tid != 0); + + if (itr->last_thread == NULL) + goto out; + + /* add current idle time as last thread's runtime */ + last_tr = thread__get_runtime(itr->last_thread); + if (last_tr == NULL) + goto out; + + timehist_update_runtime_stats(last_tr, t, tprev); + /* + * remove delta time of last thread as it's not updated + * and otherwise it will show an invalid value next + * time. we only care total run time and run stat. + */ + last_tr->dt_run = 0; + last_tr->dt_wait = 0; + last_tr->dt_delay = 0; + + itr->last_thread = NULL; + } + } if (!sched->summary_only) timehist_print_sample(sched, sample, &al, thread, t); @@ -2543,9 +2572,15 @@ static void timehist_print_summary(struct perf_sched *sched, if (comm_width < 30) comm_width = 30; - printf("\nRuntime summary\n"); - printf("%*s parent sched-in ", comm_width, "comm"); - printf(" run-time min-run avg-run max-run stddev migrations\n"); + if (sched->idle_hist) { + printf("\nIdle-time summary\n"); + printf("%*s parent sched-out ", comm_width, "comm"); + printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); + } else { + printf("\nRuntime summary\n"); + printf("%*s parent sched-in ", comm_width, "comm"); + printf(" run-time min-run avg-run max-run stddev migrations\n"); + } printf("%*s (count) ", comm_width, ""); printf(" (msec) (msec) (msec) (msec) %%\n"); printf("%.117s\n", graph_dotted_line); @@ -2561,7 +2596,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf("\n"); /* CPU idle stats not tracked when samples were skipped */ - if (sched->skipped_samples) + if (sched->skipped_samples && !sched->idle_hist) return; printf("\nIdle stats:\n"); @@ -3107,6 +3142,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), + OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_PARENT(sched_options) -- cgit v1.2.3 From ba957ebb54893acaf3dc846031073a63f021cee1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Dec 2016 23:47:55 +0900 Subject: perf sched timehist: Show callchains for idle stat When --idle-hist option is used with --summary, it now shows idle stats with callchains like below: Idle stats by callchain: CPU 0: 902.195 msec Idle time (msec) Count Callchains ---------------- ------- -------------------------------------------------- 370.589 69 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 178.799 17 worker_thread <- kthread <- ret_from_fork 128.352 17 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 125.111 19 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 71.599 50 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 23.146 1 rcu_gp_kthread <- kthread <- ret_from_fork 4.510 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 0.085 1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- do_restart_poll ... Committer notes: Extra testing: # uname -a Linux jouet 4.8.8-300.fc25.x86_64 #1 SMP Tue Nov 15 18:10:06 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux 1) Run 'perf sched record -g' 2) Run 'perf sched timehist --idle --summary' Idle stats by callchain: CPU 0: 13456.840 msec Idle time (msec) Count Callchains ---------------- ----- -------------------------------------------------- 5386.637 3283 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll 2750.238 2299 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- do_syscall_64 1275.672 1287 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- entry_SYSCALL_64_fastpath 936.322 452 worker_thread <- kthread <- ret_from_fork 741.311 385 rcu_nocb_kthread <- kthread <- ret_from_fork 729.385 248 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_ppoll 365.386 229 irq_thread <- kthread <- ret_from_fork 338.934 265 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath 219.488 201 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork 186.839 410 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64 142.541 59 kvm_vcpu_block <- kvm_arch_vcpu_ioctl_run <- kvm_vcpu_ioctl <- do_vfs_ioctl <- sys_ioctl 83.887 92 smpboot_thread_fn <- kthread <- ret_from_fork 62.722 96 do_exit <- do_group_exit <- 0x2a5594 <- entry_SYSCALL_64_fastpath 47.894 83 pipe_wait <- pipe_read <- __vfs_read <- vfs_read <- sys_read 46.554 61 rcu_gp_kthread <- kthread <- ret_from_fork 34.337 21 schedule_timeout <- intel_fbc_work_fn <- process_one_work <- worker_thread <- kthread 29.521 14 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select 20.274 10 schedule_timeout <- io_schedule_timeout <- bit_wait_io <- __wait_on_bit <- out_of_line_wait_on_bit 15.085 55 schedule_timeout <- unix_stream_read_generic <- unix_stream_recvmsg <- sock_recvmsg <- SYSC_recvfrom Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161208144755.16673-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0b14265432df..c1c07bfe132c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2448,6 +2448,9 @@ static int timehist_sched_change_event(struct perf_tool *tool, last_tr->dt_wait = 0; last_tr->dt_delay = 0; + if (itr->cursor.nr) + callchain_append(&itr->callchain, &itr->cursor, t - tprev); + itr->last_thread = NULL; } } @@ -2557,6 +2560,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv) return __show_thread_runtime(t, priv); } +static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = " <- "; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (node == NULL) + return 0; + + ret = callchain__fprintf_folded(fp, node->parent); + first = (ret == 0); + + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym && chain->ms.sym->ignore) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, bf, sizeof(bf), + false)); + first = false; + } + + return ret; +} + +static size_t timehist_print_idlehist_callchain(struct rb_root *root) +{ + size_t ret = 0; + FILE *fp = stdout; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(root); + + printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); + printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, + graph_dotted_line); + + while (rb_node) { + chain = rb_entry(rb_node, struct callchain_node, rb_node); + rb_node = rb_next(rb_node); + + ret += fprintf(fp, " "); + print_sched_time(chain->hit, 12); + ret += 16; /* print_sched_time returns 2nd arg + 4 */ + ret += fprintf(fp, " %8d ", chain->count); + ret += callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + } + + return ret; +} + static void timehist_print_summary(struct perf_sched *sched, struct perf_session *session) { @@ -2615,6 +2672,35 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } + if (sched->idle_hist && symbol_conf.use_callchain) { + callchain_param.mode = CHAIN_FOLDED; + callchain_param.value = CCVAL_PERIOD; + + callchain_register_param(&callchain_param); + + printf("\nIdle stats by callchain:\n"); + for (i = 0; i < idle_max_cpu; ++i) { + struct idle_thread_runtime *itr; + + t = idle_threads[i]; + if (!t) + continue; + + itr = thread__priv(t); + if (itr == NULL) + continue; + + callchain_param.sort(&itr->sorted_root, &itr->callchain, + 0, &callchain_param); + + printf(" CPU %2d:", i); + print_sched_time(itr->tr.total_run_time, 6); + printf(" msec\n"); + timehist_print_idlehist_callchain(&itr->sorted_root); + printf("\n"); + } + } + printf("\n" " Total number of unique tasks: %" PRIu64 "\n" "Total number of context switches: %" PRIu64 "\n" -- cgit v1.2.3 From 7e6a79981b7a797b37b1dbeca3fd6ae1cb6d881f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Dec 2016 10:52:10 -0300 Subject: perf tools: Remove some needless __maybe_unused I.e. those parameters/functions _are_ used, so ditch that misleading attribute. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-13cqtjh0yojg5gzvpq1zzpl0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 13 ++++++------- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-stat.c | 6 +++--- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4b419631753d..f8ca7a4ebabc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -208,7 +208,7 @@ static void compute_stats(struct c2c_hist_entry *c2c_he, static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, struct machine *machine) { struct c2c_hists *c2c_hists = &c2c.hists; @@ -379,7 +379,7 @@ static int symbol_width(struct hists *hists, struct sort_entry *se) static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, - struct hists *hists __maybe_unused) + struct hists *hists) { struct c2c_fmt *c2c_fmt; struct c2c_dimension *dim; @@ -1127,7 +1127,7 @@ MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); static int -cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1141,7 +1141,7 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { struct c2c_hist_entry *c2c_he; @@ -1155,7 +1155,7 @@ cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, } static int -cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, +cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { int width = c2c_width(fmt, hpp, he->hists); @@ -1779,7 +1779,6 @@ static int c2c_hists__init(struct c2c_hists *hists, return hpp_list__parse(&hists->list, NULL, sort); } -__maybe_unused static int c2c_hists__reinit(struct c2c_hists *c2c_hists, const char *output, const char *sort) @@ -2658,7 +2657,7 @@ out: return err; } -static int parse_record_events(const struct option *opt __maybe_unused, +static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { bool *event_set = (bool *) opt->value; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d2afbe4a240d..06cc759a4597 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -648,7 +648,7 @@ report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, } static int -parse_branch_mode(const struct option *opt __maybe_unused, +parse_branch_mode(const struct option *opt, const char *str __maybe_unused, int unset) { int *branch_mode = opt->value; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 688dea7cb08f..a02f2e965628 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2195,7 +2195,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, } static -int process_stat_config_event(struct perf_tool *tool __maybe_unused, +int process_stat_config_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2238,7 +2238,7 @@ static int set_maps(struct perf_stat *st) } static -int process_thread_map_event(struct perf_tool *tool __maybe_unused, +int process_thread_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { @@ -2257,7 +2257,7 @@ int process_thread_map_event(struct perf_tool *tool __maybe_unused, } static -int process_cpu_map_event(struct perf_tool *tool __maybe_unused, +int process_cpu_map_event(struct perf_tool *tool, union perf_event *event, struct perf_session *session __maybe_unused) { -- cgit v1.2.3 From 631ac41b46d293fb3ee43a809776c1663de8d9c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:39 +0100 Subject: perf mem: Fix --all-user/--all-kernel options Removing extra '--' prefix. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ad16511b0e40 ("perf mem: Add -U/-K (--all-user/--all-kernel) options") Link: http://lkml.kernel.org/r/1481538943-21874-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e..cd7bc4d104e2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; -- cgit v1.2.3 From 83c2e4f3968d6871eed295f2f5675d3d70b01afa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:40 +0100 Subject: perf evsel: Use variable instead of repeating lengthy FD macro It's more readable and will ease up following patches. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b2365a63db45..fd61ebd77c26 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1474,7 +1474,7 @@ retry_sample_id: for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { - int group_fd; + int fd, group_fd; if (!evsel->cgrp && !evsel->system_wide) pid = thread_map__pid(threads, thread); @@ -1484,21 +1484,22 @@ retry_open: pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); - FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, - pid, - cpus->map[cpu], - group_fd, flags); - if (FD(evsel, cpu, thread) < 0) { + fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], + group_fd, flags); + + FD(evsel, cpu, thread) = fd; + + if (fd < 0) { err = -errno; pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } - pr_debug2(" = %d\n", FD(evsel, cpu, thread)); + pr_debug2(" = %d\n", fd); if (evsel->bpf_fd >= 0) { - int evt_fd = FD(evsel, cpu, thread); + int evt_fd = fd; int bpf_fd = evsel->bpf_fd; err = ioctl(evt_fd, -- cgit v1.2.3 From 38af91f01de0e160c17ae380acb5bab5d51066f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:41 +0100 Subject: perf thread_map: Add thread_map__remove function Add thread_map__remove function to remove thread from thread map. Add automated test also. Committer notes: Testing it: # perf test "Remove thread map" 39: Remove thread map : Ok # perf test -v "Remove thread map" 39: Remove thread map : --- start --- test child forked, pid 4483 2 threads: 4482, 4483 1 thread: 4483 0 thread: test child finished with 0 ---- end ---- Remove thread map: Ok # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-4-git-send-email-jolsa@kernel.org [ Added stdlib.h, to get the free() declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 4 ++++ tools/perf/tests/tests.h | 1 + tools/perf/tests/thread-map.c | 44 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread_map.c | 22 +++++++++++++++++++++ tools/perf/util/thread_map.h | 1 + 5 files changed, 72 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 23605202d4a1..a77dcc0d24e3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -185,6 +185,10 @@ static struct test generic_tests[] = { .desc = "Synthesize thread map", .func = test__thread_map_synthesize, }, + { + .desc = "Remove thread map", + .func = test__thread_map_remove, + }, { .desc = "Synthesize cpu map", .func = test__cpu_map_synthesize, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 0d7b251305af..a512f0c8ff5b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -80,6 +80,7 @@ const char *test__bpf_subtest_get_desc(int subtest); int test__bpf_subtest_get_nr(void); int test_session_topology(int subtest); int test__thread_map_synthesize(int subtest); +int test__thread_map_remove(int subtest); int test__cpu_map_synthesize(int subtest); int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index cee2a2cdc933..a4a4b4625ac3 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -93,3 +94,46 @@ int test__thread_map_synthesize(int subtest __maybe_unused) return 0; } + +int test__thread_map_remove(int subtest __maybe_unused) +{ + struct thread_map *threads; + char *str; + int i; + + TEST_ASSERT_VAL("failed to allocate map string", + asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); + + threads = thread_map__new_str(str, NULL, 0); + + TEST_ASSERT_VAL("failed to allocate thread_map", + threads); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to remove thread", + !thread_map__remove(threads, 0)); + + TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0); + + if (verbose) + thread_map__fprintf(threads, stderr); + + TEST_ASSERT_VAL("failed to not remove thread", + thread_map__remove(threads, 0)); + + for (i = 0; i < threads->nr; i++) + free(threads->map[i].comm); + + free(threads); + return 0; +} diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 40585f5b7027..f9eab200fd75 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -448,3 +448,25 @@ bool thread_map__has(struct thread_map *threads, pid_t pid) return false; } + +int thread_map__remove(struct thread_map *threads, int idx) +{ + int i; + + if (threads->nr < 1) + return -EINVAL; + + if (idx >= threads->nr) + return -EINVAL; + + /* + * Free the 'idx' item and shift the rest up. + */ + free(threads->map[idx].comm); + + for (i = idx; i < threads->nr - 1; i++) + threads->map[i] = threads->map[i + 1]; + + threads->nr--; + return 0; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index bd3b971588da..ea0ef08c6303 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -58,4 +58,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread) void thread_map__read_comms(struct thread_map *threads); bool thread_map__has(struct thread_map *threads, pid_t pid); +int thread_map__remove(struct thread_map *threads, int idx); #endif /* __PERF_THREAD_MAP_H */ -- cgit v1.2.3 From a359c17a7e1a9c99384499cf7b43d80867080789 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Dec 2016 08:46:22 +0100 Subject: perf evsel: Allow to ignore missing pid Adding perf_evsel::ignore_missing_cpu_thread bool. When set true, it allows perf to ignore error of missing pid of perf event syscall. We remove missing thread id from the thread_map, so the rest of the processing like ioctl and mmap won't get disturbed with -1 fd. The reason for supporting this is to ease up monitoring group of pids, that 'disappear' before perf opens their event. This currently leads perf to report error and exit and makes perf record's -u option unusable under certain setup. With this change we will allow this race and ignore such failure with following warning: WARNING: Ignored open failure for pid 8605 Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161213074622.GA3084@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 3 files changed, 46 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 9a0236a4cf95..1c27d947c2fe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -55,6 +55,7 @@ struct record_opts { bool all_user; bool tail_synthesize; bool overwrite; + bool ignore_missing_thread; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fd61ebd77c26..04e536ae4d88 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -990,6 +990,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * it overloads any global configuration. */ apply_config_terms(evsel, opts); + + evsel->ignore_missing_thread = opts->ignore_missing_thread; } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -1419,6 +1421,33 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static bool ignore_missing_thread(struct perf_evsel *evsel, + struct thread_map *threads, + int thread, int err) +{ + if (!evsel->ignore_missing_thread) + return false; + + /* The system wide setup does not work with threads. */ + if (evsel->system_wide) + return false; + + /* The -ESRCH is perf event syscall errno for pid's not found. */ + if (err != -ESRCH) + return false; + + /* If there's only one thread, let it fail. */ + if (threads->nr == 1) + return false; + + if (thread_map__remove(threads, thread)) + return false; + + pr_warning("WARNING: Ignored open failure for pid %d\n", + thread_map__pid(threads, thread)); + return true; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1491,6 +1520,21 @@ retry_open: if (fd < 0) { err = -errno; + + if (ignore_missing_thread(evsel, threads, thread, err)) { + /* + * We just removed 1 thread, so take a step + * back on thread index and lower the upper + * nthreads limit. + */ + nthreads--; + thread--; + + /* ... and pretend like nothing have happened. */ + err = 0; + continue; + } + pr_debug2("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6abb89cd27f9..06ef6f29efa1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -120,6 +120,7 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; + bool ignore_missing_thread; /* parse modifier helper */ int exclude_GH; int nr_members; -- cgit v1.2.3 From 23dc4f1586159470aac707caae526824dd077e25 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:43 +0100 Subject: perf record: Force ignore_missing_thread for uid option Enable perf_evsel::ignore_missing_thread for -u option to ignore complete failure if any of the user's processes die between its enumeration and time we open the event. Committer notes: While doing a 'make -j4 allmodconfig' we sometimes get into the race: Before: # perf record -u acme Error: The sys_perf_event_open() syscall returned with 3 (No such process) for event (cycles:ppp). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? # After: [root@jouet ~]# perf record -u acme WARNING: Ignored open failure for pid 9888 WARNING: Ignored open failure for pid 18059 [root@jouet ~]# Which is an improvement, with the races not preventing the remaining threads for the specified user from being monitored, but the message probably needs further clarification. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1481538943-21874-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fa26865364b6..74d6a035133a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1687,6 +1687,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + /* Enable ignoring missing threads when -u option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); -- cgit v1.2.3 From 3ee2eb6da20db1edad31070da38996e8e0f8adfa Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:46 +0530 Subject: perf annotate: Support jump instruction with target as second operand Architectures like PowerPC have jump instructions that includes a target address as a second operand. For example, 'bne cr7,0xc0000000000f6154'. Add support for such instruction in perf annotate. objdump o/p: c0000000000f6140: ld r9,1032(r31) c0000000000f6144: cmpdi cr7,r9,0 c0000000000f6148: bne cr7,0xc0000000000f6154 c0000000000f614c: ld r9,2312(r30) c0000000000f6150: std r9,1032(r31) c0000000000f6154: ld r9,88(r31) Corresponding perf annotate o/p: Before patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 3ffffffffff09f2c ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) After patch: ld r9,1032(r31) cmpdi cr7,r9,0 v bne 74 ld r9,2312(r30) std r9,1032(r31) 74: ld r9,88(r31) Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-2-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ea7e0de4b9c1..590244e5781e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -223,8 +223,12 @@ bool ins__is_call(const struct ins *ins) static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map *map __maybe_unused) { const char *s = strchr(ops->raw, '+'); + const char *c = strchr(ops->raw, ','); - ops->target.addr = strtoull(ops->raw, NULL, 16); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + else + ops->target.addr = strtoull(ops->raw, NULL, 16); if (s++ != NULL) ops->target.offset = strtoull(s, NULL, 16); -- cgit v1.2.3 From e216874cc1946d28084fa90e495e02725a29e25f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 5 Dec 2016 21:26:47 +0530 Subject: perf annotate: Fix jump target outside of function address range If jump target is outside of function range, perf is not handling it correctly. Especially when target address is lesser than function start address, target offset will be negative. But, target address declared to be unsigned, converts negative number into 2's complement. See below example. Here target of 'jumpq' instruction at 34cf8 is 34ac0 which is lesser than function start address(34cf0). 34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0 Objdump output: 0000000000034cf0 <__sigaction>: __GI___sigaction(): 34cf0: lea -0x20(%rdi),%eax 34cf3: cmp -bashx1,%eax 34cf6: jbe 34d00 <__sigaction+0x10> 34cf8: jmpq 34ac0 <__GI___libc_sigaction> 34cfd: nopl (%rax) 34d00: mov 0x386161(%rip),%rax # 3bae68 <_DYNAMIC+0x2e8> 34d07: movl -bashx16,%fs:(%rax) 34d0e: mov -bashxffffffff,%eax 34d13: retq perf annotate before applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 v jmpq fffffffffffffdd0 nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq perf annotate after applying patch: __GI___sigaction /usr/lib64/libc-2.22.so lea -0x20(%rdi),%eax cmp -bashx1,%eax v jbe 10 ^ jmpq 34ac0 <__GI___libc_sigaction> nop 10: mov _DYNAMIC+0x2e8,%rax movl -bashx16,%fs:(%rax) mov -bashxffffffff,%eax retq Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Chris Riyder Cc: Kim Phillips Cc: Markus Trippelsdorf Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Taeung Song Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1480953407-7605-3-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 5 +++-- tools/perf/util/annotate.c | 14 +++++++++----- tools/perf/util/annotate.h | 5 +++-- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ec7a30fad149..ba36aac340bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > (u64)dl->offset; + bool fwd = dl->ops.target.offset > dl->offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy { if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) || !disasm_line__has_offset(dl) - || dl->ops.target.offset >= symbol__size(sym)) + || dl->ops.target.offset < 0 + || dl->ops.target.offset >= (s64)symbol__size(sym)) return false; return true; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 590244e5781e..c81a3950a7fe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -230,10 +230,12 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op else ops->target.addr = strtoull(ops->raw, NULL, 16); - if (s++ != NULL) + if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); - else - ops->target.offset = UINT64_MAX; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } return 0; } @@ -241,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (!ops->target.addr) + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); @@ -1209,9 +1211,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (dl == NULL) return -1; - if (dl->ops.target.offset == UINT64_MAX) + if (!disasm_line__has_offset(dl)) { dl->ops.target.offset = dl->ops.target.addr - map__rip_2objdump(map, sym->start); + dl->ops.target.offset_avail = true; + } /* kcore has no symbols, so add the call target name */ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 87e4cadc5d27..09776b5af991 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,7 +24,8 @@ struct ins_operands { char *raw; char *name; u64 addr; - u64 offset; + s64 offset; + bool offset_avail; } target; union { struct { @@ -68,7 +69,7 @@ struct disasm_line { static inline bool disasm_line__has_offset(const struct disasm_line *dl) { - return dl->ops.target.offset != UINT64_MAX; + return dl->ops.target.offset_avail; } void disasm_line__free(struct disasm_line *dl); -- cgit v1.2.3 From 2bd42f3aaa53ebe78b9be6f898b7945dd61f9773 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2016 20:56:54 +0100 Subject: perf trace: Check if MAP_32BIT is defined (again) There might be systems where MAP_32BIT is not defined, like some some RHEL7 powerpc versions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Kyle McMartin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 256763b01741 ("perf trace beauty mmap: Add more conditional defines") Link: http://lkml.kernel.org/r/1481831814-23683-1-git-send-email-jolsa@kernel.org [ Changed the Fixme cset to the one removing the conditional switch case for MAP_32BIT ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684..af1cfde6b97b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); -- cgit v1.2.3 From 9de3ffa1b714e6b8ebc1723f71bc9172a4470f7d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 Dec 2016 11:36:24 -0800 Subject: perf bench futex: Fix lock-pi help string Obvious copy/paste typo from the requeue program. Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/1481830584-30909-1-git-send-email-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-lock-pi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 465012b320ee..6d9d6c40a916 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -48,7 +48,7 @@ static const struct option options[] = { }; static const char * const bench_futex_lock_pi_usage[] = { - "perf bench futex requeue ", + "perf bench futex lock-pi ", NULL }; -- cgit v1.2.3 From edee44be59190bf22d5c6e521f3852b7ff16862f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:10:50 +0530 Subject: perf annotate: Don't throw error for zero length symbols 'perf report --tui' exits with error when it finds a sample of zero length symbol (i.e. addr == sym->start == sym->end). Actually these are valid samples. Don't exit TUI and show report with such symbols. Reported-and-Tested-by: Anton Blanchard Link: https://lkml.org/lkml/2016/10/8/189 Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Benjamin Herrenschmidt Cc: Chris Riyder Cc: linuxppc-dev@lists.ozlabs.org Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Cc: stable@kernel.org # v4.9+ Link: http://lkml.kernel.org/r/1479804050-5028-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c81a3950a7fe..06cc04e5806a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -647,7 +647,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; -- cgit v1.2.3 From ed6c166cc7dc329736cace3affd2df984fb22ec8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Dec 2016 10:29:44 -0500 Subject: perf diff: Do not overwrite valid build id Fixes a perf diff regression issue which was introduced by commit 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") The binary name could be same when perf diff different binaries. Build id is used to distinguish between them. However, the previous patch assumes the same binary name has same build id. So it overwrites the build id according to the binary name, regardless of whether the build id is set or not. Check the has_build_id in dso__load. If the build id is already set, use it. Before the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% -99.80% tchain_edit [.] f2 0.12% +99.81% tchain_edit [.] f3 0.02% -0.01% [ixgbe] [k] ixgbe_read_reg After the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% +0.10% tchain_edit [.] f3 0.12% -0.08% tchain_edit [.] f2 Signed-off-by: Kan Liang Cc: Andi Kleen CC: Dima Kogan Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") Link: http://lkml.kernel.org/r/1481642984-13593-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index df2482b2ba45..dc93940de351 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); -- cgit v1.2.3 From 0e6758e8231d5905c2e267566e9bd679a68a7b00 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Honour 'comm_width' when aligning the headers Current default value is 20, but that may change in the future, so make places where we have 20 hardcoded use 'comm_width'. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c1c07bfe132c..020f9c9cef7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1817,7 +1817,7 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - printf(" %-20s %9s %9s %9s", + printf(" %-*s %9s %9s %9s", comm_width, "task name", "wait time", "sch delay", "run time"); printf("\n"); @@ -1830,7 +1830,8 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); + printf(" %-*s %9s %9s %9s\n", comm_width, + "[tid/pid]", "(msec)", "(msec)", "(msec)"); /* * separator @@ -1840,7 +1841,7 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - printf(" %.20s %.9s %.9s %.9s", + printf(" %.*s %.9s %.9s %.9s", comm_width, graph_dotted_line, graph_dotted_line, graph_dotted_line, graph_dotted_line); -- cgit v1.2.3 From 9b8087d72086b249ff744cee237ad12cc5e9255d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Enlarge default 'comm_width' Current default value is 20 but it's easily changed to a bigger value as task has a long name and different tid and pid. And it makes the output not aligned. So change it to have a large value as summary shows. Committer notes: Before: # perf sched record ^C # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 40602.771776 [0000] 0.001 0.000 1.892 After: # perf sched timehist 40602.770537 [0001] rcuos/2[29] 7.970 0.002 0.020 40602.771512 [0003] 0.003 0.000 0.986 40602.771586 [0001] 0.020 0.000 1.049 40602.771606 [0001] qemu-system-x86[3593/3510] 0.000 0.002 0.020 40602.771629 [0003] qemu-system-x86[3510] 0.000 0.003 0.116 Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 020f9c9cef7b..c65f16cbae62 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1775,7 +1775,7 @@ static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) return r->last_time[cpu]; } -static int comm_width = 20; +static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) { -- cgit v1.2.3 From 4fa0d1aa2711a5053fb2422331bdf6bce99b5f87 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:48 +0900 Subject: perf sched timehist: Remove hardcoded 'comm_width' check at print_summary Now that the default 'comm_width' value is 30, no need to check that at print_summary, Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161222060350.17655-1-namhyung@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c65f16cbae62..5052caa91caa 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2627,9 +2627,6 @@ static void timehist_print_summary(struct perf_sched *sched, memset(&totals, 0, sizeof(totals)); - if (comm_width < 30) - comm_width = 30; - if (sched->idle_hist) { printf("\nIdle-time summary\n"); printf("%*s parent sched-out ", comm_width, "comm"); -- cgit v1.2.3 From bdd75729e5d279d734e8d3fb41ef4818ac1598ab Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 22 Dec 2016 15:03:49 +0900 Subject: perf sched timehist: Fix invalid period calculation When --time option is given with a value outside recorded time, the last sample time (tprev) was set to that value and run time calculation might be incorrect. This is a problem of the first samples for each cpus since it would skip the runtime update when tprev is 0. But with --time option it had non-zero (which is invalid) value so the calculation is also incorrect. For example, let's see the followging: $ perf sched timehist time cpu task name wait time sch delay run time [tid/pid] (msec) (msec) (msec) --------------- ------ ------------------------------ --------- --------- --------- 3195.968367 [0003] 0.000 0.000 0.000 3195.968386 [0002] Timer[4306/4277] 0.000 0.000 0.018 3195.968397 [0002] Web Content[4277] 0.000 0.000 0.000 3195.968595 [0001] JS Helper[4302/4277] 0.000 0.000 0.000 3195.969217 [0000] 0.000 0.000 0.621 3195.969251 [0001] kworker/1:1H[291] 0.000 0.000 0.033 The sample starts at 3195.968367 but when I gave a time interval from 3194 to 3196 (in sec) it will calculate the whole 2 second as runtime. In below, 2 cpus accounted it as runtime, other 2 cpus accounted it as idle time. Before: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 1995.991 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 1999.852 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 3724.940 After: $ perf sched timehist --time 3194,3196 -s | tail Idle stats: CPU 0 idle for 10.811 msec CPU 1 idle for 20.793 msec CPU 2 idle for 30.191 msec CPU 3 idle for 18.337 msec Total number of unique tasks: 23 Total number of context switches: 128 Total run time (msec): 18.139 Committer notes: Further testing: Before: Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 937.944 msec CPU 2 idle for 188.931 msec CPU 3 idle for 986.185 msec After: # perf sched timehist --time 40602,40603 -s | tail Idle stats: CPU 0 idle for 229.785 msec CPU 1 idle for 175.407 msec CPU 2 idle for 188.931 msec CPU 3 idle for 223.657 msec Total number of unique tasks: 68 Total number of context switches: 814 Total run time (msec): 97.688 # for cpu in `seq 0 3` ; do echo -n "CPU $cpu idle for " ; perf sched timehist --time 40602,40603 | grep "\[000${cpu}\].*\" | tr -s ' ' | cut -d' ' -f7 | awk '{entries++ ; s+=$1} END {print s " msec (entries: " entries ")"}' ; done CPU 0 idle for 229.721 msec (entries: 123) CPU 1 idle for 175.381 msec (entries: 65) CPU 2 idle for 188.903 msec (entries: 56) CPU 3 idle for 223.61 msec (entries: 102) Difference due to the idle stats being accounted at nanoseconds precision while the entries in 'perf sched timehist' are trucated at msec.usec. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 853b74071110 ("perf sched timehist: Add option to specify time window of interest") Link: http://lkml.kernel.org/r/20161222060350.17655-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5052caa91caa..d53e706a6f17 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2405,7 +2405,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, if (ptime->start && ptime->start > t) goto out; - if (ptime->start > tprev) + if (tprev && ptime->start > tprev) tprev = ptime->start; /* -- cgit v1.2.3