From 1953287bfe8afcbbd235bd6c42c9df06d52438dc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 07:11:05 +0200 Subject: perf tools: Fix call-chain cumul hit based sub-total (fractal mode) The callchain fractal mode builds each new total hits in a new branch of profiling by using the parent's hits of the current branch plus the hits of the children. This is wrong, the total hits of a branch should be made of the sum of every children hits, we must ignore the parent hits in this scope. This patch also fixes another mistake with the hit counting. Now the rates are correct. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Pekka Enberg Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9d3c8141b8c1..98c5627f327b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -26,10 +26,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct callchain_node *rnode; + u64 chain_cumul = cumul_hits(chain); while (*p) { + u64 rnode_cumul; + parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); + rnode_cumul = cumul_hits(rnode); switch (mode) { case CHAIN_FLAT: @@ -40,7 +44,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - if (rnode->cumul_hit < chain->cumul_hit) + if (rnode_cumul < chain_cumul) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -87,7 +91,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_ABS); } @@ -108,11 +112,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->cumul_hit * min_percent / 100.0; + min_hit = node->children_hit * min_percent / 100.0; chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_REL); } @@ -211,7 +215,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->cumul_hit = new->hit = 1; + new->children_hit = 0; + new->hit = 1; } /* @@ -241,7 +246,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; - new->cumul_hit = parent->cumul_hit; + new->children_hit = parent->children_hit; + parent->children_hit = cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -249,6 +255,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, if (idx_total < chain->nr) { parent->hit = 0; add_child(parent, chain, idx_total, syms); + parent->children_hit++; } else { parent->hit = 1; } @@ -269,13 +276,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - goto cumul; + goto inc_children_hit; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); -cumul: - root->cumul_hit++; +inc_children_hit: + root->children_hit++; } static int @@ -317,8 +324,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; - root->cumul_hit++; - return 0; } -- cgit v1.2.3 From b0efe213f84f7fd5ccfe07053e3d9fb827b7c188 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 02:16:23 +0200 Subject: perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains When the callchain tree comes to insert an empty backtrace, it raises a spurious warning about the fact we are inserting an empty. This is spurious because the radix tree assumes it did something wrong to reach this state. But it didn't, we just met an empty callchain that has to be ignored. This happens occasionally with certain types of call-chain recordings. If it happens it's a big nuisance as perf report output starts with thousands of warning lines. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249690585-9145-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 98c5627f327b..a8e67aa9ef49 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -336,5 +336,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { + if (!chain->nr) + return; __append_chain_children(root, chain, syms, 0); } -- cgit v1.2.3 From c0a8865e32c8d1a562db38e06ef31ef23282f646 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 9 Aug 2009 04:19:15 +0200 Subject: perf tools: callchain: Fix bad rounding of minimum rate Sometimes we get callchain branches that have a rate under the limit given by the user. Say you launched: perf record -f -g -a ./hackbench 10 perf report -g fractal,10.0 And you got: 2.33% hackbench [kernel] [k] _spin_lock_irqsave | |--78.57%-- remove_wait_queue | poll_freewait | do_sys_poll | sys_poll | sysenter_dispatch | 0xf7ffa430 | 0x1ffadea3c | |--7.14%-- __up_read | up_read | do_page_fault | page_fault | 0xf7ffa430 | 0xa0df710000000a ... It is abnormal to get a 7.14% branch whereas we passed a 10% filter. The problem is that we round down the minimum threshold. This happens mostly when we have very low number of events. If the total amount of your branch is 4 and you have a subranch of 3 events, filtering to 90% will be computed like follows: limit = 4 * 0.9; The result is about 3.6, but the cast to integer will round down to 3. It means that our filter is actually of 75% We must then explicitly round up the minimum threshold. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: acme@redhat.com Cc: peterz@infradead.org Cc: efault@gmx.de LKML-Reference: <20090809024235.GA10146@nowhere> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/callchain.c') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a8e67aa9ef49..011473411642 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "callchain.h" @@ -112,7 +113,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->children_hit * min_percent / 100.0; + min_hit = ceil(node->children_hit * min_percent); chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); @@ -126,7 +127,7 @@ static void sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph_rel(chain_root, param->min_percent); + __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); rb_root->rb_node = chain_root->rb_root.rb_node; } -- cgit v1.2.3