diff options
author | Andi Kleen <ak@linux.intel.com> | 2024-04-01 23:08:04 +0200 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2024-04-05 15:43:07 +0200 |
commit | d812044688dfe73e1b309689d58d06f50a15e618 (patch) | |
tree | 9dada53ea400d26e430ea05f95004ff28e2d2682 | |
parent | perf script: Support 32bit code under 64bit OS with capstone (diff) | |
download | linux-d812044688dfe73e1b309689d58d06f50a15e618.tar.xz linux-d812044688dfe73e1b309689d58d06f50a15e618.zip |
perf script: Add capstone support for '-F +brstackdisasm'
Support capstone output for the '-F +brstackinsn' branch dump.
The new output is enabled with the new field 'brstackdisasm'.
This was possible before with --xed, but now also allow it for users
that don't have xed using the builtin capstone support.
Before:
perf record -b emacs -Q --batch '()'
perf script -F +brstackinsn
...
emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237:
00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [3]
00007f0ab2d17105 insn: 73 51
00007f0ab2d17107 insn: 48 89 c1
00007f0ab2d1710a insn: 48 39 ca
00007f0ab2d1710d insn: 73 96
00007f0ab2d1710f insn: 48 8d 04 11
00007f0ab2d17113 insn: 48 d1 e8
00007f0ab2d17116 insn: 49 8d 34 c1
00007f0ab2d1711a insn: 44 3a 06
00007f0ab2d1711d insn: 75 e6 # PRED 3 cycles [6] 3.00 IPC
00007f0ab2d17105 insn: 73 51 # PRED 1 cycles [7] 1.00 IPC
00007f0ab2d17158 insn: 48 8d 50 01
00007f0ab2d1715c insn: eb 92 # PRED 1 cycles [8] 2.00 IPC
00007f0ab2d170f0 insn: 48 39 ca
00007f0ab2d170f3 insn: 73 b0 # PRED 1 cycles [9] 2.00 IPC
After (perf must be compiled with capstone):
perf script -F +brstackdisasm
...
emacs 55778 1814366.755945: 151564 cycles:P: 7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s> intel_check_word.constprop.0+237:
00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [3]
00007f0ab2d17105 jae intel_check_word.constprop.0+0x128
00007f0ab2d17107 movq %rax, %rcx
00007f0ab2d1710a cmpq %rcx, %rdx
00007f0ab2d1710d jae intel_check_word.constprop.0+0x75
00007f0ab2d1710f leaq (%rcx, %rdx), %rax
00007f0ab2d17113 shrq $1, %rax
00007f0ab2d17116 leaq (%r9, %rax, 8), %rsi
00007f0ab2d1711a cmpb (%rsi), %r8b
00007f0ab2d1711d jne intel_check_word.constprop.0+0xd5 # PRED 3 cycles [6] 3.00 IPC
00007f0ab2d17105 jae intel_check_word.constprop.0+0x128 # PRED 1 cycles [7] 1.00 IPC
00007f0ab2d17158 leaq 1(%rax), %rdx
00007f0ab2d1715c jmp intel_check_word.constprop.0+0xc0 # PRED 1 cycles [8] 2.00 IPC
00007f0ab2d170f0 cmpq %rcx, %rdx
00007f0ab2d170f3 jae intel_check_word.constprop.0+0x75 # PRED 1 cycles [9] 2.00 IPC
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20240401210925.209671-3-ak@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/Documentation/perf-script.txt | 7 | ||||
-rw-r--r-- | tools/perf/builtin-script.c | 32 | ||||
-rw-r--r-- | tools/perf/util/dump-insn.h | 1 | ||||
-rw-r--r-- | tools/perf/util/print_insn.c | 52 | ||||
-rw-r--r-- | tools/perf/util/print_insn.h | 3 |
5 files changed, 86 insertions, 9 deletions
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 005e51df855e..ff086ef05a0c 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -132,9 +132,9 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, - brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm, + brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm, insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, - code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat. + code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat, Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. @@ -257,6 +257,9 @@ OPTIONS can’t know the next sequential instruction after an unconditional branch unless you calculate that based on its length. + brstackdisasm acts like brstackinsn, but will print disassembled instructions if + perf is built with the capstone library. + The brstackoff field will print an offset into a specific dso/binary. With the metric option perf script can compute metrics for diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a711bedace47..dd10f158ed0c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -136,6 +136,7 @@ enum perf_output_field { PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, PERF_OUTPUT_DSOFF = 1ULL << 41, PERF_OUTPUT_DISASM = 1ULL << 42, + PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43, }; struct perf_script { @@ -210,6 +211,7 @@ struct output_option { {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, + {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM}, }; enum { @@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) "selected. Hence, no address to lookup the source line number.\n"); return -EINVAL; } - if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set && + if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) + && !allow_user_set && !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" "Hint: run 'perf record -b ...'\n"); @@ -1162,6 +1165,20 @@ out: return ret; } +static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused, + struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + if (PRINT_FIELD(BRSTACKDISASM)) { + const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp); + if (p) + return p; + } +#endif + return dump_insn(x, ip, inbuf, inlen, lenp); +} + static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, int insn, FILE *fp, int *total_cycles, @@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, { int ilen = 0; int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip, - dump_insn(x, ip, inbuf, len, &ilen)); + any_dump_insn(attr, x, ip, inbuf, len, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "ilen: %d\t", ilen); @@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, nr = max_blocks + 1; x.thread = thread; + x.machine = machine; x.cpu = sample->cpu; printed += fprintf(fp, "%c", '\n'); @@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, } else { ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip, - dump_insn(&x, ip, buffer + off, len - off, &ilen)); + any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, goto out; ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip, - dump_insn(&x, sample->ip, buffer, len, &ilen)); + any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, for (off = 0; off <= end - start; off += ilen) { ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off, - dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen)); if (PRINT_FIELD(BRSTACKINSNLEN)) printed += fprintf(fp, "\tilen: %d", ilen); printed += fprintf(fp, "\n"); @@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, printed += fprintf(fp, "\t\t"); printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al); } - if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) + if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM)) printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp); return printed; @@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," - "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth," + "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", parse_output_fields), diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 650125061530..4a7797dd6d09 100644 --- a/tools/perf/util/dump-insn.h +++ b/tools/perf/util/dump-insn.h @@ -11,6 +11,7 @@ struct thread; struct perf_insn { /* Initialized by callers: */ struct thread *thread; + struct machine *machine; u8 cpumode; bool is64bit; int cpu; diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c index 32dc9dad9cf2..8825330d435f 100644 --- a/tools/perf/util/print_insn.c +++ b/tools/perf/util/print_insn.c @@ -12,6 +12,7 @@ #include "machine.h" #include "thread.h" #include "print_insn.h" +#include "dump-insn.h" #include "map.h" #include "dso.h" @@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) return 0; } +static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x) +{ + struct addr_location al; + bool printed = false; + + if (insn->detail && insn->detail->x86.op_count == 1) { + cs_x86_op *op = &insn->detail->x86.operands[0]; + + addr_location__init(&al); + if (op->type == X86_OP_IMM && + thread__find_symbol(thread, x->cpumode, op->imm, &al) && + al.sym && + al.addr < al.sym->end) { + snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic, + al.sym->name, al.addr - al.sym->start, op->imm); + printed = true; + } + addr_location__exit(&al); + } + + if (!printed) + snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str); +} + +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp) +{ + int ret; + int count; + cs_insn *insn; + csh cs_handle; + + ret = capstone_init(x->machine, &cs_handle, x->is64bit); + if (ret < 0) + return NULL; + + count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn); + if (count > 0) { + if (machine__normalized_is(x->machine, "x86")) + dump_insn_x86(x->thread, &insn[0], x); + else + snprintf(x->out, sizeof(x->out), "%s %s", + insn[0].mnemonic, insn[0].op_str); + *lenp = insn->size; + cs_free(insn, count); + } else { + return NULL; + } + return x->out; +} + static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread, cs_insn *insn, FILE *fp) { diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h index 6447dd41b543..c2a6391a45ce 100644 --- a/tools/perf/util/print_insn.h +++ b/tools/perf/util/print_insn.h @@ -8,9 +8,12 @@ struct perf_sample; struct thread; struct machine; +struct perf_insn; size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, struct machine *machine, FILE *fp, struct addr_location *al); size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); +const char *cs_dump_insn(struct perf_insn *x, uint64_t ip, + u8 *inbuf, int inlen, int *lenp); #endif /* PERF_PRINT_INSN_H */ |