diff options
174 files changed, 7998 insertions, 1346 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0382f3f1095..d1e65ce545b3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -48,7 +48,7 @@ struct cpu_hw_events { unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; - unsigned int group_flag; + unsigned int txn_flags; int n_txn_start; /* BHRB bits */ @@ -1441,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN) + if (cpuhw->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -1586,13 +1586,22 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -1604,8 +1613,15 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1621,7 +1637,15 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1632,7 +1656,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuhw->group_flag &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 527c8b98e97e..9f9dfda9ed2c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +DEFINE_PER_CPU(int, hv_24x7_txn_flags); +DEFINE_PER_CPU(int, hv_24x7_txn_err); + +struct hv_24x7_hw { + struct perf_event *events[255]; +}; + +DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); + 
/* * request_buffer and result_buffer are not required to be 4k aligned, * but are not allowed to cross any 4k boundary. Aligning them to 4k is @@ -1231,9 +1240,48 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { u64 now; + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_hw *h24x7hw; + int txn_flags; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + + /* + * If in a READ transaction, add this counter to the list of + * counters to read during the next HCALL (i.e. commit_txn()). + * If not in a READ transaction, go ahead and make the HCALL + * to read this counter by itself. + */ + + if (txn_flags & PERF_PMU_TXN_READ) { + int i; + int ret; - now = h_24x7_get_value(event); - update_event_count(event, now); + if (__this_cpu_read(hv_24x7_txn_err)) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) { + __this_cpu_write(hv_24x7_txn_err, ret); + } else { + /* + * Associate the event with the HCALL request index, + * so ->commit_txn() can quickly find/update count. + */ + i = request_buffer->num_requests - 1; + + h24x7hw = &get_cpu_var(hv_24x7_hw); + h24x7hw->events[i] = event; + put_cpu_var(h24x7hw); + } + + put_cpu_var(hv_24x7_reqb); + } else { + now = h_24x7_get_value(event); + update_event_count(event, now); + } } static void h_24x7_event_start(struct perf_event *event, int flags) @@ -1255,6 +1303,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } +/* + * 24x7 counters only support READ transactions. They are + * always counting and don't need/support ADD transactions. + * Cache the flags, but otherwise ignore transactions that + * are not PERF_PMU_TXN_READ. 
+ */ +static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + + /* We should not be called if we are already in a txn */ + WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags)); + + __this_cpu_write(hv_24x7_txn_flags, flags); + if (flags & ~PERF_PMU_TXN_READ) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + init_24x7_request(request_buffer, result_buffer); + + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +} + +/* + * Clean up transaction state. + * + * NOTE: Ignore state of request and result buffers for now. + * We will initialize them during the next read/txn. + */ +static void reset_txn(void) +{ + __this_cpu_write(hv_24x7_txn_flags, 0); + __this_cpu_write(hv_24x7_txn_err, 0); +} + +/* + * 24x7 counters only support READ transactions. They are always counting + * and don't need/support ADD transactions. Clear ->txn_flags but otherwise + * ignore transactions that are not of type PERF_PMU_TXN_READ. + * + * For READ transactions, submit all pending 24x7 requests (i.e. requests + * that were queued by h_24x7_event_read()), to the hypervisor and update + * the event counts. 
+ */ +static int h_24x7_event_commit_txn(struct pmu *pmu) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + struct hv_24x7_result *resb; + struct perf_event *event; + u64 count; + int i, ret, txn_flags; + struct hv_24x7_hw *h24x7hw; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + WARN_ON_ONCE(!txn_flags); + + ret = 0; + if (txn_flags & ~PERF_PMU_TXN_READ) + goto out; + + ret = __this_cpu_read(hv_24x7_txn_err); + if (ret) + goto out; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + ret = make_24x7_request(request_buffer, result_buffer); + if (ret) { + log_24x7_hcall(request_buffer, result_buffer, ret); + goto put_reqb; + } + + h24x7hw = &get_cpu_var(hv_24x7_hw); + + /* Update event counts from hcall */ + for (i = 0; i < request_buffer->num_requests; i++) { + resb = &result_buffer->results[i]; + count = be64_to_cpu(resb->elements[0].element_data[0]); + event = h24x7hw->events[i]; + h24x7hw->events[i] = NULL; + update_event_count(event, count); + } + + put_cpu_var(hv_24x7_hw); + +put_reqb: + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +out: + reset_txn(); + return ret; +} + +/* + * 24x7 counters only support READ transactions. They are always counting + * and don't need/support ADD transactions. However, regardless of type + * of transaction, all we need to do is cleanup, so we don't have to check + * the type of transaction. 
+ */ +static void h_24x7_event_cancel_txn(struct pmu *pmu) +{ + WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags)); + reset_txn(); +} + static struct pmu h_24x7_pmu = { .task_ctx_nr = perf_invalid_context, @@ -1266,6 +1425,9 @@ static struct pmu h_24x7_pmu = { .start = h_24x7_event_start, .stop = h_24x7_event_stop, .read = h_24x7_event_read, + .start_txn = h_24x7_event_start_txn, + .commit_txn = h_24x7_event_commit_txn, + .cancel_txn = h_24x7_event_cancel_txn, }; static int hv_24x7_init(void) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index a9563409c36e..929c147e07b4 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned int flags; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -538,7 +540,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) * For group events transaction, the authorization check is * done in cpumf_pmu_commit_txn(). */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) if (validate_ctr_auth(&event->hw)) return -ENOENT; @@ -576,13 +578,22 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; } @@ -593,11 +604,18 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + WARN_ON(cpuhw->tx_state != cpuhw->state); - cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -611,13 +629,20 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + /* check if the updated state can be scheduled */ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); state >>= CPUMF_LCCTL_ENABLE_SHIFT; if ((state & cpuhw->info.auth_ctl) != state) return -ENOENT; - cpuhw->flags &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 689db65f8529..b0da5aedb336 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -108,7 +108,7 @@ struct cpu_hw_events { /* Enabled/disable state. 
*/ int enabled; - unsigned int group_flag; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1379,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1494,12 +1494,17 @@ static int sparc_pmu_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1510,8 +1515,15 @@ static void sparc_pmu_start_txn(struct pmu *pmu) static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1528,14 +1540,20 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (!sparc_pmu) return -EINVAL; - cpuc = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; - cpuc->group_flag &= ~PERF_EVENT_TXN; + 
cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4eb065c6bed2..58031303e304 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ perf_event_intel_uncore_snb.o \ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index be4febc58b94..e38d338a6447 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -157,7 +157,7 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -unsigned short num_cache_leaves; +static unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: @@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb) * * @returns: the disabled index if used or negative value if slot free. 
*/ -int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) +static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) { unsigned int reg = 0; @@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, * * @return: 0 on success, error status on failure */ -int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, - unsigned long index) +static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, + unsigned slot, unsigned long index) { int ret = 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66dd3fe99b82..4562cf070c27 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * XXX assumes any ->del() called during a TXN will only be on * an event added during that same TXN. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) return; /* @@ -1748,11 +1748,22 @@ static inline void x86_pmu_read(struct perf_event *event) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. 
*/ -static void x86_pmu_start_txn(struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ + + cpuc->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); } @@ -1763,7 +1774,16 @@ static void x86_pmu_start_txn(struct pmu *pmu) */ static void x86_pmu_cancel_txn(struct pmu *pmu) { - __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); + unsigned int txn_flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + txn_flags = cpuc->txn_flags; + cpuc->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + /* * Truncate collected array by the number of events added in this * transaction. See x86_pmu_add() and x86_pmu_*_txn(). 
@@ -1786,6 +1806,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu) int assign[X86_PMC_IDX_MAX]; int n, ret; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (!x86_pmu_initialized()) @@ -1801,7 +1828,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 165be83a7fa4..499f533dd3cc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -196,7 +196,7 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ - unsigned int group_flag; + unsigned int txn_flags; int is_fake; /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index d1c0f254afbe..2cad71d1b14c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -495,6 +495,19 @@ static int bts_event_init(struct perf_event *event) if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + /* + * BTS leaks kernel addresses even when CPL0 tracing is + * disabled, so disallow intel_bts driver for unprivileged + * users on paranoid systems since it provides trace data + * to the user in a zero-copy fashion. + * + * Note that the default paranoia setting permits unprivileged + * users to profile the kernel. 
+ */ + if (event->attr.exclude_kernel && perf_paranoid_kernel() && + !capable(CAP_SYS_ADMIN)) + return -EACCES; + ret = x86_reserve_hardware(); if (ret) { x86_del_exclusive(x86_lbr_exclusive_bts); diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c new file mode 100644 index 000000000000..75a38b5a2e26 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c @@ -0,0 +1,694 @@ +/* + * perf_event_intel_cstate.c: support cstate residency counters + * + * Copyright (C) 2015, Intel Corp. + * Author: Kan Liang (kan.liang@intel.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + */ + +/* + * This file exports cstate related free running (read-only) counters + * for perf. These counters may be used simultaneously by other tools, + * such as turbostat. However, it still makes sense to implement them + * in perf, because we can conveniently collect them together with + * other events, and allow tools to use them without special MSR + * access code. + * + * The events only support system-wide mode counting. There is no + * sampling support because it is not supported by the hardware. + * + * According to counters' scope and category, two PMUs are registered + * with the perf_event core subsystem. + * - 'cstate_core': The counter is available for each physical core. + * The counters include CORE_C*_RESIDENCY. + * - 'cstate_pkg': The counter is available for each physical package. + * The counters include PKG_C*_RESIDENCY. 
+ * + * All of these counters are specified in the Intel® 64 and IA-32 + * Architectures Software Developer's Manual Vol3b. + * + * Model specific counters: + * MSR_CORE_C1_RES: CORE C1 Residency Counter + * perf code: 0x00 + * Available model: SLM,AMT + * Scope: Core (each processor core has an MSR) + * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 + * Available model: SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. + * perf code: 0x03 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. + * perf code: 0x04 + * Available model: HSW ULT only + * Scope: Package (physical package) + * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. + * perf code: 0x05 + * Available model: HSW ULT only + * Scope: Package (physical package) + * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. 
+ * perf code: 0x06 + * Available model: HSW ULT only + * Scope: Package (physical package) + * + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_device_id.h> +#include "perf_event.h" + +#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __cstate_##_var##_show, NULL) + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf); + +struct perf_cstate_msr { + u64 msr; + struct perf_pmu_events_attr *attr; + bool (*test)(int idx); +}; + + +/* cstate_core PMU */ + +static struct pmu cstate_core_pmu; +static bool has_cstate_core; + +enum perf_cstate_core_id { + /* + * cstate_core events + */ + PERF_CSTATE_CORE_C1_RES = 0, + PERF_CSTATE_CORE_C3_RES, + PERF_CSTATE_CORE_C6_RES, + PERF_CSTATE_CORE_C7_RES, + + PERF_CSTATE_CORE_EVENT_MAX, +}; + +bool test_core(int idx) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ + + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES) + return true; + break; + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ + + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 69: /* 22nm Haswell ULT */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + 
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ + + case 78: /* 14nm Skylake Mobile */ + case 94: /* 14nm Skylake Desktop */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES || + idx == PERF_CSTATE_CORE_C7_RES) + return true; + break; + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case 76: /* 14nm Atom "Airmont" */ + if (idx == PERF_CSTATE_CORE_C1_RES || + idx == PERF_CSTATE_CORE_C6_RES) + return true; + break; + } + + return false; +} + +PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); + +static struct perf_cstate_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, }, +}; + +static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { + NULL, +}; + +static struct attribute_group core_events_attr_group = { + .name = "events", + .attrs = core_events_attrs, +}; + +DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); +static struct attribute *core_format_attrs[] = { + &format_attr_core_event.attr, + NULL, +}; + +static struct attribute_group core_format_attr_group = { + .name = "format", + .attrs = core_format_attrs, +}; + +static cpumask_t cstate_core_cpu_mask; +static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); + +static struct attribute *cstate_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct 
attribute_group cpumask_attr_group = { + .attrs = cstate_cpumask_attrs, +}; + +static const struct attribute_group *core_attr_groups[] = { + &core_events_attr_group, + &core_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +/* cstate_core PMU end */ + + +/* cstate_pkg PMU */ + +static struct pmu cstate_pkg_pmu; +static bool has_cstate_pkg; + +enum perf_cstate_pkg_id { + /* + * cstate_pkg events + */ + PERF_CSTATE_PKG_C2_RES = 0, + PERF_CSTATE_PKG_C3_RES, + PERF_CSTATE_PKG_C6_RES, + PERF_CSTATE_PKG_C7_RES, + PERF_CSTATE_PKG_C8_RES, + PERF_CSTATE_PKG_C9_RES, + PERF_CSTATE_PKG_C10_RES, + + PERF_CSTATE_PKG_EVENT_MAX, +}; + +bool test_pkg(int idx) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ + + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES || + idx == PERF_CSTATE_CORE_C7_RES) + return true; + break; + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ + + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ + + case 78: /* 14nm Skylake Mobile */ + case 94: /* 14nm Skylake Desktop */ + if (idx == PERF_CSTATE_PKG_C2_RES || + idx == PERF_CSTATE_PKG_C3_RES || + idx == PERF_CSTATE_PKG_C6_RES || + idx == PERF_CSTATE_PKG_C7_RES) + return true; + break; + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case 76: /* 14nm Atom "Airmont" */ + if (idx == 
PERF_CSTATE_CORE_C6_RES) + return true; + break; + case 69: /* 22nm Haswell ULT */ + if (idx == PERF_CSTATE_PKG_C2_RES || + idx == PERF_CSTATE_PKG_C3_RES || + idx == PERF_CSTATE_PKG_C6_RES || + idx == PERF_CSTATE_PKG_C7_RES || + idx == PERF_CSTATE_PKG_C8_RES || + idx == PERF_CSTATE_PKG_C9_RES || + idx == PERF_CSTATE_PKG_C10_RES) + return true; + break; + } + + return false; +} + +PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); + +static struct perf_cstate_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, }, +}; + +static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { + NULL, +}; + +static struct attribute_group pkg_events_attr_group = { + .name = "events", + .attrs = pkg_events_attrs, +}; + +DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); +static struct attribute *pkg_format_attrs[] = { + &format_attr_pkg_event.attr, + NULL, +}; +static struct attribute_group pkg_format_attr_group = { + 
.name = "format", + .attrs = pkg_format_attrs, +}; + +static cpumask_t cstate_pkg_cpu_mask; + +static const struct attribute_group *pkg_attr_groups[] = { + &pkg_events_attr_group, + &pkg_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +/* cstate_pkg PMU end*/ + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu == &cstate_core_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); + else if (pmu == &cstate_pkg_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); + else + return 0; +} + +static int cstate_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config; + int ret = 0; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (event->pmu == &cstate_core_pmu) { + if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) + return -EINVAL; + if (!core_msr[cfg].attr) + return -EINVAL; + event->hw.event_base = core_msr[cfg].msr; + } else if (event->pmu == &cstate_pkg_pmu) { + if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) + return -EINVAL; + if (!pkg_msr[cfg].attr) + return -EINVAL; + event->hw.event_base = pkg_msr[cfg].msr; + } else + return -ENOENT; + + /* must be done before validate_group */ + event->hw.config = cfg; + event->hw.idx = -1; + + return ret; +} + +static inline u64 cstate_pmu_read_counter(struct perf_event *event) +{ + u64 val; + + rdmsrl(event->hw.event_base, val); + return val; +} + +static void cstate_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = 
cstate_pmu_read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + local64_add(new_raw_count - prev_raw_count, &event->count); +} + +static void cstate_pmu_event_start(struct perf_event *event, int mode) +{ + local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event)); +} + +static void cstate_pmu_event_stop(struct perf_event *event, int mode) +{ + cstate_pmu_event_update(event); +} + +static void cstate_pmu_event_del(struct perf_event *event, int mode) +{ + cstate_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int cstate_pmu_event_add(struct perf_event *event, int mode) +{ + if (mode & PERF_EF_START) + cstate_pmu_event_start(event, mode); + + return 0; +} + +static void cstate_cpu_exit(int cpu) +{ + int i, id, target; + + /* cpu exit for cstate core */ + if (has_cstate_core) { + id = topology_core_id(cpu); + target = -1; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (id == topology_core_id(i)) { + target = i; + break; + } + } + if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &cstate_core_cpu_mask); + WARN_ON(cpumask_empty(&cstate_core_cpu_mask)); + if (target >= 0) + perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); + } + + /* cpu exit for cstate pkg */ + if (has_cstate_pkg) { + id = topology_physical_package_id(cpu); + target = -1; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (id == topology_physical_package_id(i)) { + target = i; + break; + } + } + if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &cstate_pkg_cpu_mask); + WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask)); + if (target >= 0) + perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); + } +} + +static void cstate_cpu_init(int cpu) +{ + int i, id; + + /* cpu init for cstate core */ + if (has_cstate_core) { + id = topology_core_id(cpu); + for_each_cpu(i, &cstate_core_cpu_mask) { 
+ if (id == topology_core_id(i)) + break; + } + if (i >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_core_cpu_mask); + } + + /* cpu init for cstate pkg */ + if (has_cstate_pkg) { + id = topology_physical_package_id(cpu); + for_each_cpu(i, &cstate_pkg_cpu_mask) { + if (id == topology_physical_package_id(i)) + break; + } + if (i >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); + } +} + +static int cstate_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + break; + case CPU_STARTING: + cstate_cpu_init(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + break; + case CPU_ONLINE: + case CPU_DEAD: + break; + case CPU_DOWN_PREPARE: + cstate_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +/* + * Probe the cstate events and insert the available ones into sysfs attrs. + * Return false if there are no available events. + */ +static bool cstate_probe_msr(struct perf_cstate_msr *msr, + struct attribute **events_attrs, + int max_event_nr) +{ + int i, j = 0; + u64 val; + + /* Probe the cstate events. */ + for (i = 0; i < max_event_nr; i++) { + if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) + msr[i].attr = NULL; + } + + /* List remaining events in the sysfs attrs. */ + for (i = 0; i < max_event_nr; i++) { + if (msr[i].attr) + events_attrs[j++] = &msr[i].attr->attr.attr; + } + events_attrs[j] = NULL; + + return (j > 0) ? 
true : false; +} + +static int __init cstate_init(void) +{ + /* SLM has different MSR for PKG C6 */ + switch (boot_cpu_data.x86_model) { + case 55: + case 76: + case 77: + pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + } + + if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX)) + has_cstate_core = true; + + if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX)) + has_cstate_pkg = true; + + return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; +} + +static void __init cstate_cpumask_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_online_cpu(cpu) + cstate_cpu_init(cpu); + + __perf_cpu_notifier(cstate_cpu_notifier); + + cpu_notifier_register_done(); +} + +static struct pmu cstate_core_pmu = { + .attr_groups = core_attr_groups, + .name = "cstate_core", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, /* must have */ + .del = cstate_pmu_event_del, /* must have */ + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, +}; + +static struct pmu cstate_pkg_pmu = { + .attr_groups = pkg_attr_groups, + .name = "cstate_pkg", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, /* must have */ + .del = cstate_pmu_event_del, /* must have */ + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, +}; + +static void __init cstate_pmus_register(void) +{ + int err; + + if (has_cstate_core) { + err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); + if (WARN_ON(err)) + pr_info("Failed to register PMU %s error %d\n", + cstate_core_pmu.name, err); + } + + if (has_cstate_pkg) { + err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); + if (WARN_ON(err)) + pr_info("Failed to 
register PMU %s error %d\n", + cstate_pkg_pmu.name, err); + } +} + +static int __init cstate_pmu_init(void) +{ + int err; + + if (cpu_has_hypervisor) + return -ENODEV; + + err = cstate_init(); + if (err) + return err; + + cstate_cpumask_init(); + + cstate_pmus_register(); + + return 0; +} + +device_initcall(cstate_pmu_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 84f236ab96b0..5db1c7755548 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -510,10 +510,11 @@ int intel_pmu_drain_bts_buffer(void) u64 flags; }; struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; + struct bts_record *at, *base, *top; struct perf_output_handle handle; struct perf_event_header header; struct perf_sample_data data; + unsigned long skip = 0; struct pt_regs regs; if (!event) @@ -522,10 +523,10 @@ int intel_pmu_drain_bts_buffer(void) if (!x86_pmu.bts_active) return 0; - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; + base = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; - if (top <= at) + if (top <= base) return 0; memset(&regs, 0, sizeof(regs)); @@ -535,16 +536,43 @@ int intel_pmu_drain_bts_buffer(void) perf_sample_data_init(&data, 0, event->hw.last_period); /* + * BTS leaks kernel addresses in branches across the cpl boundary, + * such as traps or system calls, so unless the user is asking for + * kernel tracing (and right now it's not possible), we'd need to + * filter them out. But first we need to count how many of those we + * have in the current batch. This is an extra O(n) pass, however, + * it's much faster than the other one especially considering that + * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the + * alloc_bts_buffer()). 
+ */ + for (at = base; at < top; at++) { + /* + * Note that right now *this* BTS code only works if + * attr::exclude_kernel is set, but let's keep this extra + * check here in case that changes. + */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + skip++; + } + + /* * Prepare a generic sample, i.e. fill in the invariant fields. * We will overwrite the from and to address before we output * the sample. */ perf_prepare_sample(&header, &data, event, &regs); - if (perf_output_begin(&handle, event, header.size * (top - at))) + if (perf_output_begin(&handle, event, header.size * + (top - base - skip))) return 1; - for (; at < top; at++) { + for (at = base; at < top; at++) { + /* Filter out any records that contain kernel addresses. */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + continue; + data.ip = at->from; data.addr = at->to; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index b2c9475b7ff2..ad0b8b0490a0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -151,10 +151,10 @@ static void __intel_pmu_lbr_enable(bool pmi) * No need to reprogram LBR_SELECT in a PMI, as it * did not change. 
*/ - if (cpuc->lbr_sel && !pmi) { + if (cpuc->lbr_sel) lbr_select = cpuc->lbr_sel->config; + if (!pmi) wrmsrl(MSR_LBR_SELECT, lbr_select); - } rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); orig_debugctl = debugctl; diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 42169283448b..868e1194337f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -139,9 +139,6 @@ static int __init pt_pmu_hw_init(void) long i; attrs = NULL; - ret = -ENODEV; - if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) - goto fail; for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, @@ -1130,6 +1127,10 @@ static __init int pt_init(void) int ret, cpu, prior_warn = 0; BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE); + + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) + return -ENODEV; + get_online_cpus(); for_each_online_cpu(cpu) { u64 ctl; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 560e5255b15e..61215a69b03d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ -int uncore_pcibus_to_physid[256] = { [0 ... 
255] = -1, }; +DEFINE_RAW_SPINLOCK(pci2phy_map_lock); +struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; static DEFINE_RAW_SPINLOCK(uncore_box_lock); @@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +int uncore_pcibus_to_physid(struct pci_bus *bus) +{ + struct pci2phy_map *map; + int phys_id = -1; + + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == pci_domain_nr(bus)) { + phys_id = map->pbus_to_physid[bus->number]; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + return phys_id; +} + +struct pci2phy_map *__find_pci2phy_map(int segment) +{ + struct pci2phy_map *map, *alloc = NULL; + int i; + + lockdep_assert_held(&pci2phy_map_lock); + +lookup: + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == segment) + goto end; + } + + if (!alloc) { + raw_spin_unlock(&pci2phy_map_lock); + alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); + raw_spin_lock(&pci2phy_map_lock); + + if (!alloc) + return NULL; + + goto lookup; + } + + map = alloc; + alloc = NULL; + map->segment = segment; + for (i = 0; i < 256; i++) + map->pbus_to_physid[i] = -1; + list_add_tail(&map->list, &pci2phy_map_head); + +end: + kfree(alloc); + return map; +} + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id int phys_id; bool first_box = false; - phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; @@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id = 
uncore_pcibus_to_physid[pdev->bus->number]; + int i, cpu, phys_id; bool last_box = false; + phys_id = uncore_pcibus_to_physid(pdev->bus); box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 72c54c2e5b1a..2f0a4a98e16b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -117,6 +117,15 @@ struct uncore_event_desc { const char *config; }; +struct pci2phy_map { + struct list_head list; + int segment; + int pbus_to_physid[256]; +}; + +int uncore_pcibus_to_physid(struct pci_bus *bus); +struct pci2phy_map *__find_pci2phy_map(int segment); + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; -extern int uncore_pcibus_to_physid[256]; +extern raw_spinlock_t pci2phy_map_lock; +extern struct list_head pci2phy_map_head; extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; extern struct event_constraint uncore_constraint_empty; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index f78574b3cb55..845256158a10 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags) static int snb_pci2phy_map_init(int devid) { struct pci_dev *dev = NULL; - int bus; + struct pci2phy_map *map; + int bus, segment; dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); if (!dev) return -ENOTTY; bus = dev->bus->number; - - uncore_pcibus_to_physid[bus] = 0; + 
segment = pci_domain_nr(dev->bus); + + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + pci_dev_put(dev); + return -ENOMEM; + } + map->pbus_to_physid[bus] = 0; + raw_spin_unlock(&pci2phy_map_lock); pci_dev_put(dev); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 694510a887dc..f0f4fcba252e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = { static int snbep_pci2phy_map_init(int devid) { struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid; + int i, bus, nodeid, segment; + struct pci2phy_map *map; int err = 0; u32 config = 0; @@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid) err = pci_read_config_dword(ubox_dev, 0x54, &config); if (err) break; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + /* * every three bits in the Node ID mapping register maps * to a particular node. */ for (i = 0; i < 8; i++) { if (nodeid == ((config >> (3 * i)) & 0x7)) { - uncore_pcibus_to_physid[bus] = i; + map->pbus_to_physid[bus] = i; break; } } + raw_spin_unlock(&pci2phy_map_lock); } if (!err) { @@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid) * For PCI bus with no UBOX device, find the next bus * that has UBOX device and use its mapping. 
*/ - i = -1; - for (bus = 255; bus >= 0; bus--) { - if (uncore_pcibus_to_physid[bus] >= 0) - i = uncore_pcibus_to_physid[bus]; - else - uncore_pcibus_to_physid[bus] = i; + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } } + raw_spin_unlock(&pci2phy_map_lock); } pci_dev_put(ubox_dev); @@ -2444,7 +2460,7 @@ static struct intel_uncore_type *bdx_pci_uncores[] = { NULL, }; -static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = { +static const struct pci_device_id bdx_uncore_pci_ids[] = { { /* Home Agent 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0), diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c3f7602cd038..69b84a26ea17 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -168,21 +168,20 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. The larger SC is, the more accurate the conversion, but + * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication + * (64-bit result) can be used. * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * We can use khz divisor instead of mhz to keep a better precision. * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
*/ -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - static void cyc2ns_data_init(struct cyc2ns_data *data) { data->cyc2ns_mul = 0; - data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + data->cyc2ns_shift = 0; data->cyc2ns_offset = 0; data->__count = 0; } @@ -216,14 +215,14 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) if (likely(data == tail)) { ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); } else { data->__count++; barrier(); ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); barrier(); @@ -257,12 +256,11 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - data->cyc2ns_mul = - DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, - cpu_khz); - data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + NSEC_PER_MSEC, 0); + data->cyc2ns_offset = ns_now - - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); cyc2ns_write_end(cpu, data); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 816488c0b97e..d388de72eaca 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -353,8 +353,12 @@ AVXcode: 1 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: -1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv -1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv +# Intel SDM opcode map does not list MPX instructions. 
For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte. +1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: 1d: 1e: @@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -790,6 +800,7 @@ AVXcode: 3 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) EndTable @@ -874,7 +885,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: +5: rdpkru (110),(11B) | wrpkru (111),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -888,6 +899,9 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable @@ -932,8 +946,8 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE 5: XRSTOR | lfence (11B) -6: XSAVEOPT | mfence (11B) -7: clflush | sfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) EndTable GrpTable: Grp16 diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 092a0e8a479a..d841d33bcdc9 100644 --- a/include/linux/perf_event.h +++ 
b/include/linux/perf_event.h @@ -140,33 +140,67 @@ struct hw_perf_event { }; #endif }; + /* + * If the event is a per task event, this will point to the task in + * question. See the comment in perf_event_alloc(). + */ struct task_struct *target; + +/* + * hw_perf_event::state flags; used to track the PERF_EF_* state. + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + int state; + + /* + * The last observed hardware counter value, updated with a + * local64_cmpxchg() such that pmu::read() can be called nested. + */ local64_t prev_count; + + /* + * The period to start the next sample with. + */ u64 sample_period; + + /* + * The period we started this sample with. + */ u64 last_period; + + /* + * However much is left of the current period; note that this is + * a full 64bit value and allows for generation of periods longer + * than hardware might allow. + */ local64_t period_left; + + /* + * State for throttling the event, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 interrupts_seq; u64 interrupts; + /* + * State for freq target events, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; -/* - * hw_perf_event::state flags - */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 - struct perf_event; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_EVENT_TXN 0x1 +#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags @@ -210,7 +244,19 @@ struct pmu { /* * Try and initialize the event for this PMU. - * Should return -ENOENT when the @event doesn't match this PMU. 
+ * + * Returns: + * -ENOENT -- @event is not for this PMU + * + * -ENODEV -- @event is for this PMU but PMU not present + * -EBUSY -- @event is for this PMU but PMU temporarily unavailable + * -EINVAL -- @event is for this PMU but @event is not valid + * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported + * -EACCES -- @event is for this PMU, @event is valid, but no privileges + * + * 0 -- @event is for this PMU and valid + * + * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); @@ -221,27 +267,61 @@ struct pmu { void (*event_mapped) (struct perf_event *event); /*optional*/ void (*event_unmapped) (struct perf_event *event); /*optional*/ + /* + * Flags for ->add()/->del()/ ->start()/->stop(). There are + * matching hw_perf_event::state flags. + */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Adds/Removes a counter to/from the PMU, can be done inside - * a transaction, see the ->*_txn() methods. + * Adds/Removes a counter to/from the PMU, can be done inside a + * transaction, see the ->*_txn() methods. + * + * The add/del callbacks will reserve all hardware resources required + * to service the event, this includes any counter constraint + * scheduling etc. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on. + * + * ->add() called without PERF_EF_START should result in the same state + * as ->add() followed by ->stop(). + * + * ->del() must always PERF_EF_UPDATE stop an event. If it calls + * ->stop() that must deal with already being stopped without + * PERF_EF_UPDATE. */ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* - * Starts/Stops a counter present on the PMU. The PMI handler - * should stop the counter when perf_event_overflow() returns - * !0. 
->start() will be used to continue. + * Starts/Stops a counter present on the PMU. + * + * The PMI handler should stop the counter when perf_event_overflow() + * returns !0. ->start() will be used to continue. + * + * Also used to change the sample period. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on -- will be called from NMI context when the PMU generates + * NMIs. + * + * ->stop() with PERF_EF_UPDATE will read the counter and update + * period/count values like ->read() would. + * + * ->start() with PERF_EF_RELOAD will reprogram the counter + * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. + * + * For sampling capable PMUs this will also update the software period + * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); @@ -252,20 +332,26 @@ struct pmu { * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. + * + * Optional. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. + * + * Optional. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. + * + * Optional. 
*/ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, diff --git a/kernel/events/core.c b/kernel/events/core.c index b11756f9b6dc..ea02109aee77 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -196,7 +196,7 @@ static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; static int perf_sample_allowed_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100; -void update_perf_cpu_limits(void) +static void update_perf_cpu_limits(void) { u64 tmp = perf_sample_period_ns; @@ -472,7 +472,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, * mode SWOUT : schedule out everything * mode SWIN : schedule in based on cgroup for next */ -void perf_cgroup_switch(struct task_struct *task, int mode) +static void perf_cgroup_switch(struct task_struct *task, int mode) { struct perf_cpu_context *cpuctx; struct pmu *pmu; @@ -1939,7 +1939,7 @@ group_sched_in(struct perf_event *group_event, if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - pmu->start_txn(pmu); + pmu->start_txn(pmu, PERF_PMU_TXN_ADD); if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); @@ -3209,14 +3209,22 @@ void perf_event_exec(void) rcu_read_unlock(); } +struct perf_read_data { + struct perf_event *event; + bool group; + int ret; +}; + /* * Cross CPU call to read the hardware event */ static void __perf_event_read(void *info) { - struct perf_event *event = info; + struct perf_read_data *data = info; + struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event->pmu; /* * If this is a task context, we need to check whether it is @@ -3233,9 +3241,35 @@ static void __perf_event_read(void *info) update_context_time(ctx); update_cgrp_time_from_event(event); } + update_event_times(event); - if (event->state == 
PERF_EVENT_STATE_ACTIVE) - event->pmu->read(event); + if (event->state != PERF_EVENT_STATE_ACTIVE) + goto unlock; + + if (!data->group) { + pmu->read(event); + data->ret = 0; + goto unlock; + } + + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(event); + + list_for_each_entry(sub, &event->sibling_list, group_entry) { + update_event_times(sub); + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + /* + * Use sibling's PMU rather than @event's since + * sibling could be on different (eg: software) PMU. + */ + sub->pmu->read(sub); + } + } + + data->ret = pmu->commit_txn(pmu); + +unlock: raw_spin_unlock(&ctx->lock); } @@ -3300,15 +3334,23 @@ u64 perf_event_read_local(struct perf_event *event) return val; } -static u64 perf_event_read(struct perf_event *event) +static int perf_event_read(struct perf_event *event, bool group) { + int ret = 0; + /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ if (event->state == PERF_EVENT_STATE_ACTIVE) { + struct perf_read_data data = { + .event = event, + .group = group, + .ret = 0, + }; smp_call_function_single(event->oncpu, - __perf_event_read, event, 1); + __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; @@ -3323,11 +3365,14 @@ static u64 perf_event_read(struct perf_event *event) update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + if (group) + update_group_times(event); + else + update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } - return perf_event_count(event); + return ret; } /* @@ -3769,7 +3814,7 @@ static void put_event(struct perf_event *event) * see the comment there. 
* * 2) there is a lock-inversion with mmap_sem through - * perf_event_read_group(), which takes faults while + * perf_read_group(), which takes faults while * holding ctx->mutex, however this is called after * the last filedesc died, so there is no possibility * to trigger the AB-BA case. @@ -3843,14 +3888,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *running = 0; mutex_lock(&event->child_mutex); - total += perf_event_read(event); + + (void)perf_event_read(event, false); + total += perf_event_count(event); + *enabled += event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); *running += event->total_time_running + atomic64_read(&event->child_total_time_running); list_for_each_entry(child, &event->child_list, child_list) { - total += perf_event_read(child); + (void)perf_event_read(child, false); + total += perf_event_count(child); *enabled += child->total_time_enabled; *running += child->total_time_running; } @@ -3860,55 +3909,95 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_group(struct perf_event *event, - u64 read_format, char __user *buf) +static int __perf_read_group_add(struct perf_event *leader, + u64 read_format, u64 *values) { - struct perf_event *leader = event->group_leader, *sub; - struct perf_event_context *ctx = leader->ctx; - int n = 0, size = 0, ret; - u64 count, enabled, running; - u64 values[5]; + struct perf_event *sub; + int n = 1; /* skip @nr */ + int ret; - lockdep_assert_held(&ctx->mutex); + ret = perf_event_read(leader, true); + if (ret) + return ret; - count = perf_event_read_value(leader, &enabled, &running); + /* + * Since we co-schedule groups, {enabled,running} times of siblings + * will be identical to those of the leader, so we only publish one + * set. 
+ */ + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] += leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } - values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = enabled; - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = running; - values[n++] = count; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] += leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + /* + * Write {count,id} tuples for every sibling. + */ + values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); - size = n * sizeof(u64); + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + values[n++] += perf_event_count(sub); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + } - if (copy_to_user(buf, values, size)) - return -EFAULT; + return 0; +} + +static int perf_read_group(struct perf_event *event, + u64 read_format, char __user *buf) +{ + struct perf_event *leader = event->group_leader, *child; + struct perf_event_context *ctx = leader->ctx; + int ret; + u64 *values; - ret = size; + lockdep_assert_held(&ctx->mutex); - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - n = 0; + values = kzalloc(event->read_size, GFP_KERNEL); + if (!values) + return -ENOMEM; - values[n++] = perf_event_read_value(sub, &enabled, &running); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(sub); + values[0] = 1 + leader->nr_siblings; - size = n * sizeof(u64); + /* + * By locking the child_mutex of the leader we effectively + * lock the child list of all siblings.. XXX explain how. 
+ */ + mutex_lock(&leader->child_mutex); - if (copy_to_user(buf + ret, values, size)) { - return -EFAULT; - } + ret = __perf_read_group_add(leader, read_format, values); + if (ret) + goto unlock; - ret += size; + list_for_each_entry(child, &leader->child_list, child_list) { + ret = __perf_read_group_add(child, read_format, values); + if (ret) + goto unlock; } + mutex_unlock(&leader->child_mutex); + + ret = event->read_size; + if (copy_to_user(buf, values, event->read_size)) + ret = -EFAULT; + goto out; + +unlock: + mutex_unlock(&leader->child_mutex); +out: + kfree(values); return ret; } -static int perf_event_read_one(struct perf_event *event, +static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; @@ -3946,7 +4035,7 @@ static bool is_event_hup(struct perf_event *event) * Read the performance event - simple non blocking version for now */ static ssize_t -perf_read_hw(struct perf_event *event, char __user *buf, size_t count) +__perf_read(struct perf_event *event, char __user *buf, size_t count) { u64 read_format = event->attr.read_format; int ret; @@ -3964,9 +4053,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) WARN_ON_ONCE(event->ctx->parent_ctx); if (read_format & PERF_FORMAT_GROUP) - ret = perf_event_read_group(event, read_format, buf); + ret = perf_read_group(event, read_format, buf); else - ret = perf_event_read_one(event, read_format, buf); + ret = perf_read_one(event, read_format, buf); return ret; } @@ -3979,7 +4068,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ctx = perf_event_ctx_lock(event); - ret = perf_read_hw(event, buf, count); + ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); return ret; @@ -4010,7 +4099,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) static void _perf_event_reset(struct perf_event *event) { - (void)perf_event_read(event); + (void)perf_event_read(event, false); 
local64_set(&event->count, 0); perf_event_update_userpage(event); } @@ -7292,24 +7381,49 @@ static void perf_pmu_nop_void(struct pmu *pmu) { } +static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags) +{ +} + static int perf_pmu_nop_int(struct pmu *pmu) { return 0; } -static void perf_pmu_start_txn(struct pmu *pmu) +static DEFINE_PER_CPU(unsigned int, nop_txn_flags); + +static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { + __this_cpu_write(nop_txn_flags, flags); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); } static int perf_pmu_commit_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return 0; + perf_pmu_enable(pmu); return 0; } static void perf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_enable(pmu); } @@ -7548,7 +7662,7 @@ got_cpu_context: pmu->commit_txn = perf_pmu_commit_txn; pmu->cancel_txn = perf_pmu_cancel_txn; } else { - pmu->start_txn = perf_pmu_nop_void; + pmu->start_txn = perf_pmu_nop_txn; pmu->commit_txn = perf_pmu_nop_int; pmu->cancel_txn = perf_pmu_nop_void; } @@ -7636,7 +7750,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return ret; } -struct pmu *perf_init_event(struct perf_event *event) +static struct pmu *perf_init_event(struct perf_event *event) { struct pmu *pmu = NULL; int idx; @@ -9345,14 +9459,6 @@ static void perf_cgroup_exit(struct cgroup_subsys_state *css, struct cgroup_subsys_state *old_css, struct task_struct *task) { - /* - * cgroup_exit() is called in the copy_process() failure path. - * Ignore this case since the task hasn't ran yet, this avoids - * trying to poke a half freed task state from generic code. 
- */ - if (!(task->flags & PF_EXITING)) - return; - task_function_call(task, __perf_cgroup_move, task); } diff --git a/tools/build/Build b/tools/build/Build new file mode 100644 index 000000000000..63a6c34c0c88 --- /dev/null +++ b/tools/build/Build @@ -0,0 +1 @@ +fixdep-y := fixdep.o diff --git a/tools/build/Build.include b/tools/build/Build.include index 4c8daaccb82a..4d000bc959b4 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -55,14 +55,25 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) ### +# Copy dependency data into .cmd file +# - gcc -M dependency info +# - command line to create object 'cmd_object :=' +dep-cmd = $(if $(wildcard $(fixdep)), \ + $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ + rm -f $(depfile); \ + mv -f $(dot-target).tmp $(dot-target).cmd, \ + printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + cat $(depfile) >> $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +### # if_changed_dep - execute command if any prerequisite is newer than # target, or command line has changed and update # dependencies in the cmd file if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ @set -e; \ - $(echo-cmd) $(cmd_$(1)); \ - cat $(depfile) > $(dot-target).cmd; \ - printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + $(echo-cmd) $(cmd_$(1)) && $(dep-cmd)) # if_changed - execute command if any prerequisite is newer than # target, or command line has changed diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index aa5e092c4352..a47bffbae159 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -11,8 +11,9 @@ Unlike the kernel we don't have a single build object 'obj-y' list that where we setup source objects, but 
we support more. This allows one 'Build' file to carry a sources list for multiple build objects. -a) Build framework makefiles ----------------------------- + +Build framework makefiles +------------------------- The build framework consists of 2 Makefiles: @@ -23,7 +24,7 @@ While the 'Build.include' file contains just some generic definitions, the 'Makefile.build' file is the makefile used from the outside. It's interface/usage is following: - $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + $ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) where: @@ -38,8 +39,9 @@ called $(OBJECT)-in.o: which includes all compiled sources described in 'Build' makefiles. -a) Build makefiles ------------------- + +Build makefiles +--------------- The user supplies 'Build' makefiles that contains a objects list, and connects the build to nested directories. @@ -95,8 +97,31 @@ It's only a matter of 2 single commands to create the final binaries: You can check the 'ex' example in 'tools/build/tests/ex' for more details. -b) Rules --------- + +Makefile.include +---------------- + +The tools/build/Makefile.include makefile could be included +via user makefiles to get usefull definitions. + +It defines following interface: + + - build macro definition: + build := -f $(srctree)/tools/build/Makefile.build dir=. obj + + to make it easier to invoke build like: + make $(build)=ex + + +Fixdep +------ +It is necessary to build the fixdep helper before invoking the build. +The Makefile.include file adds the fixdep target, that could be +invoked by the user. + + +Rules +----- The build framework provides standard compilation rules to handle .S and .c compilation. @@ -104,8 +129,9 @@ compilation. It's possible to include special rule if needed (like we do for flex or bison code generation). 
-c) CFLAGS ---------- + +CFLAGS +------ It's possible to alter the standard object C flags in the following way: @@ -115,8 +141,8 @@ It's possible to alter the standard object C flags in the following way: This C flags changes has the scope of the Build makefile they are defined in. -d) Dependencies ---------------- +Dependencies +------------ For each built object file 'a.o' the '.a.cmd' is created and holds: @@ -130,8 +156,8 @@ All existing '.cmd' files are included in the Build process to follow properly the dependencies and trigger a rebuild when necessary. -e) Single rules ---------------- +Single rules +------------ It's possible to build single object file by choice, like: diff --git a/tools/build/Makefile b/tools/build/Makefile new file mode 100644 index 000000000000..a93036272d43 --- /dev/null +++ b/tools/build/Makefile @@ -0,0 +1,43 @@ +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +include $(srctree)/tools//scripts/Makefile.include + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +export Q srctree CC LD + +MAKEFLAGS := --no-print-directory +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +all: fixdep + +clean: + $(call QUIET_CLEAN, fixdep) + $(Q)find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)rm -f fixdep + +$(OUTPUT)fixdep-in.o: FORCE + $(Q)$(MAKE) $(build)=fixdep + +$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o + $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< + +FORCE: + +.PHONY: FORCE diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 0c5f485521d6..4a96473b180f 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -21,6 +21,13 @@ endif build-dir := $(srctree)/tools/build +# Define $(fixdep) for dep-cmd function +ifeq ($(OUTPUT),) + fixdep := $(build-dir)/fixdep +else + fixdep := $(OUTPUT)/fixdep +endif + # Generic definitions include $(build-dir)/Build.include diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c8fe6d177119..72817e4d5e70 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -121,8 +121,9 @@ define feature_print_text_code MSG = $(shell printf '...%30s: %s' $(1) $(2)) endef +FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER) FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) -FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP) +FEATURE_DUMP_FILE := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME)) ifeq ($(dwarf-post-unwind),1) FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text)) @@ -131,16 +132,16 @@ endif # The $(feature_display) controls the default detection message # output. 
It's set if: # - detected features differes from stored features from -# last build (in FEATURE-DUMP file) +# last build (in $(FEATURE_DUMP_FILENAME) file) # - one of the $(FEATURE_DISPLAY) is not detected # - VF is enabled ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)") - $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP) + $(shell echo "$(FEATURE_DUMP)" > $(FEATURE_DUMP_FILENAME)) feature_display := 1 endif -feature_display_check = $(eval $(feature_check_code)) +feature_display_check = $(eval $(feature_check_display_code)) define feature_display_check_code ifneq ($(feature-$(1)), 1) feature_display := 1 diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include new file mode 100644 index 000000000000..6572bb023543 --- /dev/null +++ b/tools/build/Makefile.include @@ -0,0 +1,6 @@ +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +fixdep: + $(Q)$(MAKE) -C $(srctree)/tools/build fixdep + +.PHONY: fixdep diff --git a/tools/build/fixdep.c b/tools/build/fixdep.c new file mode 100644 index 000000000000..1521d36cef0d --- /dev/null +++ b/tools/build/fixdep.c @@ -0,0 +1,168 @@ +/* + * "Optimize" a list of dependencies as spit out by gcc -MD + * for the build framework. + * + * Original author: + * Copyright 2002 by Kai Germaschewski <kai.germaschewski@gmx.de> + * + * This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c), + * Please check it for detailed explanation. This fixdep borow only the + * base transformation of dependecies without the CONFIG mangle. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> + +char *target; +char *depfile; +char *cmdline; + +static void usage(void) +{ + fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n"); + exit(1); +} + +/* + * Print out the commandline prefixed with cmd_<target filename> := + */ +static void print_cmdline(void) +{ + printf("cmd_%s := %s\n\n", target, cmdline); +} + +/* + * Important: The below generated source_foo.o and deps_foo.o variable + * assignments are parsed not only by make, but also by the rather simple + * parser in scripts/mod/sumversion.c. + */ +static void parse_dep_file(void *map, size_t len) +{ + char *m = map; + char *end = m + len; + char *p; + char s[PATH_MAX]; + int is_target; + int saw_any_target = 0; + int is_first_dep = 0; + + while (m < end) { + /* Skip any "white space" */ + while (m < end && (*m == ' ' || *m == '\\' || *m == '\n')) + m++; + /* Find next "white space" */ + p = m; + while (p < end && *p != ' ' && *p != '\\' && *p != '\n') + p++; + /* Is the token we found a target name? */ + is_target = (*(p-1) == ':'); + /* Don't write any target names into the dependency file */ + if (is_target) { + /* The /next/ file is the first dependency */ + is_first_dep = 1; + } else { + /* Save this token/filename */ + memcpy(s, m, p-m); + s[p - m] = 0; + + /* + * Do not list the source file as dependency, + * so that kbuild is not confused if a .c file + * is rewritten into .S or vice versa. Storing + * it in source_* is needed for modpost to + * compute srcversions. + */ + if (is_first_dep) { + /* + * If processing the concatenation of + * multiple dependency files, only + * process the first target name, which + * will be the original source name, + * and ignore any other target names, + * which will be intermediate temporary + * files. 
+ */ + if (!saw_any_target) { + saw_any_target = 1; + printf("source_%s := %s\n\n", + target, s); + printf("deps_%s := \\\n", + target); + } + is_first_dep = 0; + } else + printf(" %s \\\n", s); + } + /* + * Start searching for next token immediately after the first + * "whitespace" character that follows this token. + */ + m = p + 1; + } + + if (!saw_any_target) { + fprintf(stderr, "fixdep: parse error; no targets found\n"); + exit(1); + } + + printf("\n%s: $(deps_%s)\n\n", target, target); + printf("$(deps_%s):\n", target); +} + +static void print_deps(void) +{ + struct stat st; + int fd; + void *map; + + fd = open(depfile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "fixdep: error opening depfile: "); + perror(depfile); + exit(2); + } + if (fstat(fd, &st) < 0) { + fprintf(stderr, "fixdep: error fstat'ing depfile: "); + perror(depfile); + exit(2); + } + if (st.st_size == 0) { + fprintf(stderr, "fixdep: %s is empty\n", depfile); + close(fd); + return; + } + map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if ((long) map == -1) { + perror("fixdep: mmap"); + close(fd); + return; + } + + parse_dep_file(map, st.st_size); + + munmap(map, st.st_size); + + close(fd); +} + +int main(int argc, char **argv) +{ + if (argc != 4) + usage(); + + depfile = argv[1]; + target = argv[2]; + cmdline = argv[3]; + + print_cmdline(); + print_deps(); + + return 0; +} diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 429c7d452101..4d502f9b1a50 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -4,6 +4,7 @@ ex-y += b.o ex-y += b.o ex-y += empty/ ex-y += empty2/ +ex-y += inc.o libex-y += c.o libex-y += d.o diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index 52d2476073a3..c50d5782ad5a 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -1,19 +1,22 @@ -export srctree := ../../../.. +export srctree := $(abspath ../../../..) 
export CC := gcc export LD := ld export AR := ar -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: + +include $(srctree)/tools/build/Makefile.include + ex: ex-in.o libex-in.o gcc -o $@ $^ -ex.%: FORCE +ex.%: fixdep FORCE make -f $(srctree)/tools/build/Makefile.build dir=. $@ -ex-in.o: FORCE +ex-in.o: fixdep FORCE make $(build)=ex -libex-in.o: FORCE +libex-in.o: fixdep FORCE make $(build)=libex clean: diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c index dc42eb2e1a67..57de6074d252 100644 --- a/tools/build/tests/ex/ex.c +++ b/tools/build/tests/ex/ex.c @@ -5,6 +5,7 @@ int c(void); int d(void); int e(void); int f(void); +int inc(void); int main(void) { @@ -14,6 +15,7 @@ int main(void) d(); e(); f(); + inc(); return 0; } diff --git a/tools/build/tests/ex/inc.c b/tools/build/tests/ex/inc.c new file mode 100644 index 000000000000..c20f1e9033a3 --- /dev/null +++ b/tools/build/tests/ex/inc.c @@ -0,0 +1,8 @@ +#ifdef INCLUDE +#include "krava.h" +#endif + +int inc(void) +{ + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh index 5494f8ea7567..44d2a0fade67 100755 --- a/tools/build/tests/run.sh +++ b/tools/build/tests/run.sh @@ -34,9 +34,36 @@ function test_ex_suffix { make -C ex V=1 clean > /dev/null 2>&1 rm -f ex.out } + +function test_ex_include { + make -C ex V=1 clean > ex.out 2>&1 + + # build with krava.h include + touch ex/krava.h + make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + # build without the include + rm -f ex/krava.h ex/ex + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + echo -n Testing.. 
test_ex test_ex_suffix +test_ex_include echo OK diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h new file mode 100644 index 000000000000..bdc3dd8131d4 --- /dev/null +++ b/tools/include/linux/err.h @@ -0,0 +1,49 @@ +#ifndef __TOOLS_LINUX_ERR_H +#define __TOOLS_LINUX_ERR_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/errno.h> + +/* + * Original kernel header comment: + * + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a normal + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + * + * Userspace note: + * The same principle works for userspace, because 'error' pointers + * fall down to the unused hole far from user space, as described + * in Documentation/x86/x86_64/mm.txt for x86_64 arch: + * + * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension + * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole + * + * It should be the same case for other architectures, because + * this code is used in generic kernel code. 
+ */ +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error_) +{ + return (void *) error_; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +#endif /* _LINUX_ERR_H */ diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h new file mode 100644 index 000000000000..3276625595b2 --- /dev/null +++ b/tools/include/linux/filter.h @@ -0,0 +1,231 @@ +/* + * Linux Socket Filter Data Structures + */ +#ifndef __TOOLS_LINUX_FILTER_H +#define __TOOLS_LINUX_FILTER_H + +#include <linux/bpf.h> + +/* ArgX, context and stack frame pointer register positions. Note, + * Arg1, Arg2, Arg3, etc are used as argument mappings of function + * calls in BPF_CALL instruction. + */ +#define BPF_REG_ARG1 BPF_REG_1 +#define BPF_REG_ARG2 BPF_REG_2 +#define BPF_REG_ARG3 BPF_REG_3 +#define BPF_REG_ARG4 BPF_REG_4 +#define BPF_REG_ARG5 BPF_REG_5 +#define BPF_REG_CTX BPF_REG_6 +#define BPF_REG_FP BPF_REG_10 + +/* Additional register mappings for converted user programs. */ +#define BPF_REG_A BPF_REG_0 +#define BPF_REG_X BPF_REG_7 +#define BPF_REG_TMP BPF_REG_8 + +/* BPF program can access up to 512 bytes of stack space. */ +#define MAX_BPF_STACK 512 + +/* Helper macros for filter block array initializers. 
*/ + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ + +#define BPF_ENDIAN(TYPE, DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ + +#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ + 
((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = IMM }) + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ + +#define BPF_LD_IND(SIZE, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ + .dst_reg = 0, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ 
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Function call */ + +#define BPF_EMIT_CALL(FUNC) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((FUNC) - BPF_FUNC_unspec) }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif /* __TOOLS_LINUX_FILTER_H */ diff --git a/tools/lib/api/Build b/tools/lib/api/Build index 3653965cf481..e8b8a23b9bf4 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,2 +1,3 @@ libapi-y += fd/ libapi-y += fs/ +libapi-y += cpu.o diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index fe1b02c2c95b..d85904dc9b38 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -21,12 +21,14 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj API_IN := $(OUTPUT)libapi-in.o +all: + export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include -all: $(LIBFILE) +all: fixdep $(LIBFILE) $(API_IN): FORCE @$(MAKE) $(build)=libapi diff --git a/tools/lib/api/cpu.c b/tools/lib/api/cpu.c new file mode 100644 index 000000000000..8c6489356e3a --- /dev/null +++ b/tools/lib/api/cpu.c @@ -0,0 +1,18 @@ +#include <stdio.h> + +#include "cpu.h" +#include "fs/fs.h" + +int cpu__get_max_freq(unsigned long long *freq) +{ + char entry[PATH_MAX]; + int cpu; + + if (sysfs__read_int("devices/system/cpu/online", &cpu) < 0) + return -1; + + snprintf(entry, sizeof(entry), + "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu); + + return sysfs__read_ull(entry, freq); +} diff --git a/tools/lib/api/cpu.h b/tools/lib/api/cpu.h new file mode 100644 index 000000000000..81e9d3955961 --- /dev/null +++ b/tools/lib/api/cpu.h @@ -0,0 +1,6 @@ +#ifndef __API_CPU__ +#define __API_CPU__ + +int cpu__get_max_freq(unsigned long long *freq); + +#endif /* __API_CPU__ */ diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build index 6de5a4f0b501..f4ed9629ae85 100644 --- a/tools/lib/api/fs/Build +++ b/tools/lib/api/fs/Build @@ -1,4 +1,2 @@ libapi-y += fs.o -libapi-y += debugfs.o -libapi-y += findfs.o -libapi-y += tracefs.o +libapi-y += tracing_path.o diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c deleted file mode 100644 index eb7cf4d18f8a..000000000000 --- a/tools/lib/api/fs/debugfs.c +++ /dev/null @@ -1,129 +0,0 @@ -#define _GNU_SOURCE -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <stdbool.h> -#include <sys/vfs.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mount.h> -#include <linux/kernel.h> - -#include "debugfs.h" -#include "tracefs.h" - -#ifndef DEBUGFS_DEFAULT_PATH -#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" -#endif - -char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; - -static const char 
* const debugfs_known_mountpoints[] = { - DEBUGFS_DEFAULT_PATH, - "/debug", - 0, -}; - -static bool debugfs_found; - -bool debugfs_configured(void) -{ - return debugfs_find_mountpoint() != NULL; -} - -/* find the path to the mounted debugfs */ -const char *debugfs_find_mountpoint(void) -{ - const char *ret; - - if (debugfs_found) - return (const char *)debugfs_mountpoint; - - ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, - debugfs_mountpoint, PATH_MAX + 1, - debugfs_known_mountpoints); - if (ret) - debugfs_found = true; - - return ret; -} - -/* mount the debugfs somewhere if it's not mounted */ -char *debugfs_mount(const char *mountpoint) -{ - /* see if it's already mounted */ - if (debugfs_find_mountpoint()) - goto out; - - /* if not mounted and no argument */ - if (mountpoint == NULL) { - /* see if environment variable set */ - mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); - /* if no environment variable, use default */ - if (mountpoint == NULL) - mountpoint = DEBUGFS_DEFAULT_PATH; - } - - if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) - return NULL; - - /* save the mountpoint */ - debugfs_found = true; - strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); -out: - return debugfs_mountpoint; -} - -int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename) -{ - char sbuf[128]; - - switch (err) { - case ENOENT: - if (debugfs_found) { - snprintf(buf, size, - "Error:\tFile %s/%s not found.\n" - "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", - debugfs_mountpoint, filename); - break; - } - snprintf(buf, size, "%s", - "Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); - break; - case EACCES: { - const char *mountpoint = debugfs_mountpoint; - - if (!access(debugfs_mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) { 
- const char *tracefs_mntpoint = tracefs_find_mountpoint(); - - if (tracefs_mntpoint) - mountpoint = tracefs_mntpoint; - } - - snprintf(buf, size, - "Error:\tNo permissions to read %s/%s\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, filename, mountpoint); - } - break; - default: - snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); - break; - } - - return 0; -} - -int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) -{ - char path[PATH_MAX]; - - snprintf(path, PATH_MAX, "tracing/events/%s/%s", sys, name ?: "*"); - - return debugfs__strerror_open(err, buf, size, path); -} diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h deleted file mode 100644 index 455023698d2b..000000000000 --- a/tools/lib/api/fs/debugfs.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __API_DEBUGFS_H__ -#define __API_DEBUGFS_H__ - -#include "findfs.h" - -#ifndef DEBUGFS_MAGIC -#define DEBUGFS_MAGIC 0x64626720 -#endif - -#ifndef PERF_DEBUGFS_ENVIRONMENT -#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" -#endif - -bool debugfs_configured(void); -const char *debugfs_find_mountpoint(void); -char *debugfs_mount(const char *mountpoint); - -extern char debugfs_mountpoint[]; - -int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename); -int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); - -#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c deleted file mode 100644 index 49946cb6d7af..000000000000 --- a/tools/lib/api/fs/findfs.c +++ /dev/null @@ -1,63 +0,0 @@ -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> -#include <sys/vfs.h> - -#include "findfs.h" - -/* verify that a mountpoint is actually the type we want */ - -int valid_mountpoint(const char *mount, long magic) -{ - struct statfs st_fs; - - if (statfs(mount, &st_fs) < 0) - 
return -ENOENT; - else if ((long)st_fs.f_type != magic) - return -ENOENT; - - return 0; -} - -/* find the path to a mounted file system */ -const char *find_mountpoint(const char *fstype, long magic, - char *mountpoint, int len, - const char * const *known_mountpoints) -{ - const char * const *ptr; - char format[128]; - char type[100]; - FILE *fp; - - if (known_mountpoints) { - ptr = known_mountpoints; - while (*ptr) { - if (valid_mountpoint(*ptr, magic) == 0) { - strncpy(mountpoint, *ptr, len - 1); - mountpoint[len-1] = 0; - return mountpoint; - } - ptr++; - } - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); - - while (fscanf(fp, format, mountpoint, type) == 2) { - if (strcmp(type, fstype) == 0) - break; - } - fclose(fp); - - if (strcmp(type, fstype) != 0) - return NULL; - - return mountpoint; -} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h deleted file mode 100644 index b6f5d05acc42..000000000000 --- a/tools/lib/api/fs/findfs.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __API_FINDFS_H__ -#define __API_FINDFS_H__ - -#include <stdbool.h> - -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems <limits.h> would have given us this, but not on some systems - * (e.g. GNU/Hurd). 
- */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -const char *find_mountpoint(const char *fstype, long magic, - char *mountpoint, int len, - const char * const *known_mountpoints); - -int valid_mountpoint(const char *mount, long magic); - -#endif /* __API_FINDFS_H__ */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 128ef6332a6b..459599d1b6c4 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -1,7 +1,6 @@ -/* TODO merge/factor in debugfs.c here */ - #include <ctype.h> #include <errno.h> +#include <limits.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -11,10 +10,29 @@ #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> +#include <sys/mount.h> -#include "debugfs.h" #include "fs.h" +#define _STR(x) #x +#define STR(x) _STR(x) + +#ifndef SYSFS_MAGIC +#define SYSFS_MAGIC 0x62656572 +#endif + +#ifndef PROC_SUPER_MAGIC +#define PROC_SUPER_MAGIC 0x9fa0 +#endif + +#ifndef DEBUGFS_MAGIC +#define DEBUGFS_MAGIC 0x64626720 +#endif + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + static const char * const sysfs__fs_known_mountpoints[] = { "/sys", 0, @@ -25,19 +43,48 @@ static const char * const procfs__known_mountpoints[] = { 0, }; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +static const char * const debugfs__known_mountpoints[] = { + DEBUGFS_DEFAULT_PATH, + "/debug", + 0, +}; + + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +static const char * const tracefs__known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + struct fs { const char *name; const char * const *mounts; - char path[PATH_MAX + 1]; + char path[PATH_MAX]; bool found; long magic; }; enum { - FS__SYSFS = 0, - FS__PROCFS = 1, + FS__SYSFS = 0, + FS__PROCFS = 1, + FS__DEBUGFS = 2, + FS__TRACEFS = 3, }; +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + static struct 
fs fs__entries[] = { [FS__SYSFS] = { .name = "sysfs", @@ -49,6 +96,16 @@ static struct fs fs__entries[] = { .mounts = procfs__known_mountpoints, .magic = PROC_SUPER_MAGIC, }, + [FS__DEBUGFS] = { + .name = "debugfs", + .mounts = debugfs__known_mountpoints, + .magic = DEBUGFS_MAGIC, + }, + [FS__TRACEFS] = { + .name = "tracefs", + .mounts = tracefs__known_mountpoints, + .magic = TRACEFS_MAGIC, + }, }; static bool fs__read_mounts(struct fs *fs) @@ -159,14 +216,54 @@ static const char *fs__mountpoint(int idx) return fs__get_mountpoint(fs); } -#define FS__MOUNTPOINT(name, idx) \ -const char *name##__mountpoint(void) \ -{ \ - return fs__mountpoint(idx); \ +static const char *mount_overload(struct fs *fs) +{ + size_t name_len = strlen(fs->name); + /* "PERF_" + name + "_ENVIRONMENT" + '\0' */ + char upper_name[5 + name_len + 12 + 1]; + + snprintf(upper_name, name_len, "PERF_%s_ENVIRONMENT", fs->name); + mem_toupper(upper_name, name_len); + + return getenv(upper_name) ?: *fs->mounts; +} + +static const char *fs__mount(int idx) +{ + struct fs *fs = &fs__entries[idx]; + const char *mountpoint; + + if (fs__mountpoint(idx)) + return (const char *)fs->path; + + mountpoint = mount_overload(fs); + + if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0) + return NULL; + + return fs__check_mounts(fs) ? 
fs->path : NULL; +} + +#define FS(name, idx) \ +const char *name##__mountpoint(void) \ +{ \ + return fs__mountpoint(idx); \ +} \ + \ +const char *name##__mount(void) \ +{ \ + return fs__mount(idx); \ +} \ + \ +bool name##__configured(void) \ +{ \ + return name##__mountpoint() != NULL; \ } -FS__MOUNTPOINT(sysfs, FS__SYSFS); -FS__MOUNTPOINT(procfs, FS__PROCFS); +FS(sysfs, FS__SYSFS); +FS(procfs, FS__PROCFS); +FS(debugfs, FS__DEBUGFS); +FS(tracefs, FS__TRACEFS); int filename__read_int(const char *filename, int *value) { @@ -185,6 +282,50 @@ int filename__read_int(const char *filename, int *value) return err; } +int filename__read_ull(const char *filename, unsigned long long *value) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = strtoull(line, NULL, 10); + if (*value != ULLONG_MAX) + err = 0; + } + + close(fd); + return err; +} + +int sysfs__read_ull(const char *entry, unsigned long long *value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_ull(path, value); +} + +int sysfs__read_int(const char *entry, int *value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_int(path, value); +} + int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 6caa2bbc6cec..d024a7f682f6 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -1,17 +1,33 @@ #ifndef __API_FS__ #define __API_FS__ -#ifndef SYSFS_MAGIC -#define SYSFS_MAGIC 0x62656572 -#endif +#include <stdbool.h> -#ifndef PROC_SUPER_MAGIC -#define PROC_SUPER_MAGIC 0x9fa0 +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. 
GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 #endif -const char *sysfs__mountpoint(void); -const char *procfs__mountpoint(void); +#define FS(name) \ + const char *name##__mountpoint(void); \ + const char *name##__mount(void); \ + bool name##__configured(void); \ + +FS(sysfs) +FS(procfs) +FS(debugfs) +FS(tracefs) + +#undef FS + int filename__read_int(const char *filename, int *value); +int filename__read_ull(const char *filename, unsigned long long *value); + int sysctl__read_int(const char *sysctl, int *value); +int sysfs__read_int(const char *entry, int *value); +int sysfs__read_ull(const char *entry, unsigned long long *value); #endif /* __API_FS__ */ diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c deleted file mode 100644 index e4aa9688b71e..000000000000 --- a/tools/lib/api/fs/tracefs.c +++ /dev/null @@ -1,78 +0,0 @@ -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <stdbool.h> -#include <sys/vfs.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mount.h> -#include <linux/kernel.h> - -#include "tracefs.h" - -#ifndef TRACEFS_DEFAULT_PATH -#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" -#endif - -char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; - -static const char * const tracefs_known_mountpoints[] = { - TRACEFS_DEFAULT_PATH, - "/sys/kernel/debug/tracing", - "/tracing", - "/trace", - 0, -}; - -static bool tracefs_found; - -bool tracefs_configured(void) -{ - return tracefs_find_mountpoint() != NULL; -} - -/* find the path to the mounted tracefs */ -const char *tracefs_find_mountpoint(void) -{ - const char *ret; - - if (tracefs_found) - return (const char *)tracefs_mountpoint; - - ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, - tracefs_mountpoint, PATH_MAX + 1, - tracefs_known_mountpoints); - - if (ret) - tracefs_found = true; - - return ret; -} - -/* mount the tracefs somewhere if it's not mounted */ -char *tracefs_mount(const 
char *mountpoint) -{ - /* see if it's already mounted */ - if (tracefs_find_mountpoint()) - goto out; - - /* if not mounted and no argument */ - if (mountpoint == NULL) { - /* see if environment variable set */ - mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); - /* if no environment variable, use default */ - if (mountpoint == NULL) - mountpoint = TRACEFS_DEFAULT_PATH; - } - - if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) - return NULL; - - /* save the mountpoint */ - tracefs_found = true; - strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); -out: - return tracefs_mountpoint; -} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h deleted file mode 100644 index da780ac49acb..000000000000 --- a/tools/lib/api/fs/tracefs.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __API_TRACEFS_H__ -#define __API_TRACEFS_H__ - -#include "findfs.h" - -#ifndef TRACEFS_MAGIC -#define TRACEFS_MAGIC 0x74726163 -#endif - -#ifndef PERF_TRACEFS_ENVIRONMENT -#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" -#endif - -bool tracefs_configured(void); -const char *tracefs_find_mountpoint(void); -int tracefs_valid_mountpoint(const char *debugfs); -char *tracefs_mount(const char *mountpoint); - -extern char tracefs_mountpoint[]; - -#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c new file mode 100644 index 000000000000..a26bb5ea8283 --- /dev/null +++ b/tools/lib/api/fs/tracing_path.c @@ -0,0 +1,135 @@ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include "fs.h" + +#include "tracing_path.h" + + +char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; +char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; +char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; + + +static void __tracing_path_set(const char *tracing, const char *mountpoint) +{ + snprintf(tracing_mnt, 
sizeof(tracing_mnt), "%s", mountpoint); + snprintf(tracing_path, sizeof(tracing_path), "%s/%s", + mountpoint, tracing); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); +} + +static const char *tracing_path_tracefs_mount(void) +{ + const char *mnt; + + mnt = tracefs__mount(); + if (!mnt) + return NULL; + + __tracing_path_set("", mnt); + + return mnt; +} + +static const char *tracing_path_debugfs_mount(void) +{ + const char *mnt; + + mnt = debugfs__mount(); + if (!mnt) + return NULL; + + __tracing_path_set("tracing/", mnt); + + return mnt; +} + +const char *tracing_path_mount(void) +{ + const char *mnt; + + mnt = tracing_path_tracefs_mount(); + if (mnt) + return mnt; + + mnt = tracing_path_debugfs_mount(); + + return mnt; +} + +void tracing_path_set(const char *mntpt) +{ + __tracing_path_set("tracing/", mntpt); +} + +char *get_tracing_file(const char *name) +{ + char *file; + + if (asprintf(&file, "%s/%s", tracing_path, name) < 0) + return NULL; + + return file; +} + +void put_tracing_file(char *file) +{ + free(file); +} + +static int strerror_open(int err, char *buf, size_t size, const char *filename) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + /* + * We will get here if we can't find the tracepoint, but one of + * debugfs or tracefs is configured, which means you probably + * want some tracepoint which wasn't compiled in your kernel. 
+ * - jirka + */ + if (debugfs__configured() || tracefs__configured()) { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", + tracing_events_path, filename); + break; + } + snprintf(buf, size, "%s", + "Error:\tUnable to find debugfs/tracefs\n" + "Hint:\tWas your kernel compiled with debugfs/tracefs support?\n" + "Hint:\tIs the debugfs/tracefs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: { + snprintf(buf, size, + "Error:\tNo permissions to read %s/%s\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + tracing_events_path, filename, tracing_mnt); + } + break; + default: + snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} + +int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "%s/%s", sys, name ?: "*"); + + return strerror_open(err, buf, size, path); +} diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h new file mode 100644 index 000000000000..3f233ac70b6f --- /dev/null +++ b/tools/lib/api/fs/tracing_path.h @@ -0,0 +1,16 @@ +#ifndef __API_FS_TRACING_PATH_H +#define __API_FS_TRACING_PATH_H + +#include <linux/types.h> + +extern char tracing_path[]; +extern char tracing_events_path[]; + +void tracing_path_set(const char *mountpoint); +const char *tracing_path_mount(void); + +char *get_tracing_file(const char *name); +void put_tracing_file(char *file); + +int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); +#endif /* __API_FS_TRACING_PATH_H */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f68d23a0b487..fc9af57b666e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -64,8 +64,9 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 
'srctree' to be $(srctree)) endif -FEATURE_DISPLAY = libelf libelf-getphdrnum libelf-mmap bpf -FEATURE_TESTS = libelf bpf +FEATURE_USER = .libbpf +FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf +FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) @@ -122,8 +123,10 @@ endif # the same command line setup. MAKEOVERRIDES= +all: + export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) @@ -132,7 +135,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: $(VERSION_FILES) all_cmd +all: fixdep $(VERSION_FILES) all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 18ffccf00426..7e319afac78a 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -93,8 +93,10 @@ else print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif +all: + export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -109,7 +111,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 18bc271a4bbc..5e431077fcd6 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -2,6 +2,12 @@ #include <stdio.h> #include <stdlib.h> +u8 kallsyms2elf_type(char type) +{ + type = tolower(type); + return (type == 't' || type == 'w') ? 
STT_FUNC : STT_OBJECT; +} + int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)) diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 6084f5e18b3c..4071316a766e 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -9,7 +9,7 @@ #define KSYM_NAME_LEN 256 #endif -static inline u8 kallsyms2elf_type(char type) +static inline u8 kallsyms2elf_binding(char type) { if (type == 'W') return STB_WEAK; @@ -17,6 +17,8 @@ static inline u8 kallsyms2elf_type(char type) return isupper(type) ? STB_GLOBAL : STB_LOCAL; } +u8 kallsyms2elf_type(char type); + int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cf42b090477b..9aa107a0ce8c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -848,6 +848,7 @@ static void free_arg(struct print_arg *arg) free(arg->bitmask.bitmask); break; case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: free(arg->dynarray.index); break; case PRINT_OP: @@ -2729,6 +2730,42 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char ** } static enum event_type +process_dynamic_array_len(struct event_format *event, struct print_arg *arg, + char **tok) +{ + struct format_field *field; + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_DYNAMIC_ARRAY_LEN; + + /* Find the field */ + field = pevent_find_field(event, token); + if (!field) + goto out_free; + + arg->dynarray.field = field; + arg->dynarray.index = 0; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static enum 
event_type process_paren(struct event_format *event, struct print_arg *arg, char **tok) { struct print_arg *item_arg; @@ -2975,6 +3012,10 @@ process_function(struct event_format *event, struct print_arg *arg, free_token(token); return process_dynamic_array(event, arg, tok); } + if (strcmp(token, "__get_dynamic_array_len") == 0) { + free_token(token); + return process_dynamic_array_len(event, arg, tok); + } func = find_func_handler(event->pevent, token); if (func) { @@ -3655,14 +3696,25 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg goto out_warning_op; } break; + case PRINT_DYNAMIC_ARRAY_LEN: + offset = pevent_read_number(pevent, + data + arg->dynarray.field->offset, + arg->dynarray.field->size); + /* + * The total allocated length of the dynamic array is + * stored in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + val = (unsigned long long)(offset >> 16); + break; case PRINT_DYNAMIC_ARRAY: /* Without [], we pass the address to the dynamic data */ offset = pevent_read_number(pevent, data + arg->dynarray.field->offset, arg->dynarray.field->size); /* - * The actual length of the dynamic array is stored - * in the top half of the field, and the offset + * The total allocated length of the dynamic array is + * stored in the top half of the field, and the offset * is in the bottom half of the 32 bit field. 
*/ offset &= 0xffff; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 204befb05a17..6fc83c7edbe9 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -294,6 +294,7 @@ enum print_arg_type { PRINT_OP, PRINT_FUNC, PRINT_BITMASK, + PRINT_DYNAMIC_ARRAY_LEN, }; struct print_arg { diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index c94c9de3173e..be764f9ec769 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -671,6 +671,7 @@ The letters are: e synthesize tracing error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) "Instructions" events look like they were recorded by "perf record -e instructions". @@ -707,12 +708,26 @@ on the sample is *not* adjusted and reflects the last known value of TSC. For Intel PT, the default period is 100us. +Setting it to a zero period means "as often as possible". + +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. e.g. --itrace=ig32 --itrace=xg32 +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + To disable trace decoding entirely, use the option --no-itrace. @@ -749,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is removed and replaced with the synthesized events. e.g. perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. 
The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 2ff946677e3b..65453f4c7006 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -6,6 +6,7 @@ e synthesize error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) The default is all events i.e. the same as --itrace=ibxe @@ -20,3 +21,6 @@ Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. + + Also the number of last branch entries (default 64, max. 1024) for + instructions or transactions events can be specified. diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0c721c3e37e1..0b1cedeef895 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -50,6 +50,9 @@ OPTIONS include::itrace.txt[] +--strip:: + Use with --itrace to strip out non-synthesized events. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index bada8933fdd4..79483f40e991 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -30,6 +30,7 @@ counted. The following modifiers exist: G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level + P - use maximum detected precise level S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU @@ -125,6 +126,8 @@ To limit the list use: . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. +. As a last resort, it will do a substring search in all event names. + One or more types can be used at the same time, listing the events for the types specified. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9c7981bfddad..e4fdeeb51123 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -68,7 +68,7 @@ OPTIONS --sort=:: Sort histogram entries by given key(s) - multiple keys can be specified in CSV format. Following sort keys are available: - pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight. + pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight. Each key has following meaning: @@ -79,6 +79,7 @@ OPTIONS - parent: name of function matched to the parent regex filter. Unmatched entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample + - socket: processor socket number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. - srcfile: file name of the source file of the same. Requires dwarf @@ -191,7 +192,7 @@ OPTIONS when available. 
Usually more convenient to use --branch-history for this. - Default: fractal,0.5,callee,function. + Default: graph,0.5,caller --children:: Accumulate callchain of children to parent entry so that then can @@ -204,6 +205,8 @@ OPTIONS beyond the specified depth will be ignored. This is a trade-off between information loss and faster processing especially for workloads that can have a very long callchain stack. + Note that when using the --itrace option the synthesized callchain size + will override this value if the synthesized callchain size is bigger. Default: 127 @@ -349,6 +352,9 @@ include::itrace.txt[] This option extends the perf report to show reference callgraphs, which collected by reference event, in no callgraph event. +--socket-filter:: + Only report the samples on the processor socket that match with this filter + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index dc3ec783b7bd..b3b42f9285df 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -249,6 +249,9 @@ include::itrace.txt[] --full-source-path:: Show the full path for source files for srcline output. +--ns:: + Use 9 decimal places when displaying time (i.e. 
show the nanoseconds) + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 47469abdcc1c..4e074a660826 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: - Print count deltas every N milliseconds (minimum: 100ms) - example: perf stat -I 1000 -e cycles -a sleep 5 +Print count deltas every N milliseconds (minimum: 10ms) +The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. + example: 'perf stat -I 1000 -e cycles -a sleep 5' --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 2b131776363e..864e37597252 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -27,6 +27,14 @@ OPTIONS Setup buildid cache directory. It has higher priority than buildid.dir config file option. +-v:: +--version:: + Display perf version. + +-h:: +--help:: + Run perf help command. 
+ DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index af009bd6e6b7..9e6bdf5b2df6 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -41,6 +41,7 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h +tools/include/linux/filter.h tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h @@ -49,6 +50,7 @@ tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h tools/include/linux/types.h +tools/include/linux/err.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d9863cb96f59..56517d304772 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -297,16 +297,16 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include -$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE +$(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): fixdep FORCE $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -349,27 +349,27 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) 
-single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep -$(OUTPUT)%.o: %.c single_dep FORCE +$(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.c single_dep FORCE +$(OUTPUT)%.i: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.s: %.c single_dep FORCE +$(OUTPUT)%.s: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-bison.o: %.c single_dep FORCE +$(OUTPUT)%-bison.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-flex.o: %.c single_dep FORCE +$(OUTPUT)%-flex.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.o: %.S single_dep FORCE +$(OUTPUT)%.o: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.S single_dep FORCE +$(OUTPUT)%.i: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) @@ -389,7 +389,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): FORCE +$(LIBPERF_IN): fixdep FORCE $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -397,10 +397,10 @@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): FORCE +$(LIBTRACEEVENT): fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a -libtraceevent_plugins: FORCE +libtraceevent_plugins: fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins @@ -413,7 +413,7 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) 
O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE +$(LIBAPI): fixdep FORCE $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a $(LIBAPI)-clean: @@ -459,7 +459,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) -TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol +TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol ../include TAG_FILES= ../../include/uapi/linux/perf_event.h TAGS: @@ -591,6 +591,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare .PHONY: libtraceevent_plugins diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index b00dfd92ea73..e83c8ce24303 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -128,9 +128,8 @@ static const char *normalize_arch(char *arch) return arch; } -static int perf_session_env__lookup_binutils_path(struct perf_env *env, - const char *name, - const char **path) +static int perf_env__lookup_binutils_path(struct perf_env *env, + const char *name, const char **path) { int idx; const char *arch, *cross_env; @@ -206,7 +205,7 @@ out_error: return -1; } -int perf_session_env__lookup_objdump(struct perf_env *env) +int perf_env__lookup_objdump(struct perf_env *env) { /* * For live mode, env->arch will be NULL and we can use @@ -215,6 +214,5 @@ int perf_session_env__lookup_objdump(struct perf_env *env) if (env->arch == NULL) return 0; - return perf_session_env__lookup_binutils_path(env, "objdump", - &objdump_path); + return perf_env__lookup_binutils_path(env, "objdump", &objdump_path); } diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 20176df69fc8..7529cfb143ce 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -1,10 +1,10 @@ #ifndef 
ARCH_PERF_COMMON_H #define ARCH_PERF_COMMON_H -#include "../util/session.h" +#include "../util/env.h" extern const char *objdump_path; -int perf_session_env__lookup_objdump(struct perf_env *env); +int perf_env__lookup_objdump(struct perf_env *env); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index 41bf61da476a..db52fa22d3a1 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +libperf-y += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 21322e0385b8..09ba923debe8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,3 +2,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h new file mode 100644 index 000000000000..7ed00f4b0908 --- /dev/null +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -0,0 +1,19 @@ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +/* Tests */ +int test__rdpmc(void); +int test__perf_time_to_tsc(void); +int test__insn_x86(void); +int test__intel_cqm_count_nmi_context(void); + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index b30eff9bcc83..cbb7e978166b 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,2 +1,8 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o +libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o +libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +libperf-y += arch-tests.o +libperf-y += rdpmc.o +libperf-y += perf-time-to-tsc.o +libperf-$(CONFIG_AUXTRACE) += insn-x86.o 
+libperf-y += intel-cqm.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c new file mode 100644 index 000000000000..2218cb64f840 --- /dev/null +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -0,0 +1,34 @@ +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { + { + .desc = "x86 rdpmc test", + .func = test__rdpmc, + }, + { + .desc = "Test converting perf time to TSC", + .func = test__perf_time_to_tsc, + }, +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif +#ifdef HAVE_AUXTRACE_SUPPORT + { + .desc = "Test x86 instruction decoder - new instructions", + .func = test__insn_x86, + }, +#endif + { + .desc = "Test intel cqm nmi context read", + .func = test__intel_cqm_count_nmi_context, + }, + { + .func = NULL, + }, + +}; diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index d8bbf7ad1681..7f209ce827bf 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -5,6 +5,7 @@ #include "event.h" #include "debug.h" #include "tests/tests.h" +#include "arch-tests.h" #define STACK_SIZE 8192 diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk new file mode 100644 index 000000000000..a21454835cd4 --- /dev/null +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk @@ -0,0 +1,75 @@ +#!/bin/awk -f +# gen-insn-x86-dat.awk: script to convert data for the insn-x86 test +# Copyright (c) 2015, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. 
+# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +BEGIN { + print "/*" + print " * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk" + print " * from insn-x86-dat-src.c for inclusion by insn-x86.c" + print " * Do not change this code." + print "*/\n" + op = "" + branch = "" + rel = 0 + going = 0 +} + +/ Start here / { + going = 1 +} + +/ Stop here / { + going = 0 +} + +/^\s*[0-9a-fA-F]+\:/ { + if (going) { + colon_pos = index($0, ":") + useful_line = substr($0, colon_pos + 1) + first_pos = match(useful_line, "[0-9a-fA-F]") + useful_line = substr(useful_line, first_pos) + gsub("\t", "\\t", useful_line) + printf "{{" + len = 0 + for (i = 2; i <= NF; i++) { + if (match($i, "^[0-9a-fA-F][0-9a-fA-F]$")) { + printf "0x%s, ", $i + len += 1 + } else { + break + } + } + printf "}, %d, %s, \"%s\", \"%s\",", len, rel, op, branch + printf "\n\"%s\",},\n", useful_line + op = "" + branch = "" + rel = 0 + } +} + +/ Expecting: / { + expecting_str = " Expecting: " + expecting_len = length(expecting_str) + expecting_pos = index($0, expecting_str) + useful_line = substr($0, expecting_pos + expecting_len) + for (i = 1; i <= NF; i++) { + if ($i == "Expecting:") { + i++ + op = $i + i++ + branch = $i + i++ + rel = $i + break + } + } +} diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh new file mode 100755 index 000000000000..2d4ef94cff98 --- /dev/null +++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# gen-insn-x86-dat: generate data for the insn-x86 test +# Copyright (c) 2015, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. 
+# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +set -e + +if [ "$(uname -m)" != "x86_64" ]; then + echo "ERROR: This script only works on x86_64" + exit 1 +fi + +cd $(dirname $0) + +trap 'echo "Might need a more recent version of binutils"' EXIT + +echo "Compiling insn-x86-dat-src.c to 64-bit object" + +gcc -g -c insn-x86-dat-src.c + +objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-64.c + +rm -f insn-x86-dat-src.o + +echo "Compiling insn-x86-dat-src.c to 32-bit object" + +gcc -g -c -m32 insn-x86-dat-src.c + +objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-32.c + +rm -f insn-x86-dat-src.o + +trap - EXIT + +echo "Done (use git diff to see the changes)" diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c new file mode 100644 index 000000000000..3b491cfe204e --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -0,0 +1,658 @@ +/* + * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk + * from insn-x86-dat-src.c for inclusion by insn-x86.c + * Do not change this code. 
+*/ + +{{0x0f, 0x31, }, 2, 0, "", "", +"0f 31 \trdtsc ",}, +{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f3 0f 1b 00 \tbndmk (%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 05 78 56 34 12 \tbndmk 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f3 0f 1b 18 \tbndmk (%eax),%bnd3",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1b 04 01 \tbndmk (%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1b 04 08 \tbndmk (%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1b 04 c8 \tbndmk (%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1b 40 12 \tbndmk 0x12(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1b 45 12 \tbndmk 0x12(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 01 12 \tbndmk 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 05 12 \tbndmk 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 08 12 \tbndmk 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 c8 12 \tbndmk 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 
0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f3 0f 1a 00 \tbndcl (%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 05 78 56 34 12 \tbndcl 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f3 0f 1a 18 \tbndcl (%eax),%bnd3",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1a 04 01 \tbndcl (%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1a 04 08 \tbndcl (%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1a 04 c8 \tbndcl (%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1a 40 12 \tbndcl 0x12(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1a 45 12 \tbndcl 0x12(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 01 12 \tbndcl 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 05 12 \tbndcl 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 08 12 \tbndcl 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 c8 12 \tbndcl 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%eax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%ebp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 01 78 56 34 12 
\tbndcl 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f3 0f 1a c0 \tbndcl %eax,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f2 0f 1a 00 \tbndcu (%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 05 78 56 34 12 \tbndcu 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f2 0f 1a 18 \tbndcu (%eax),%bnd3",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1a 04 01 \tbndcu (%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1a 04 08 \tbndcu (%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1a 04 c8 \tbndcu (%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1a 40 12 \tbndcu 0x12(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1a 45 12 \tbndcu 0x12(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 01 12 \tbndcu 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 05 12 \tbndcu 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 08 12 \tbndcu 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 c8 12 \tbndcu 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 
0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f2 0f 1a c0 \tbndcu %eax,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f2 0f 1b 00 \tbndcn (%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 05 78 56 34 12 \tbndcn 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f2 0f 1b 18 \tbndcn (%eax),%bnd3",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1b 04 01 \tbndcn (%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1b 04 08 \tbndcn (%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1b 04 c8 \tbndcn (%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1b 40 12 \tbndcn 0x12(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1b 45 12 \tbndcn 0x12(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 01 12 \tbndcn 0x12(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 05 12 \tbndcn 0x12(%ebp,%eax,1),%bnd0",}, +{{0xf2, 
0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 08 12 \tbndcn 0x12(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 c8 12 \tbndcn 0x12(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%eax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%ebp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "", +"f2 0f 1b c0 \tbndcn %eax,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"66 0f 1a 00 \tbndmov (%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 05 78 56 34 12 \tbndmov 0x12345678,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"66 0f 1a 18 \tbndmov (%eax),%bnd3",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1a 04 01 \tbndmov (%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1a 04 08 \tbndmov (%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1a 04 c8 \tbndmov (%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1a 40 12 \tbndmov 0x12(%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x45, 0x12, 
}, 5, 0, "", "", +"66 0f 1a 45 12 \tbndmov 0x12(%ebp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 01 12 \tbndmov 0x12(%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 05 12 \tbndmov 0x12(%ebp,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 08 12 \tbndmov 0x12(%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 c8 12 \tbndmov 0x12(%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%eax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%ebp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,8),%bnd0",}, +{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"66 0f 1b 00 \tbndmov %bnd0,(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 05 78 56 34 12 \tbndmov %bnd0,0x12345678",}, +{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"66 0f 1b 18 \tbndmov %bnd3,(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1b 04 01 \tbndmov %bnd0,(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1b 04 08 \tbndmov %bnd0,(%eax,%ecx,1)",}, 
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1b 04 c8 \tbndmov %bnd0,(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%ebp)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%ebp,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%eax,%ecx,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax)",}, +{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%ecx,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp,%eax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "", +"66 0f 1a c8 \tbndmov %bnd0,%bnd1",}, +{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "", +"66 0f 1a c1 \tbndmov %bnd1,%bnd0",}, +{{0x0f, 0x1a, 0x00, }, 3, 0, "", "", +"0f 1a 00 \tbndldx (%eax),%bnd0",}, +{{0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 05 78 56 34 12 \tbndldx 0x12345678,%bnd0",}, +{{0x0f, 0x1a, 0x18, }, 3, 0, "", "", +"0f 1a 18 
\tbndldx (%eax),%bnd3",}, +{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "", +"0f 1a 04 01 \tbndldx (%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "", +"0f 1a 04 08 \tbndldx (%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "", +"0f 1a 40 12 \tbndldx 0x12(%eax),%bnd0",}, +{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "", +"0f 1a 45 12 \tbndldx 0x12(%ebp),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1a 44 01 12 \tbndldx 0x12(%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1a 44 05 12 \tbndldx 0x12(%ebp,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1a 44 08 12 \tbndldx 0x12(%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%eax),%bnd0",}, +{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%ebp),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%ecx,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%ebp,%eax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%eax,%ecx,1),%bnd0",}, +{{0x0f, 0x1b, 0x00, }, 3, 0, "", "", +"0f 1b 00 \tbndstx %bnd0,(%eax)",}, +{{0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 05 78 56 34 12 \tbndstx %bnd0,0x12345678",}, +{{0x0f, 0x1b, 0x18, }, 3, 0, "", "", +"0f 1b 18 \tbndstx %bnd3,(%eax)",}, +{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "", +"0f 1b 04 01 \tbndstx %bnd0,(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%eax,1)",}, +{{0x0f, 0x1b, 0x04, 0x08, 
}, 4, 0, "", "", +"0f 1b 04 08 \tbndstx %bnd0,(%eax,%ecx,1)",}, +{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "", +"0f 1b 40 12 \tbndstx %bnd0,0x12(%eax)",}, +{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "", +"0f 1b 45 12 \tbndstx %bnd0,0x12(%ebp)",}, +{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%ebp,%eax,1)",}, +{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%eax,%ecx,1)",}, +{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax)",}, +{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp)",}, +{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%ecx,%eax,1)",}, +{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp,%eax,1)",}, +{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",}, +{{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional", +"f2 e8 fc ff ff ff \tbnd call 3c3 <main+0x3c3>",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xc3, }, 2, 0, "ret", "indirect", +"f2 c3 \tbnd ret ",}, +{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", +"f2 e9 fc ff ff ff \tbnd jmp 3ce <main+0x3ce>",}, +{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", +"f2 e9 fc ff ff ff \tbnd jmp 3d4 <main+0x3d4>",}, +{{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect", +"f2 ff 21 \tbnd jmp *(%ecx)",}, +{{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional", +"f2 0f 85 fc ff ff ff \tbnd jne 3de <main+0x3de>",}, +{{0x0f, 0x3a, 0xcc, 
0xc1, 0x00, }, 5, 0, "", "", +"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", +"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",}, +{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "", +"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "", +"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%eax),%xmm3",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%ebp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, 
}, 9, 0, "", "", +"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "", +"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "", +"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "", +"0f 38 c8 00 \tsha1nexte (%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 05 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "", +"0f 38 c8 18 \tsha1nexte (%eax),%xmm3",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c8 04 01 \tsha1nexte (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c8 04 08 \tsha1nexte (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c8 04 c8 \tsha1nexte (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c8 40 12 \tsha1nexte 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c8 45 12 \tsha1nexte 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 01 12 \tsha1nexte 0x12(%ecx,%eax,1),%xmm0",}, 
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 05 12 \tsha1nexte 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 08 12 \tsha1nexte 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "", +"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "", +"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "", +"0f 38 c9 00 \tsha1msg1 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 05 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "", +"0f 38 c9 18 \tsha1msg1 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c9 04 01 \tsha1msg1 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c9 04 08 
\tsha1msg1 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c9 04 c8 \tsha1msg1 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c9 40 12 \tsha1msg1 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c9 45 12 \tsha1msg1 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 01 12 \tsha1msg1 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 05 12 \tsha1msg1 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 08 12 \tsha1msg1 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "", +"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "", +"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "", +"0f 38 ca 00 \tsha1msg2 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 05 78 56 34 
12 \tsha1msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "", +"0f 38 ca 18 \tsha1msg2 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 ca 04 01 \tsha1msg2 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 ca 04 08 \tsha1msg2 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 ca 04 c8 \tsha1msg2 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 ca 40 12 \tsha1msg2 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 ca 45 12 \tsha1msg2 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 01 12 \tsha1msg2 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 05 12 \tsha1msg2 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 08 12 \tsha1msg2 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 
0, "", "", +"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "", +"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "", +"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "", +"0f 38 cb 08 \tsha256rnds2 %xmm0,(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 0d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "", +"0f 38 cb 18 \tsha256rnds2 %xmm0,(%eax),%xmm3",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "", +"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "", +"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "", +"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "", +"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "", +"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%ebp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%ebp,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, 
"", "", +"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ecx,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp,%eax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "", +"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "", +"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "", +"0f 38 cc 00 \tsha256msg1 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 05 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "", +"0f 38 cc 18 \tsha256msg1 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cc 04 01 \tsha256msg1 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cc 04 08 \tsha256msg1 (%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cc 04 c8 \tsha256msg1 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cc 40 12 \tsha256msg1 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cc 45 12 \tsha256msg1 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 01 12 \tsha256msg1 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x05, 
0x12, }, 6, 0, "", "", +"0f 38 cc 44 05 12 \tsha256msg1 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 08 12 \tsha256msg1 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "", +"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "", +"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",}, +{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "", +"0f 38 cd 00 \tsha256msg2 (%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 05 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "", +"0f 38 cd 18 \tsha256msg2 (%eax),%xmm3",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cd 04 01 \tsha256msg2 (%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cd 04 08 \tsha256msg2 
(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cd 04 c8 \tsha256msg2 (%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cd 40 12 \tsha256msg2 0x12(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cd 45 12 \tsha256msg2 0x12(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 01 12 \tsha256msg2 0x12(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 05 12 \tsha256msg2 0x12(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 08 12 \tsha256msg2 0x12(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%eax,%ecx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%eax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%ebp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%ecx,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%ebp,%eax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,8),%xmm0",}, +{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"66 0f ae 38 \tclflushopt (%eax)",}, +{{0x66, 0x0f, 0xae, 0x3d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f ae 3d 78 56 34 12 \tclflushopt 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae bc c8 78 56 34 12 \tclflushopt 
0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0x38, }, 3, 0, "", "", +"0f ae 38 \tclflush (%eax)",}, +{{0x0f, 0xae, 0xf8, }, 3, 0, "", "", +"0f ae f8 \tsfence ",}, +{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"66 0f ae 30 \tclwb (%eax)",}, +{{0x66, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f ae 35 78 56 34 12 \tclwb 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0x30, }, 3, 0, "", "", +"0f ae 30 \txsaveopt (%eax)",}, +{{0x0f, 0xae, 0xf0, }, 3, 0, "", "", +"0f ae f0 \tmfence ",}, +{{0x0f, 0xc7, 0x20, }, 3, 0, "", "", +"0f c7 20 \txsavec (%eax)",}, +{{0x0f, 0xc7, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 25 78 56 34 12 \txsavec 0x12345678",}, +{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xc7, 0x28, }, 3, 0, "", "", +"0f c7 28 \txsaves (%eax)",}, +{{0x0f, 0xc7, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 2d 78 56 34 12 \txsaves 0x12345678",}, +{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xc7, 0x18, }, 3, 0, "", "", +"0f c7 18 \txrstors (%eax)",}, +{{0x0f, 0xc7, 0x1d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, +{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", +"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c new file mode 100644 index 000000000000..4fe7cce179c4 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -0,0 +1,768 @@ +/* + * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk + * from insn-x86-dat-src.c for inclusion by insn-x86.c + * Do not 
change this code. +*/ + +{{0x0f, 0x31, }, 2, 0, "", "", +"0f 31 \trdtsc ",}, +{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f3 0f 1b 00 \tbndmk (%rax),%bnd0",}, +{{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", +"f3 41 0f 1b 00 \tbndmk (%r8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 25 78 56 34 12 \tbndmk 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f3 0f 1b 18 \tbndmk (%rax),%bnd3",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1b 04 01 \tbndmk (%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1b 04 08 \tbndmk (%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1b 04 c8 \tbndmk (%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1b 40 12 \tbndmk 0x12(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1b 45 12 \tbndmk 0x12(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 01 12 \tbndmk 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 05 12 \tbndmk 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 08 12 \tbndmk 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1b 44 c8 12 \tbndmk 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 
0x12, }, 9, 0, "", "", +"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f3 0f 1a 00 \tbndcl (%rax),%bnd0",}, +{{0xf3, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"f3 41 0f 1a 00 \tbndcl (%r8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 25 78 56 34 12 \tbndcl 0x12345678,%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f3 0f 1a 18 \tbndcl (%rax),%bnd3",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f3 0f 1a 04 01 \tbndcl (%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f3 0f 1a 04 08 \tbndcl (%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f3 0f 1a 04 c8 \tbndcl (%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f3 0f 1a 40 12 \tbndcl 0x12(%rax),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f3 0f 1a 45 12 \tbndcl 0x12(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 01 12 \tbndcl 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 05 12 \tbndcl 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 08 12 \tbndcl 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f3 0f 1a 44 c8 12 \tbndcl 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%rax),%bnd0",}, +{{0xf3, 0x0f, 
0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%rbp),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 01 78 56 34 12 \tbndcl 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f3 0f 1a c0 \tbndcl %rax,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"f2 0f 1a 00 \tbndcu (%rax),%bnd0",}, +{{0xf2, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"f2 41 0f 1a 00 \tbndcu (%r8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 25 78 56 34 12 \tbndcu 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"f2 0f 1a 18 \tbndcu (%rax),%bnd3",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1a 04 01 \tbndcu (%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1a 04 08 \tbndcu (%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1a 04 c8 \tbndcu (%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1a 40 12 \tbndcu 0x12(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1a 45 12 \tbndcu 0x12(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 01 12 \tbndcu 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 05 12 \tbndcu 
0x12(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 08 12 \tbndcu 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1a 44 c8 12 \tbndcu 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "", +"f2 0f 1a c0 \tbndcu %rax,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"f2 0f 1b 00 \tbndcn (%rax),%bnd0",}, +{{0xf2, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", +"f2 41 0f 1b 00 \tbndcn (%r8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 25 78 56 34 12 \tbndcn 0x12345678,%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"f2 0f 1b 18 \tbndcn (%rax),%bnd3",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"f2 0f 1b 04 01 \tbndcn (%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"f2 0f 1b 04 08 \tbndcn (%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"f2 0f 1b 04 c8 \tbndcn (%rax,%rcx,8),%bnd0",}, 
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"f2 0f 1b 40 12 \tbndcn 0x12(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"f2 0f 1b 45 12 \tbndcn 0x12(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 01 12 \tbndcn 0x12(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 05 12 \tbndcn 0x12(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 08 12 \tbndcn 0x12(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"f2 0f 1b 44 c8 12 \tbndcn 0x12(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%rax),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%rbp),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "", +"f2 0f 1b c0 \tbndcn %rax,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"66 0f 1a 00 \tbndmov (%rax),%bnd0",}, +{{0x66, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "", +"66 41 0f 1a 00 \tbndmov (%r8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 25 78 56 34 12 \tbndmov 0x12345678,%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "", +"66 0f 1a 18 \tbndmov (%rax),%bnd3",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", 
"", +"66 0f 1a 04 01 \tbndmov (%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1a 04 08 \tbndmov (%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1a 04 c8 \tbndmov (%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1a 40 12 \tbndmov 0x12(%rax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1a 45 12 \tbndmov 0x12(%rbp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 01 12 \tbndmov 0x12(%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 05 12 \tbndmov 0x12(%rbp,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 08 12 \tbndmov 0x12(%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1a 44 c8 12 \tbndmov 0x12(%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%rax),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%rbp),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,8),%bnd0",}, +{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"66 0f 1b 00 \tbndmov %bnd0,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x1b, 
0x00, }, 5, 0, "", "", +"66 41 0f 1b 00 \tbndmov %bnd0,(%r8)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 25 78 56 34 12 \tbndmov %bnd0,0x12345678",}, +{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "", +"66 0f 1b 18 \tbndmov %bnd3,(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "", +"66 0f 1b 04 01 \tbndmov %bnd0,(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "", +"66 0f 1b 04 08 \tbndmov %bnd0,(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "", +"66 0f 1b 04 c8 \tbndmov %bnd0,(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "", +"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "", +"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%rbp)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%rbp,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax)",}, +{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%rcx,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp,%rax,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", 
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,1)",}, +{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "", +"66 0f 1a c8 \tbndmov %bnd0,%bnd1",}, +{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "", +"66 0f 1a c1 \tbndmov %bnd1,%bnd0",}, +{{0x0f, 0x1a, 0x00, }, 3, 0, "", "", +"0f 1a 00 \tbndldx (%rax),%bnd0",}, +{{0x41, 0x0f, 0x1a, 0x00, }, 4, 0, "", "", +"41 0f 1a 00 \tbndldx (%r8),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 25 78 56 34 12 \tbndldx 0x12345678,%bnd0",}, +{{0x0f, 0x1a, 0x18, }, 3, 0, "", "", +"0f 1a 18 \tbndldx (%rax),%bnd3",}, +{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "", +"0f 1a 04 01 \tbndldx (%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "", +"0f 1a 04 08 \tbndldx (%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "", +"0f 1a 40 12 \tbndldx 0x12(%rax),%bnd0",}, +{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "", +"0f 1a 45 12 \tbndldx 0x12(%rbp),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1a 44 01 12 \tbndldx 0x12(%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1a 44 05 12 \tbndldx 0x12(%rbp,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1a 44 08 12 \tbndldx 0x12(%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%rax),%bnd0",}, +{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%rbp),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%rcx,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 
84 05 78 56 34 12 \tbndldx 0x12345678(%rbp,%rax,1),%bnd0",}, +{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%rax,%rcx,1),%bnd0",}, +{{0x0f, 0x1b, 0x00, }, 3, 0, "", "", +"0f 1b 00 \tbndstx %bnd0,(%rax)",}, +{{0x41, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", +"41 0f 1b 00 \tbndstx %bnd0,(%r8)",}, +{{0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 25 78 56 34 12 \tbndstx %bnd0,0x12345678",}, +{{0x0f, 0x1b, 0x18, }, 3, 0, "", "", +"0f 1b 18 \tbndstx %bnd3,(%rax)",}, +{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "", +"0f 1b 04 01 \tbndstx %bnd0,(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%rax,1)",}, +{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "", +"0f 1b 04 08 \tbndstx %bnd0,(%rax,%rcx,1)",}, +{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "", +"0f 1b 40 12 \tbndstx %bnd0,0x12(%rax)",}, +{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "", +"0f 1b 45 12 \tbndstx %bnd0,0x12(%rbp)",}, +{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "", +"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "", +"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%rbp,%rax,1)",}, +{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "", +"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%rax,%rcx,1)",}, +{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax)",}, +{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp)",}, +{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%rcx,%rax,1)",}, +{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp,%rax,1)",}, +{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 1b 84 08 78 56 34 12 \tbndstx 
%bnd0,0x12345678(%rax,%rcx,1)",}, +{{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional", +"f2 e8 00 00 00 00 \tbnd callq 3f6 <main+0x3f6>",}, +{{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"67 f2 ff 10 \tbnd callq *(%eax)",}, +{{0xf2, 0xc3, }, 2, 0, "ret", "indirect", +"f2 c3 \tbnd retq ",}, +{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", +"f2 e9 00 00 00 00 \tbnd jmpq 402 <main+0x402>",}, +{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", +"f2 e9 00 00 00 00 \tbnd jmpq 408 <main+0x408>",}, +{{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect", +"67 f2 ff 21 \tbnd jmpq *(%ecx)",}, +{{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional", +"f2 0f 85 00 00 00 00 \tbnd jne 413 <main+0x413>",}, +{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", +"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", +"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",}, +{{0x41, 0x0f, 0x3a, 0xcc, 0xc0, 0x91, }, 6, 0, "", "", +"41 0f 3a cc c0 91 \tsha1rnds4 $0x91,%xmm8,%xmm0",}, +{{0x44, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "", +"44 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm7,%xmm8",}, +{{0x45, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "", +"45 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm15,%xmm8",}, +{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "", +"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%rax),%xmm0",}, +{{0x41, 0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 6, 0, "", "", +"41 0f 3a cc 00 91 \tsha1rnds4 $0x91,(%r8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 04 25 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "", +"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%rax),%xmm3",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 
04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "", +"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%rax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "", +"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%rbp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "", +"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "", +"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "", +"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 
$0x91,0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x3a, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 11, 0, "", "", +"44 0f 3a cc bc c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "", +"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "", +"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xc8, 0xc0, }, 5, 0, "", "", +"41 0f 38 c8 c0 \tsha1nexte %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "", +"44 0f 38 c8 c7 \tsha1nexte %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "", +"45 0f 38 c8 c7 \tsha1nexte %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "", +"0f 38 c8 00 \tsha1nexte (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xc8, 0x00, }, 5, 0, "", "", +"41 0f 38 c8 00 \tsha1nexte (%r8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 25 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "", +"0f 38 c8 18 \tsha1nexte (%rax),%xmm3",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c8 04 01 \tsha1nexte (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c8 04 08 \tsha1nexte (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c8 04 c8 \tsha1nexte (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c8 40 12 \tsha1nexte 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c8 45 12 \tsha1nexte 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 01 12 \tsha1nexte 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 05 12 \tsha1nexte 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0x08, 
0x12, }, 6, 0, "", "", +"0f 38 c8 44 08 12 \tsha1nexte 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc8, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 c8 bc c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "", +"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "", +"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xc9, 0xc0, }, 5, 0, "", "", +"41 0f 38 c9 c0 \tsha1msg1 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "", +"44 0f 38 c9 c7 \tsha1msg1 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "", +"45 0f 38 c9 c7 \tsha1msg1 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "", +"0f 38 c9 00 \tsha1msg1 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xc9, 0x00, }, 5, 0, "", "", +"41 0f 38 c9 00 \tsha1msg1 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 25 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",}, +{{0x0f, 
0x38, 0xc9, 0x18, }, 4, 0, "", "", +"0f 38 c9 18 \tsha1msg1 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 c9 04 01 \tsha1msg1 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 c9 04 08 \tsha1msg1 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 c9 04 c8 \tsha1msg1 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 c9 40 12 \tsha1msg1 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 c9 45 12 \tsha1msg1 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 01 12 \tsha1msg1 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 05 12 \tsha1msg1 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 08 12 \tsha1msg1 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 c9 84 c8 78 56 34 12 
\tsha1msg1 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xc9, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 c9 bc c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "", +"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "", +"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xca, 0xc0, }, 5, 0, "", "", +"41 0f 38 ca c0 \tsha1msg2 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "", +"44 0f 38 ca c7 \tsha1msg2 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "", +"45 0f 38 ca c7 \tsha1msg2 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "", +"0f 38 ca 00 \tsha1msg2 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xca, 0x00, }, 5, 0, "", "", +"41 0f 38 ca 00 \tsha1msg2 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 25 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "", +"0f 38 ca 18 \tsha1msg2 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 ca 04 01 \tsha1msg2 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 ca 04 08 \tsha1msg2 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 ca 04 c8 \tsha1msg2 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 ca 40 12 \tsha1msg2 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 ca 45 12 \tsha1msg2 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 01 12 \tsha1msg2 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 05 12 \tsha1msg2 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 
ca 44 08 12 \tsha1msg2 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xca, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 ca bc c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "", +"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "", +"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcb, 0xc8, }, 5, 0, "", "", +"41 0f 38 cb c8 \tsha256rnds2 %xmm0,%xmm8,%xmm1",}, +{{0x44, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "", +"44 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "", +"45 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "", +"0f 38 cb 08 \tsha256rnds2 %xmm0,(%rax),%xmm1",}, +{{0x41, 0x0f, 0x38, 0xcb, 0x08, }, 5, 0, "", "", +"41 0f 38 cb 08 \tsha256rnds2 %xmm0,(%r8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 25 78 56 34 12 \tsha256rnds2 
%xmm0,0x12345678,%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "", +"0f 38 cb 18 \tsha256rnds2 %xmm0,(%rax),%xmm3",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "", +"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "", +"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "", +"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%rax,%rcx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "", +"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%rax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "", +"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%rbp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%rbp,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,8),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rcx,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp,%rax,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 
cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,1),%xmm1",}, +{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x44, 0x0f, 0x38, 0xcb, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cb bc c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "", +"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "", +"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcc, 0xc0, }, 5, 0, "", "", +"41 0f 38 cc c0 \tsha256msg1 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "", +"44 0f 38 cc c7 \tsha256msg1 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "", +"45 0f 38 cc c7 \tsha256msg1 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "", +"0f 38 cc 00 \tsha256msg1 (%rax),%xmm0",}, +{{0x41, 0x0f, 0x38, 0xcc, 0x00, }, 5, 0, "", "", +"41 0f 38 cc 00 \tsha256msg1 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 25 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "", +"0f 38 cc 18 \tsha256msg1 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cc 04 01 \tsha256msg1 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cc 04 08 \tsha256msg1 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cc 04 c8 \tsha256msg1 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cc 40 12 \tsha256msg1 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cc 45 12 \tsha256msg1 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cc 
44 01 12 \tsha256msg1 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 05 12 \tsha256msg1 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 08 12 \tsha256msg1 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cc bc c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "", +"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "", +"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",}, +{{0x41, 0x0f, 0x38, 0xcd, 0xc0, }, 5, 0, "", "", +"41 0f 38 cd c0 \tsha256msg2 %xmm8,%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "", +"44 0f 38 cd c7 \tsha256msg2 %xmm7,%xmm8",}, +{{0x45, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "", +"45 0f 38 cd c7 \tsha256msg2 %xmm15,%xmm8",}, +{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "", +"0f 38 cd 00 \tsha256msg2 (%rax),%xmm0",}, +{{0x41, 0x0f, 
0x38, 0xcd, 0x00, }, 5, 0, "", "", +"41 0f 38 cd 00 \tsha256msg2 (%r8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 25 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "", +"0f 38 cd 18 \tsha256msg2 (%rax),%xmm3",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "", +"0f 38 cd 04 01 \tsha256msg2 (%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "", +"0f 38 cd 04 08 \tsha256msg2 (%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "", +"0f 38 cd 04 c8 \tsha256msg2 (%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "", +"0f 38 cd 40 12 \tsha256msg2 0x12(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "", +"0f 38 cd 45 12 \tsha256msg2 0x12(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 01 12 \tsha256msg2 0x12(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 05 12 \tsha256msg2 0x12(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 08 12 \tsha256msg2 0x12(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "", +"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%rax,%rcx,8),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%rax),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%rbp),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%rcx,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 
0x12345678(%rbp,%rax,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,1),%xmm0",}, +{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm0",}, +{{0x44, 0x0f, 0x38, 0xcd, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"44 0f 38 cd bc c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm15",}, +{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"66 0f ae 38 \tclflushopt (%rax)",}, +{{0x66, 0x41, 0x0f, 0xae, 0x38, }, 5, 0, "", "", +"66 41 0f ae 38 \tclflushopt (%r8)",}, +{{0x66, 0x0f, 0xae, 0x3c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae 3c 25 78 56 34 12 \tclflushopt 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 41 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0x38, }, 3, 0, "", "", +"0f ae 38 \tclflush (%rax)",}, +{{0x41, 0x0f, 0xae, 0x38, }, 4, 0, "", "", +"41 0f ae 38 \tclflush (%r8)",}, +{{0x0f, 0xae, 0xf8, }, 3, 0, "", "", +"0f ae f8 \tsfence ",}, +{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"66 0f ae 30 \tclwb (%rax)",}, +{{0x66, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"66 41 0f ae 30 \tclwb (%r8)",}, +{{0x66, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae 34 25 78 56 34 12 \tclwb 0x12345678",}, +{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 41 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0x30, }, 3, 0, "", "", +"0f ae 30 \txsaveopt (%rax)",}, +{{0x41, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"41 0f 
ae 30 \txsaveopt (%r8)",}, +{{0x0f, 0xae, 0xf0, }, 3, 0, "", "", +"0f ae f0 \tmfence ",}, +{{0x0f, 0xc7, 0x20, }, 3, 0, "", "", +"0f c7 20 \txsavec (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x20, }, 4, 0, "", "", +"41 0f c7 20 \txsavec (%r8)",}, +{{0x0f, 0xc7, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 24 25 78 56 34 12 \txsavec 0x12345678",}, +{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xc7, 0x28, }, 3, 0, "", "", +"0f c7 28 \txsaves (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x28, }, 4, 0, "", "", +"41 0f c7 28 \txsaves (%r8)",}, +{{0x0f, 0xc7, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 2c 25 78 56 34 12 \txsaves 0x12345678",}, +{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xc7, 0x18, }, 3, 0, "", "", +"0f c7 18 \txrstors (%rax)",}, +{{0x41, 0x0f, 0xc7, 0x18, }, 4, 0, "", "", +"41 0f c7 18 \txrstors (%r8)",}, +{{0x0f, 0xc7, 0x1c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 1c 25 78 56 34 12 \txrstors 0x12345678",}, +{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", +"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c new file mode 100644 index 000000000000..41b1b1c62660 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ 
-0,0 +1,877 @@ +/* + * This file contains instructions for testing by the test titled: + * + * "Test x86 instruction decoder - new instructions" + * + * Note that the 'Expecting' comment lines are consumed by the + * gen-insn-x86-dat.awk script and have the format: + * + * Expecting: <op> <branch> <rel> + * + * If this file is changed, remember to run the gen-insn-x86-dat.sh + * script and commit the result. + * + * Refer to insn-x86.c for more details. + */ + +int main(void) +{ + /* Following line is a marker for the awk script - do not change */ + asm volatile("rdtsc"); /* Start here */ + +#ifdef __x86_64__ + + /* bndmk m64, bnd */ + + asm volatile("bndmk (%rax), %bnd0"); + asm volatile("bndmk (%r8), %bnd0"); + asm volatile("bndmk (0x12345678), %bnd0"); + asm volatile("bndmk (%rax), %bnd3"); + asm volatile("bndmk (%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndmk (%rax,%rcx,1), %bnd0"); + asm volatile("bndmk (%rax,%rcx,8), %bnd0"); + asm volatile("bndmk 0x12(%rax), %bnd0"); + asm volatile("bndmk 0x12(%rbp), %bnd0"); + asm volatile("bndmk 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndmk 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndmk 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndmk 0x12345678(%rax), %bnd0"); + asm volatile("bndmk 0x12345678(%rbp), %bnd0"); + asm volatile("bndmk 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndmk 0x12345678(%rax,%rcx,8), %bnd0"); + + /* bndcl r/m64, bnd */ + + asm volatile("bndcl (%rax), %bnd0"); + asm volatile("bndcl (%r8), %bnd0"); + asm volatile("bndcl (0x12345678), %bnd0"); + asm volatile("bndcl (%rax), %bnd3"); + asm volatile("bndcl (%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcl (%rax,%rcx,1), %bnd0"); + asm volatile("bndcl (%rax,%rcx,8), %bnd0"); + asm 
volatile("bndcl 0x12(%rax), %bnd0"); + asm volatile("bndcl 0x12(%rbp), %bnd0"); + asm volatile("bndcl 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcl 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcl 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcl 0x12345678(%rax), %bnd0"); + asm volatile("bndcl 0x12345678(%rbp), %bnd0"); + asm volatile("bndcl 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcl 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcl %rax, %bnd0"); + + /* bndcu r/m64, bnd */ + + asm volatile("bndcu (%rax), %bnd0"); + asm volatile("bndcu (%r8), %bnd0"); + asm volatile("bndcu (0x12345678), %bnd0"); + asm volatile("bndcu (%rax), %bnd3"); + asm volatile("bndcu (%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcu (%rax,%rcx,1), %bnd0"); + asm volatile("bndcu (%rax,%rcx,8), %bnd0"); + asm volatile("bndcu 0x12(%rax), %bnd0"); + asm volatile("bndcu 0x12(%rbp), %bnd0"); + asm volatile("bndcu 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcu 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcu 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcu 0x12345678(%rax), %bnd0"); + asm volatile("bndcu 0x12345678(%rbp), %bnd0"); + asm volatile("bndcu 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcu 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcu %rax, %bnd0"); + + /* bndcn r/m64, bnd */ + + asm volatile("bndcn (%rax), %bnd0"); + asm volatile("bndcn (%r8), %bnd0"); + asm volatile("bndcn (0x12345678), %bnd0"); + asm volatile("bndcn (%rax), %bnd3"); + asm volatile("bndcn (%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndcn (%rax,%rcx,1), %bnd0"); + 
asm volatile("bndcn (%rax,%rcx,8), %bnd0"); + asm volatile("bndcn 0x12(%rax), %bnd0"); + asm volatile("bndcn 0x12(%rbp), %bnd0"); + asm volatile("bndcn 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndcn 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndcn 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndcn 0x12345678(%rax), %bnd0"); + asm volatile("bndcn 0x12345678(%rbp), %bnd0"); + asm volatile("bndcn 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndcn 0x12345678(%rax,%rcx,8), %bnd0"); + asm volatile("bndcn %rax, %bnd0"); + + /* bndmov m128, bnd */ + + asm volatile("bndmov (%rax), %bnd0"); + asm volatile("bndmov (%r8), %bnd0"); + asm volatile("bndmov (0x12345678), %bnd0"); + asm volatile("bndmov (%rax), %bnd3"); + asm volatile("bndmov (%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndmov (%rax,%rcx,1), %bnd0"); + asm volatile("bndmov (%rax,%rcx,8), %bnd0"); + asm volatile("bndmov 0x12(%rax), %bnd0"); + asm volatile("bndmov 0x12(%rbp), %bnd0"); + asm volatile("bndmov 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndmov 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndmov 0x12(%rax,%rcx,8), %bnd0"); + asm volatile("bndmov 0x12345678(%rax), %bnd0"); + asm volatile("bndmov 0x12345678(%rbp), %bnd0"); + asm volatile("bndmov 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rax,%rcx,1), %bnd0"); + asm volatile("bndmov 0x12345678(%rax,%rcx,8), %bnd0"); + + /* bndmov bnd, m128 */ + + asm volatile("bndmov %bnd0, (%rax)"); + asm volatile("bndmov %bnd0, (%r8)"); + asm volatile("bndmov %bnd0, (0x12345678)"); + asm volatile("bndmov %bnd3, (%rax)"); + asm volatile("bndmov %bnd0, (%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(,%rax,1)"); + asm 
volatile("bndmov %bnd0, (%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, (%rax,%rcx,8)"); + asm volatile("bndmov %bnd0, 0x12(%rax)"); + asm volatile("bndmov %bnd0, 0x12(%rbp)"); + asm volatile("bndmov %bnd0, 0x12(%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12(%rbp,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,8)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax)"); + asm volatile("bndmov %bnd0, 0x12345678(%rbp)"); + asm volatile("bndmov %bnd0, 0x12345678(%rcx,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rbp,%rax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,8)"); + + /* bndmov bnd2, bnd1 */ + + asm volatile("bndmov %bnd0, %bnd1"); + asm volatile("bndmov %bnd1, %bnd0"); + + /* bndldx mib, bnd */ + + asm volatile("bndldx (%rax), %bnd0"); + asm volatile("bndldx (%r8), %bnd0"); + asm volatile("bndldx (0x12345678), %bnd0"); + asm volatile("bndldx (%rax), %bnd3"); + asm volatile("bndldx (%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(,%rax,1), %bnd0"); + asm volatile("bndldx (%rax,%rcx,1), %bnd0"); + asm volatile("bndldx 0x12(%rax), %bnd0"); + asm volatile("bndldx 0x12(%rbp), %bnd0"); + asm volatile("bndldx 0x12(%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12(%rbp,%rax,1), %bnd0"); + asm volatile("bndldx 0x12(%rax,%rcx,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rax), %bnd0"); + asm volatile("bndldx 0x12345678(%rbp), %bnd0"); + asm volatile("bndldx 0x12345678(%rcx,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rbp,%rax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%rax,%rcx,1), %bnd0"); + + /* bndstx bnd, mib */ + + asm volatile("bndstx %bnd0, (%rax)"); + asm volatile("bndstx %bnd0, (%r8)"); + asm volatile("bndstx %bnd0, (0x12345678)"); + asm volatile("bndstx %bnd3, (%rax)"); + asm volatile("bndstx %bnd0, (%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(,%rax,1)"); + asm volatile("bndstx %bnd0, 
(%rax,%rcx,1)"); + asm volatile("bndstx %bnd0, 0x12(%rax)"); + asm volatile("bndstx %bnd0, 0x12(%rbp)"); + asm volatile("bndstx %bnd0, 0x12(%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12(%rbp,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12(%rax,%rcx,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rax)"); + asm volatile("bndstx %bnd0, 0x12345678(%rbp)"); + asm volatile("bndstx %bnd0, 0x12345678(%rcx,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rbp,%rax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%rax,%rcx,1)"); + + /* bnd prefix on call, ret, jmp and all jcc */ + + asm volatile("bnd call label1"); /* Expecting: call unconditional 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd ret"); /* Expecting: ret indirect 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */ + asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0 */ + + /* sha1rnds4 imm8, xmm2/m128, xmm1 */ + + asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2"); + asm volatile("sha1rnds4 $0x91, %xmm8, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm8"); + asm volatile("sha1rnds4 $0x91, %xmm15, %xmm8"); + asm volatile("sha1rnds4 $0x91, (%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%r8), %xmm0"); + asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax), %xmm3"); + asm volatile("sha1rnds4 $0x91, (%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rbp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 
0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1nexte xmm2/m128, xmm1 */ + + asm volatile("sha1nexte %xmm1, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm2"); + asm volatile("sha1nexte %xmm8, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm8"); + asm volatile("sha1nexte %xmm15, %xmm8"); + asm volatile("sha1nexte (%rax), %xmm0"); + asm volatile("sha1nexte (%r8), %xmm0"); + asm volatile("sha1nexte (0x12345678), %xmm0"); + asm volatile("sha1nexte (%rax), %xmm3"); + asm volatile("sha1nexte (%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1nexte (%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte (%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12(%rax), %xmm0"); + asm volatile("sha1nexte 0x12(%rbp), %xmm0"); + asm volatile("sha1nexte 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rbp), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1msg1 xmm2/m128, xmm1 */ + + asm 
volatile("sha1msg1 %xmm1, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm2"); + asm volatile("sha1msg1 %xmm8, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm8"); + asm volatile("sha1msg1 %xmm15, %xmm8"); + asm volatile("sha1msg1 (%rax), %xmm0"); + asm volatile("sha1msg1 (%r8), %xmm0"); + asm volatile("sha1msg1 (0x12345678), %xmm0"); + asm volatile("sha1msg1 (%rax), %xmm3"); + asm volatile("sha1msg1 (%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1msg1 (%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 (%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12(%rax), %xmm0"); + asm volatile("sha1msg1 0x12(%rbp), %xmm0"); + asm volatile("sha1msg1 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rbp), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha1msg2 xmm2/m128, xmm1 */ + + asm volatile("sha1msg2 %xmm1, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm2"); + asm volatile("sha1msg2 %xmm8, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm8"); + asm volatile("sha1msg2 %xmm15, %xmm8"); + asm volatile("sha1msg2 (%rax), %xmm0"); + asm volatile("sha1msg2 (%r8), %xmm0"); + asm volatile("sha1msg2 (0x12345678), %xmm0"); + asm volatile("sha1msg2 (%rax), %xmm3"); + asm volatile("sha1msg2 (%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha1msg2 (%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 (%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12(%rax), %xmm0"); + asm volatile("sha1msg2 0x12(%rbp), %xmm0"); + asm 
volatile("sha1msg2 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rbp), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */ + /* Note sha256rnds2 has an implicit operand 'xmm0' */ + + asm volatile("sha256rnds2 %xmm4, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm2"); + asm volatile("sha256rnds2 %xmm8, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm8"); + asm volatile("sha256rnds2 %xmm15, %xmm8"); + asm volatile("sha256rnds2 (%rax), %xmm1"); + asm volatile("sha256rnds2 (%r8), %xmm1"); + asm volatile("sha256rnds2 (0x12345678), %xmm1"); + asm volatile("sha256rnds2 (%rax), %xmm3"); + asm volatile("sha256rnds2 (%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(,%rax,1), %xmm1"); + asm volatile("sha256rnds2 (%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 (%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax), %xmm1"); + asm volatile("sha256rnds2 0x12(%rbp), %xmm1"); + asm volatile("sha256rnds2 0x12(%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rbp,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rbp), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rcx,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rbp,%rax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax,%rcx,1), %xmm1"); + asm volatile("sha256rnds2 
0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256msg1 xmm2/m128, xmm1 */ + + asm volatile("sha256msg1 %xmm1, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm2"); + asm volatile("sha256msg1 %xmm8, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm8"); + asm volatile("sha256msg1 %xmm15, %xmm8"); + asm volatile("sha256msg1 (%rax), %xmm0"); + asm volatile("sha256msg1 (%r8), %xmm0"); + asm volatile("sha256msg1 (0x12345678), %xmm0"); + asm volatile("sha256msg1 (%rax), %xmm3"); + asm volatile("sha256msg1 (%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(,%rax,1), %xmm0"); + asm volatile("sha256msg1 (%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 (%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12(%rax), %xmm0"); + asm volatile("sha256msg1 0x12(%rbp), %xmm0"); + asm volatile("sha256msg1 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rbp), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm15"); + + /* sha256msg2 xmm2/m128, xmm1 */ + + asm volatile("sha256msg2 %xmm1, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm2"); + asm volatile("sha256msg2 %xmm8, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm8"); + asm volatile("sha256msg2 %xmm15, %xmm8"); + asm volatile("sha256msg2 (%rax), %xmm0"); + asm volatile("sha256msg2 (%r8), %xmm0"); + asm volatile("sha256msg2 (0x12345678), %xmm0"); + asm volatile("sha256msg2 (%rax), %xmm3"); + asm volatile("sha256msg2 (%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 
0x12345678(,%rax,1), %xmm0"); + asm volatile("sha256msg2 (%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 (%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12(%rax), %xmm0"); + asm volatile("sha256msg2 0x12(%rbp), %xmm0"); + asm volatile("sha256msg2 0x12(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 0x12(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rbp), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rcx,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rbp,%rax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm15"); + + /* clflushopt m8 */ + + asm volatile("clflushopt (%rax)"); + asm volatile("clflushopt (%r8)"); + asm volatile("clflushopt (0x12345678)"); + asm volatile("clflushopt 0x12345678(%rax,%rcx,8)"); + asm volatile("clflushopt 0x12345678(%r8,%rcx,8)"); + /* Also check instructions in the same group encoding as clflushopt */ + asm volatile("clflush (%rax)"); + asm volatile("clflush (%r8)"); + asm volatile("sfence"); + + /* clwb m8 */ + + asm volatile("clwb (%rax)"); + asm volatile("clwb (%r8)"); + asm volatile("clwb (0x12345678)"); + asm volatile("clwb 0x12345678(%rax,%rcx,8)"); + asm volatile("clwb 0x12345678(%r8,%rcx,8)"); + /* Also check instructions in the same group encoding as clwb */ + asm volatile("xsaveopt (%rax)"); + asm volatile("xsaveopt (%r8)"); + asm volatile("mfence"); + + /* xsavec mem */ + + asm volatile("xsavec (%rax)"); + asm volatile("xsavec (%r8)"); + asm volatile("xsavec (0x12345678)"); + asm volatile("xsavec 0x12345678(%rax,%rcx,8)"); + asm volatile("xsavec 0x12345678(%r8,%rcx,8)"); + + /* xsaves mem */ + + asm volatile("xsaves (%rax)"); + asm volatile("xsaves (%r8)"); + asm volatile("xsaves 
(0x12345678)"); + asm volatile("xsaves 0x12345678(%rax,%rcx,8)"); + asm volatile("xsaves 0x12345678(%r8,%rcx,8)"); + + /* xrstors mem */ + + asm volatile("xrstors (%rax)"); + asm volatile("xrstors (%r8)"); + asm volatile("xrstors (0x12345678)"); + asm volatile("xrstors 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstors 0x12345678(%r8,%rcx,8)"); + +#else /* #ifdef __x86_64__ */ + + /* bndmk m32, bnd */ + + asm volatile("bndmk (%eax), %bnd0"); + asm volatile("bndmk (0x12345678), %bnd0"); + asm volatile("bndmk (%eax), %bnd3"); + asm volatile("bndmk (%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndmk (%eax,%ecx,1), %bnd0"); + asm volatile("bndmk (%eax,%ecx,8), %bnd0"); + asm volatile("bndmk 0x12(%eax), %bnd0"); + asm volatile("bndmk 0x12(%ebp), %bnd0"); + asm volatile("bndmk 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndmk 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndmk 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndmk 0x12345678(%eax), %bnd0"); + asm volatile("bndmk 0x12345678(%ebp), %bnd0"); + asm volatile("bndmk 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndmk 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndmk 0x12345678(%eax,%ecx,8), %bnd0"); + + /* bndcl r/m32, bnd */ + + asm volatile("bndcl (%eax), %bnd0"); + asm volatile("bndcl (0x12345678), %bnd0"); + asm volatile("bndcl (%eax), %bnd3"); + asm volatile("bndcl (%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcl (%eax,%ecx,1), %bnd0"); + asm volatile("bndcl (%eax,%ecx,8), %bnd0"); + asm volatile("bndcl 0x12(%eax), %bnd0"); + asm volatile("bndcl 0x12(%ebp), %bnd0"); + asm volatile("bndcl 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcl 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcl 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcl 0x12345678(%eax), 
%bnd0"); + asm volatile("bndcl 0x12345678(%ebp), %bnd0"); + asm volatile("bndcl 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcl 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcl 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcl %eax, %bnd0"); + + /* bndcu r/m32, bnd */ + + asm volatile("bndcu (%eax), %bnd0"); + asm volatile("bndcu (0x12345678), %bnd0"); + asm volatile("bndcu (%eax), %bnd3"); + asm volatile("bndcu (%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcu (%eax,%ecx,1), %bnd0"); + asm volatile("bndcu (%eax,%ecx,8), %bnd0"); + asm volatile("bndcu 0x12(%eax), %bnd0"); + asm volatile("bndcu 0x12(%ebp), %bnd0"); + asm volatile("bndcu 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcu 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcu 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcu 0x12345678(%eax), %bnd0"); + asm volatile("bndcu 0x12345678(%ebp), %bnd0"); + asm volatile("bndcu 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcu 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcu 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcu %eax, %bnd0"); + + /* bndcn r/m32, bnd */ + + asm volatile("bndcn (%eax), %bnd0"); + asm volatile("bndcn (0x12345678), %bnd0"); + asm volatile("bndcn (%eax), %bnd3"); + asm volatile("bndcn (%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndcn (%eax,%ecx,1), %bnd0"); + asm volatile("bndcn (%eax,%ecx,8), %bnd0"); + asm volatile("bndcn 0x12(%eax), %bnd0"); + asm volatile("bndcn 0x12(%ebp), %bnd0"); + asm volatile("bndcn 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndcn 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndcn 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndcn 0x12345678(%eax), %bnd0"); + asm 
volatile("bndcn 0x12345678(%ebp), %bnd0"); + asm volatile("bndcn 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndcn 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndcn 0x12345678(%eax,%ecx,8), %bnd0"); + asm volatile("bndcn %eax, %bnd0"); + + /* bndmov m64, bnd */ + + asm volatile("bndmov (%eax), %bnd0"); + asm volatile("bndmov (0x12345678), %bnd0"); + asm volatile("bndmov (%eax), %bnd3"); + asm volatile("bndmov (%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndmov (%eax,%ecx,1), %bnd0"); + asm volatile("bndmov (%eax,%ecx,8), %bnd0"); + asm volatile("bndmov 0x12(%eax), %bnd0"); + asm volatile("bndmov 0x12(%ebp), %bnd0"); + asm volatile("bndmov 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndmov 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndmov 0x12(%eax,%ecx,8), %bnd0"); + asm volatile("bndmov 0x12345678(%eax), %bnd0"); + asm volatile("bndmov 0x12345678(%ebp), %bnd0"); + asm volatile("bndmov 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndmov 0x12345678(%eax,%ecx,1), %bnd0"); + asm volatile("bndmov 0x12345678(%eax,%ecx,8), %bnd0"); + + /* bndmov bnd, m64 */ + + asm volatile("bndmov %bnd0, (%eax)"); + asm volatile("bndmov %bnd0, (0x12345678)"); + asm volatile("bndmov %bnd3, (%eax)"); + asm volatile("bndmov %bnd0, (%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(,%eax,1)"); + asm volatile("bndmov %bnd0, (%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, (%eax,%ecx,8)"); + asm volatile("bndmov %bnd0, 0x12(%eax)"); + asm volatile("bndmov %bnd0, 0x12(%ebp)"); + asm volatile("bndmov %bnd0, 0x12(%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12(%ebp,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,8)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax)"); + asm volatile("bndmov %bnd0, 
0x12345678(%ebp)"); + asm volatile("bndmov %bnd0, 0x12345678(%ecx,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%ebp,%eax,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,1)"); + asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,8)"); + + /* bndmov bnd2, bnd1 */ + + asm volatile("bndmov %bnd0, %bnd1"); + asm volatile("bndmov %bnd1, %bnd0"); + + /* bndldx mib, bnd */ + + asm volatile("bndldx (%eax), %bnd0"); + asm volatile("bndldx (0x12345678), %bnd0"); + asm volatile("bndldx (%eax), %bnd3"); + asm volatile("bndldx (%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(,%eax,1), %bnd0"); + asm volatile("bndldx (%eax,%ecx,1), %bnd0"); + asm volatile("bndldx 0x12(%eax), %bnd0"); + asm volatile("bndldx 0x12(%ebp), %bnd0"); + asm volatile("bndldx 0x12(%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12(%ebp,%eax,1), %bnd0"); + asm volatile("bndldx 0x12(%eax,%ecx,1), %bnd0"); + asm volatile("bndldx 0x12345678(%eax), %bnd0"); + asm volatile("bndldx 0x12345678(%ebp), %bnd0"); + asm volatile("bndldx 0x12345678(%ecx,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%ebp,%eax,1), %bnd0"); + asm volatile("bndldx 0x12345678(%eax,%ecx,1), %bnd0"); + + /* bndstx bnd, mib */ + + asm volatile("bndstx %bnd0, (%eax)"); + asm volatile("bndstx %bnd0, (0x12345678)"); + asm volatile("bndstx %bnd3, (%eax)"); + asm volatile("bndstx %bnd0, (%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(,%eax,1)"); + asm volatile("bndstx %bnd0, (%eax,%ecx,1)"); + asm volatile("bndstx %bnd0, 0x12(%eax)"); + asm volatile("bndstx %bnd0, 0x12(%ebp)"); + asm volatile("bndstx %bnd0, 0x12(%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12(%ebp,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12(%eax,%ecx,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%eax)"); + asm volatile("bndstx %bnd0, 0x12345678(%ebp)"); + asm volatile("bndstx %bnd0, 0x12345678(%ecx,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%ebp,%eax,1)"); + asm volatile("bndstx %bnd0, 0x12345678(%eax,%ecx,1)"); + + /* bnd prefix 
on call, ret, jmp and all jcc */ + + asm volatile("bnd call label1"); /* Expecting: call unconditional 0xfffffffc */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd ret"); /* Expecting: ret indirect 0 */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */ + asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */ + asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0xfffffffc */ + + /* sha1rnds4 imm8, xmm2/m128, xmm1 */ + + asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0"); + asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2"); + asm volatile("sha1rnds4 $0x91, (%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax), %xmm3"); + asm volatile("sha1rnds4 $0x91, (%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, (%eax,%ecx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ebp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1nexte xmm2/m128, xmm1 */ + + asm volatile("sha1nexte %xmm1, %xmm0"); + asm volatile("sha1nexte %xmm7, %xmm2"); + asm volatile("sha1nexte (%eax), %xmm0"); + asm 
volatile("sha1nexte (0x12345678), %xmm0"); + asm volatile("sha1nexte (%eax), %xmm3"); + asm volatile("sha1nexte (%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1nexte (%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte (%eax,%ecx,8), %xmm0"); + asm volatile("sha1nexte 0x12(%eax), %xmm0"); + asm volatile("sha1nexte 0x12(%ebp), %xmm0"); + asm volatile("sha1nexte 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ebp), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1nexte 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1msg1 xmm2/m128, xmm1 */ + + asm volatile("sha1msg1 %xmm1, %xmm0"); + asm volatile("sha1msg1 %xmm7, %xmm2"); + asm volatile("sha1msg1 (%eax), %xmm0"); + asm volatile("sha1msg1 (0x12345678), %xmm0"); + asm volatile("sha1msg1 (%eax), %xmm3"); + asm volatile("sha1msg1 (%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1msg1 (%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg1 (%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg1 0x12(%eax), %xmm0"); + asm volatile("sha1msg1 0x12(%ebp), %xmm0"); + asm volatile("sha1msg1 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg1 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg1 0x12345678(%eax), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ebp), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg1 0x12345678(%eax,%ecx,1), %xmm0"); + asm 
volatile("sha1msg1 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha1msg2 xmm2/m128, xmm1 */ + + asm volatile("sha1msg2 %xmm1, %xmm0"); + asm volatile("sha1msg2 %xmm7, %xmm2"); + asm volatile("sha1msg2 (%eax), %xmm0"); + asm volatile("sha1msg2 (0x12345678), %xmm0"); + asm volatile("sha1msg2 (%eax), %xmm3"); + asm volatile("sha1msg2 (%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha1msg2 (%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 (%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg2 0x12(%eax), %xmm0"); + asm volatile("sha1msg2 0x12(%ebp), %xmm0"); + asm volatile("sha1msg2 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ebp), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha1msg2 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */ + /* Note sha256rnds2 has an implicit operand 'xmm0' */ + + asm volatile("sha256rnds2 %xmm4, %xmm1"); + asm volatile("sha256rnds2 %xmm7, %xmm2"); + asm volatile("sha256rnds2 (%eax), %xmm1"); + asm volatile("sha256rnds2 (0x12345678), %xmm1"); + asm volatile("sha256rnds2 (%eax), %xmm3"); + asm volatile("sha256rnds2 (%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(,%eax,1), %xmm1"); + asm volatile("sha256rnds2 (%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 (%eax,%ecx,8), %xmm1"); + asm volatile("sha256rnds2 0x12(%eax), %xmm1"); + asm volatile("sha256rnds2 0x12(%ebp), %xmm1"); + asm volatile("sha256rnds2 0x12(%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%ebp,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12(%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 
0x12(%eax,%ecx,8), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ebp), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ecx,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%ebp,%eax,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax,%ecx,1), %xmm1"); + asm volatile("sha256rnds2 0x12345678(%eax,%ecx,8), %xmm1"); + + /* sha256msg1 xmm2/m128, xmm1 */ + + asm volatile("sha256msg1 %xmm1, %xmm0"); + asm volatile("sha256msg1 %xmm7, %xmm2"); + asm volatile("sha256msg1 (%eax), %xmm0"); + asm volatile("sha256msg1 (0x12345678), %xmm0"); + asm volatile("sha256msg1 (%eax), %xmm3"); + asm volatile("sha256msg1 (%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha256msg1 (%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 (%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg1 0x12(%eax), %xmm0"); + asm volatile("sha256msg1 0x12(%ebp), %xmm0"); + asm volatile("sha256msg1 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ebp), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg1 0x12345678(%eax,%ecx,8), %xmm0"); + + /* sha256msg2 xmm2/m128, xmm1 */ + + asm volatile("sha256msg2 %xmm1, %xmm0"); + asm volatile("sha256msg2 %xmm7, %xmm2"); + asm volatile("sha256msg2 (%eax), %xmm0"); + asm volatile("sha256msg2 (0x12345678), %xmm0"); + asm volatile("sha256msg2 (%eax), %xmm3"); + asm volatile("sha256msg2 (%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(,%eax,1), %xmm0"); + asm volatile("sha256msg2 (%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 (%eax,%ecx,8), %xmm0"); 
+ asm volatile("sha256msg2 0x12(%eax), %xmm0"); + asm volatile("sha256msg2 0x12(%ebp), %xmm0"); + asm volatile("sha256msg2 0x12(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 0x12(%eax,%ecx,8), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ebp), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ecx,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%ebp,%eax,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax,%ecx,1), %xmm0"); + asm volatile("sha256msg2 0x12345678(%eax,%ecx,8), %xmm0"); + + /* clflushopt m8 */ + + asm volatile("clflushopt (%eax)"); + asm volatile("clflushopt (0x12345678)"); + asm volatile("clflushopt 0x12345678(%eax,%ecx,8)"); + /* Also check instructions in the same group encoding as clflushopt */ + asm volatile("clflush (%eax)"); + asm volatile("sfence"); + + /* clwb m8 */ + + asm volatile("clwb (%eax)"); + asm volatile("clwb (0x12345678)"); + asm volatile("clwb 0x12345678(%eax,%ecx,8)"); + /* Also check instructions in the same group encoding as clwb */ + asm volatile("xsaveopt (%eax)"); + asm volatile("mfence"); + + /* xsavec mem */ + + asm volatile("xsavec (%eax)"); + asm volatile("xsavec (0x12345678)"); + asm volatile("xsavec 0x12345678(%eax,%ecx,8)"); + + /* xsaves mem */ + + asm volatile("xsaves (%eax)"); + asm volatile("xsaves (0x12345678)"); + asm volatile("xsaves 0x12345678(%eax,%ecx,8)"); + + /* xrstors mem */ + + asm volatile("xrstors (%eax)"); + asm volatile("xrstors (0x12345678)"); + asm volatile("xrstors 0x12345678(%eax,%ecx,8)"); + +#endif /* #ifndef __x86_64__ */ + + /* pcommit */ + + asm volatile("pcommit"); + + /* Following line is a marker for the awk script - do not change */ + asm volatile("rdtsc"); /* Stop here */ + + return 0; +} diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c new file mode 100644 index 
000000000000..b6115dfd28f0 --- /dev/null +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -0,0 +1,185 @@ +#include <linux/types.h> + +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +#include "intel-pt-decoder/insn.h" +#include "intel-pt-decoder/intel-pt-insn-decoder.h" + +struct test_data { + u8 data[MAX_INSN_SIZE]; + int expected_length; + int expected_rel; + const char *expected_op_str; + const char *expected_branch_str; + const char *asm_rep; +}; + +struct test_data test_data_32[] = { +#include "insn-x86-dat-32.c" + {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"}, + {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"}, + {{0}, 0, 0, NULL, NULL, NULL}, +}; + +struct test_data test_data_64[] = { +#include "insn-x86-dat-64.c" + {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"}, + {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"}, + {{0}, 0, 0, NULL, NULL, NULL}, +}; + +static int get_op(const char *op_str) +{ + struct val_data { + const char *name; + int val; + } vals[] = { + {"other", INTEL_PT_OP_OTHER}, + {"call", INTEL_PT_OP_CALL}, + {"ret", INTEL_PT_OP_RET}, + {"jcc", INTEL_PT_OP_JCC}, + {"jmp", INTEL_PT_OP_JMP}, + {"loop", INTEL_PT_OP_LOOP}, + {"iret", INTEL_PT_OP_IRET}, + {"int", INTEL_PT_OP_INT}, + {"syscall", INTEL_PT_OP_SYSCALL}, + {"sysret", INTEL_PT_OP_SYSRET}, + {NULL, 0}, + }; + struct val_data *val; + + if (!op_str || !strlen(op_str)) + return 0; + + for (val = vals; val->name; val++) { + if (!strcmp(val->name, op_str)) + return val->val; + } + + pr_debug("Failed to get op\n"); + + return -1; +} + +static int get_branch(const char *branch_str) +{ + struct val_data { + const char *name; + int val; + } vals[] = { + {"no_branch", INTEL_PT_BR_NO_BRANCH}, + {"indirect", INTEL_PT_BR_INDIRECT}, + {"conditional", INTEL_PT_BR_CONDITIONAL}, + {"unconditional", INTEL_PT_BR_UNCONDITIONAL}, + {NULL, 0}, + }; + struct val_data *val; + + if (!branch_str || !strlen(branch_str)) + return 0; + + for (val = 
vals; val->name; val++) { + if (!strcmp(val->name, branch_str)) + return val->val; + } + + pr_debug("Failed to get branch\n"); + + return -1; +} + +static int test_data_item(struct test_data *dat, int x86_64) +{ + struct intel_pt_insn intel_pt_insn; + struct insn insn; + int op, branch; + + insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64); + insn_get_length(&insn); + + if (!insn_complete(&insn)) { + pr_debug("Failed to decode: %s\n", dat->asm_rep); + return -1; + } + + if (insn.length != dat->expected_length) { + pr_debug("Failed to decode length (%d vs expected %d): %s\n", + insn.length, dat->expected_length, dat->asm_rep); + return -1; + } + + op = get_op(dat->expected_op_str); + branch = get_branch(dat->expected_branch_str); + + if (intel_pt_get_insn(dat->data, MAX_INSN_SIZE, x86_64, &intel_pt_insn)) { + pr_debug("Intel PT failed to decode: %s\n", dat->asm_rep); + return -1; + } + + if ((int)intel_pt_insn.op != op) { + pr_debug("Failed to decode 'op' value (%d vs expected %d): %s\n", + intel_pt_insn.op, op, dat->asm_rep); + return -1; + } + + if ((int)intel_pt_insn.branch != branch) { + pr_debug("Failed to decode 'branch' value (%d vs expected %d): %s\n", + intel_pt_insn.branch, branch, dat->asm_rep); + return -1; + } + + if (intel_pt_insn.rel != dat->expected_rel) { + pr_debug("Failed to decode 'rel' value (%#x vs expected %#x): %s\n", + intel_pt_insn.rel, dat->expected_rel, dat->asm_rep); + return -1; + } + + pr_debug("Decoded ok: %s\n", dat->asm_rep); + + return 0; +} + +static int test_data_set(struct test_data *dat_set, int x86_64) +{ + struct test_data *dat; + int ret = 0; + + for (dat = dat_set; dat->expected_length; dat++) { + if (test_data_item(dat, x86_64)) + ret = -1; + } + + return ret; +} + +/** + * test__insn_x86 - test x86 instruction decoder - new instructions. + * + * This function implements a test that decodes a selection of instructions and + * checks the results. The Intel PT function that further categorizes + * instructions (i.e. 
intel_pt_get_insn()) is also checked. + * + * The instructions are originally in insn-x86-dat-src.c which has been + * processed by scripts gen-insn-x86-dat.sh and gen-insn-x86-dat.awk to produce + * insn-x86-dat-32.c and insn-x86-dat-64.c which are included into this program. + * i.e. to add new instructions to the test, edit insn-x86-dat-src.c, run the + * gen-insn-x86-dat.sh script, make perf, and then run the test. + * + * If the test passes %0 is returned, otherwise %-1 is returned. Use the + * verbose (-v) option to see all the instructions and whether or not they + * decoded successfully. + */ +int test__insn_x86(void) +{ + int ret = 0; + + if (test_data_set(test_data_32, 0)) + ret = -1; + + if (test_data_set(test_data_64, 1)) + ret = -1; + + return ret; +} diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c new file mode 100644 index 000000000000..d28c1b6a3b54 --- /dev/null +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -0,0 +1,124 @@ +#include "tests/tests.h" +#include "perf.h" +#include "cloexec.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "arch-tests.h" + +#include <sys/mman.h> +#include <string.h> + +static pid_t spawn(void) +{ + pid_t pid; + + pid = fork(); + if (pid) + return pid; + + while(1); + sleep(5); + return 0; +} + +/* + * Create an event group that contains both a sampled hardware + * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then + * wait for the hardware perf counter to overflow and generate a PMI, + * which triggers an event read for both of the events in the group. + * + * Since reading Intel CQM event counters requires sending SMP IPIs, the + * CQM pmu needs to handle the above situation gracefully, and return + * the last read counter value to avoid triggering a WARN_ON_ONCE() in + * smp_call_function_many() caused by sending IPIs from NMI context.
+ */ +int test__intel_cqm_count_nmi_context(void) +{ + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel = NULL; + struct perf_event_attr pe; + int i, fd[2], flag, ret; + size_t mmap_len; + void *event; + pid_t pid; + int err = TEST_FAIL; + + flag = perf_event_open_cloexec_flag(); + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("perf_evlist__new failed\n"); + return TEST_FAIL; + } + + ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); + if (ret) { + pr_debug("parse_events failed\n"); + err = TEST_SKIP; + goto out; + } + + evsel = perf_evlist__first(evlist); + if (!evsel) { + pr_debug("perf_evlist__first failed\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = PERF_TYPE_HARDWARE; + pe.config = PERF_COUNT_HW_CPU_CYCLES; + pe.read_format = PERF_FORMAT_GROUP; + + pe.sample_period = 128; + pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; + + pid = spawn(); + + fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); + if (fd[0] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = evsel->attr.type; + pe.config = evsel->attr.config; + + fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); + if (fd[1] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + /* + * Pick a power-of-two number of pages + 1 for the meta-data + * page (struct perf_event_mmap_page). See tools/perf/design.txt. 
+ */ + mmap_len = page_size * 65; + + event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); + if (event == (void *)(-1)) { + pr_debug("failed to mmap %d\n", errno); + goto out; + } + + sleep(1); + + err = TEST_OK; + + munmap(event, mmap_len); + + for (i = 0; i < 2; i++) + close(fd[i]); + + kill(pid, SIGKILL); + wait(NULL); +out: + perf_evlist__delete(evlist); + return err; +} diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 5f49484f1abc..658cd200af74 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,7 +9,9 @@ #include "thread_map.h" #include "cpumap.h" #include "tsc.h" -#include "tests.h" +#include "tests/tests.h" + +#include "arch-tests.h" #define CHECK__(x) { \ while ((x) < 0) { \ diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index d31f2c4d9f64..e7688214c7cf 100644 --- a/tools/perf/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -5,10 +5,9 @@ #include <linux/types.h> #include "perf.h" #include "debug.h" -#include "tests.h" +#include "tests/tests.h" #include "cloexec.h" - -#if defined(__x86_64__) || defined(__i386__) +#include "arch-tests.h" static u64 rdpmc(unsigned int counter) { @@ -173,5 +172,3 @@ int test__rdpmc(void) return 0; } - -#endif diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index a08de0a35b83..9223c164e545 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -21,55 +21,109 @@ */ #include <stddef.h> +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/ptrace.h> /* for struct pt_regs */ +#include <linux/kernel.h> /* for offsetof */ #include <dwarf-regs.h> /* - * Generic dwarf analysis helpers + * See arch/x86/kernel/ptrace.c. 
+ * Different from it: + * + * - Since struct pt_regs is defined differently for user and kernel, + * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct + * field name of user's pt_regs), we make REG_OFFSET_NAME to accept + * both string name and reg field name. + * + * - Since accessing x86_32's pt_regs from x86_64 building is difficult + * and vise versa, we simply fill offset with -1, so + * get_arch_regstr() still works but regs_query_register_offset() + * returns error. + * The only inconvenience caused by it now is that we are not allowed + * to generate BPF prologue for a x86_64 kernel if perf is built for + * x86_32. This is really a rare usecase. + * + * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use + * the order defined by dwarf. */ -#define X86_32_MAX_REGS 8 -const char *x86_32_regs_table[X86_32_MAX_REGS] = { - "%ax", - "%cx", - "%dx", - "%bx", - "$stack", /* Stack address instead of %sp */ - "%bp", - "%si", - "%di", +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +#ifdef __x86_64__ +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} +#else +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +#endif + +static const struct pt_regs_offset x86_32_regoffset_table[] = { + REG_OFFSET_NAME_32("%ax", eax), + REG_OFFSET_NAME_32("%cx", ecx), + REG_OFFSET_NAME_32("%dx", edx), + REG_OFFSET_NAME_32("%bx", ebx), + REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ + REG_OFFSET_NAME_32("%bp", ebp), + REG_OFFSET_NAME_32("%si", esi), + REG_OFFSET_NAME_32("%di", edi), + REG_OFFSET_END, }; -#define X86_64_MAX_REGS 16 -const char *x86_64_regs_table[X86_64_MAX_REGS] = { - "%ax", - "%dx", - "%cx", - "%bx", - "%si", - "%di", - "%bp", - "%sp", - "%r8", - "%r9", - 
"%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%r15", +static const struct pt_regs_offset x86_64_regoffset_table[] = { + REG_OFFSET_NAME_64("%ax", rax), + REG_OFFSET_NAME_64("%dx", rdx), + REG_OFFSET_NAME_64("%cx", rcx), + REG_OFFSET_NAME_64("%bx", rbx), + REG_OFFSET_NAME_64("%si", rsi), + REG_OFFSET_NAME_64("%di", rdi), + REG_OFFSET_NAME_64("%bp", rbp), + REG_OFFSET_NAME_64("%sp", rsp), + REG_OFFSET_NAME_64("%r8", r8), + REG_OFFSET_NAME_64("%r9", r9), + REG_OFFSET_NAME_64("%r10", r10), + REG_OFFSET_NAME_64("%r11", r11), + REG_OFFSET_NAME_64("%r12", r12), + REG_OFFSET_NAME_64("%r13", r13), + REG_OFFSET_NAME_64("%r14", r14), + REG_OFFSET_NAME_64("%r15", r15), + REG_OFFSET_END, }; /* TODO: switching by dwarf address size */ #ifdef __x86_64__ -#define ARCH_MAX_REGS X86_64_MAX_REGS -#define arch_regs_table x86_64_regs_table +#define regoffset_table x86_64_regoffset_table #else -#define ARCH_MAX_REGS X86_32_MAX_REGS -#define arch_regs_table x86_32_regs_table +#define regoffset_table x86_32_regoffset_table #endif +/* Minus 1 for the ending REG_OFFSET_END */ +#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) + /* Return architecture dependent register string (for kprobe-tracer) */ const char *get_arch_regstr(unsigned int n) { - return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; + return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; +} + +/* Reuse code from arch/x86/kernel/ptrace.c */ +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. 
If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 2ca10d796c0b..b02af064f0f9 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -624,13 +624,49 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * threads. */ if (have_timing_info && !cpu_map__empty(cpus)) { - err = intel_pt_track_switches(evlist); - if (err == -EPERM) - pr_debug2("Unable to select sched:sched_switch\n"); - else if (err) - return err; - else - ptr->have_sched_switch = 1; + if (perf_can_record_switch_events()) { + bool cpu_wide = !target__none(&opts->target) && + !target__has_task(&opts->target); + + if (!cpu_wide && perf_can_record_cpu_wide()) { + struct perf_evsel *switch_evsel; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + switch_evsel = perf_evlist__last(evlist); + + switch_evsel->attr.freq = 0; + switch_evsel->attr.sample_period = 1; + switch_evsel->attr.context_switch = 1; + + switch_evsel->system_wide = true; + switch_evsel->no_aux_samples = true; + switch_evsel->immediate = true; + + perf_evsel__set_sample_bit(switch_evsel, TID); + perf_evsel__set_sample_bit(switch_evsel, TIME); + perf_evsel__set_sample_bit(switch_evsel, CPU); + + opts->record_switch_events = false; + ptr->have_sched_switch = 3; + } else { + opts->record_switch_events = true; + if (cpu_wide) + ptr->have_sched_switch = 3; + else + ptr->have_sched_switch = 2; + } + } else { + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } } if (intel_pt_evsel) { @@ -663,8 +699,11 @@ static int 
intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!cpu_map__empty(cpus)) + if (!cpu_map__empty(cpus)) { perf_evsel__set_sample_bit(tracking_evsel, TIME); + /* And the CPU for switch events */ + perf_evsel__set_sample_bit(tracking_evsel, CPU); + } } /* diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8edc205ff9a7..2bf9b3fd9e61 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -211,7 +211,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (!objdump_path) { - ret = perf_session_env__lookup_objdump(&session->header.env); + ret = perf_env__lookup_objdump(&session->header.env); if (ret) goto out; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f62c49b35be0..0a945d2e8ca5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -28,9 +28,11 @@ struct perf_inject { bool build_ids; bool sched_stat; bool have_auxtrace; + bool strip; const char *input_name; struct perf_data_file output; u64 bytes_written; + u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; }; @@ -176,6 +178,27 @@ static int perf_event__repipe(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__drop(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int perf_event__drop_aux(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + + if (!inject->aux_id) + inject->aux_id = sample->id; + + return 0; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct 
perf_sample *sample, @@ -466,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; } +static int drop_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static void strip_init(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel; + + inject->tool.context_switch = perf_event__drop; + + evlist__for_each(evlist, evsel) + evsel->handler = drop_sample; +} + +static bool has_tracking(struct perf_evsel *evsel) +{ + return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm || + evsel->attr.task; +} + +#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER) + +/* + * In order that the perf.data file is parsable, tracking events like MMAP need + * their selected event to exist, except if there is only 1 selected event left + * and it has a compatible sample type. 
+ */ +static bool ok_to_remove(struct perf_evlist *evlist, + struct perf_evsel *evsel_to_remove) +{ + struct perf_evsel *evsel; + int cnt = 0; + bool ok = false; + + if (!has_tracking(evsel_to_remove)) + return true; + + evlist__for_each(evlist, evsel) { + if (evsel->handler != drop_sample) { + cnt += 1; + if ((evsel->attr.sample_type & COMPAT_MASK) == + (evsel_to_remove->attr.sample_type & COMPAT_MASK)) + ok = true; + } + } + + return ok && cnt == 1; +} + +static void strip_fini(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel, *tmp; + + /* Remove non-synthesized evsels if possible */ + evlist__for_each_safe(evlist, tmp, evsel) { + if (evsel->handler == drop_sample && + ok_to_remove(evlist, evsel)) { + pr_debug("Deleting %s\n", perf_evsel__name(evsel)); + perf_evlist__remove(evlist, evsel); + perf_evsel__delete(evsel); + } + } +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; @@ -512,10 +607,14 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.aux = perf_event__drop_aux; + inject->tool.itrace_start = perf_event__drop_aux, inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ output_data_offset = 4096; + if (inject->strip) + strip_init(inject); } if (!inject->itrace_synth_opts.set) @@ -535,11 +634,28 @@ static int __cmd_inject(struct perf_inject *inject) } /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag. + * synthesized hardware events, so clear the feature flag and + * remove the evsel. 
*/ - if (inject->itrace_synth_opts.set) + if (inject->itrace_synth_opts.set) { + struct perf_evsel *evsel; + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (inject->itrace_synth_opts.last_branch) + perf_header__set_feat(&session->header, + HEADER_BRANCH_STACK); + evsel = perf_evlist__id2evsel_strict(session->evlist, + inject->aux_id); + if (evsel) { + pr_debug("Deleting %s\n", + perf_evsel__name(evsel)); + perf_evlist__remove(session->evlist, evsel); + perf_evsel__delete(evsel); + } + if (inject->strip) + strip_fini(inject); + } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__write_header(session, session->evlist, fd, true); @@ -604,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "strip", &inject.strip, + "strip non-synthesized events (use with --itrace)"), OPT_END() }; const char * const inject_usage[] = { @@ -619,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(inject_usage, options); + if (inject.strip && !inject.itrace_synth_opts.set) { + pr_err("--strip option requires --itrace option\n"); + return -1; + } + if (perf_data_file__open(&inject.output)) { perror("failed to create output file"); return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 23b1faaaa4cc..93ce665f976f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -329,7 +329,7 @@ static int build_alloc_func_list(void) return -EINVAL; } - kernel_map = machine->vmlinux_maps[MAP__FUNCTION]; + kernel_map = machine__kernel_map(machine); if (map__load(kernel_map, NULL) < 0) { pr_err("cannot load kernel map\n"); return -ENOENT; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 
fc1cffb1b7a2..dd94b4ca2213 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -13,7 +13,6 @@ #include "util/parse-options.h" #include "util/trace-event.h" #include "util/debug.h" -#include <api/fs/debugfs.h> #include "util/tool.h" #include "util/stat.h" #include "util/top.h" diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index af5bd0514108..bf679e2c978b 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,7 +36,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (!raw_dump) + if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { @@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) } for (i = 0; i < argc; ++i) { + char *sep, *s; + if (strcmp(argv[i], "tracepoint") == 0) print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || @@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump); - else { - char *sep = strchr(argv[i], ':'), *s; + else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; if (sep == NULL) { @@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); + } else { + if (asprintf(&s, "*%s*", argv[i]) < 0) { + printf("Critical: Not enough memory! 
Trying to continue...\n"); + continue; + } + print_symbol_events(s, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); + print_symbol_events(s, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); + print_hwcache_events(s, raw_dump); + print_pmu_events(s, raw_dump); + print_tracepoint_events(NULL, s, raw_dump); + free(s); } } return 0; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index b81cec33b4b2..530c3a28a58c 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -37,10 +37,10 @@ #include "util/strfilter.h" #include "util/symbol.h" #include "util/debug.h" -#include <api/fs/debugfs.h> #include "util/parse-options.h" #include "util/probe-finder.h" #include "util/probe-event.h" +#include "util/probe-file.h" #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" @@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str, if (str) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; -#ifdef HAVE_DWARF_SUPPORT else if (!strcmp(opt->long_name, "module")) params.uprobes = false; -#endif else return ret; @@ -311,6 +309,119 @@ static void pr_err_with_code(const char *msg, int err) pr_err("\n"); } +static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs) +{ + int ret; + int i, k; + const char *event = NULL, *group = NULL; + + ret = init_probe_symbol_maps(pevs->uprobes); + if (ret < 0) + return ret; + + ret = convert_perf_probe_events(pevs, npevs); + if (ret < 0) + goto out_cleanup; + + ret = apply_perf_probe_events(pevs, npevs); + if (ret < 0) + goto out_cleanup; + + for (i = k = 0; i < npevs; i++) + k += pevs[i].ntevs; + + pr_info("Added new event%s\n", (k > 1) ? 
"s:" : ":"); + for (i = 0; i < npevs; i++) { + struct perf_probe_event *pev = &pevs[i]; + + for (k = 0; k < pev->ntevs; k++) { + struct probe_trace_event *tev = &pev->tevs[k]; + + /* We use tev's name for showing new events */ + show_perf_probe_event(tev->group, tev->event, pev, + tev->point.module, false); + + /* Save the last valid name */ + event = tev->event; + group = tev->group; + } + } + + /* Note that it is possible to skip all events because of blacklist */ + if (event) { + /* Show how to use the event. */ + pr_info("\nYou can now use it in all perf tools, such as:\n\n"); + pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); + } + +out_cleanup: + cleanup_perf_probe_events(pevs, npevs); + exit_probe_symbol_maps(); + return ret; +} + +static int perf_del_probe_events(struct strfilter *filter) +{ + int ret, ret2, ufd = -1, kfd = -1; + char *str = strfilter__string(filter); + struct strlist *klist = NULL, *ulist = NULL; + struct str_node *ent; + + if (!str) + return -EINVAL; + + pr_debug("Delete filter: \'%s\'\n", str); + + /* Get current event names */ + ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); + if (ret < 0) + goto out; + + klist = strlist__new(NULL, NULL); + ulist = strlist__new(NULL, NULL); + if (!klist || !ulist) { + ret = -ENOMEM; + goto out; + } + + ret = probe_file__get_events(kfd, filter, klist); + if (ret == 0) { + strlist__for_each(ent, klist) + pr_info("Removed event: %s\n", ent->s); + + ret = probe_file__del_strlist(kfd, klist); + if (ret < 0) + goto error; + } + + ret2 = probe_file__get_events(ufd, filter, ulist); + if (ret2 == 0) { + strlist__for_each(ent, ulist) + pr_info("Removed event: %s\n", ent->s); + + ret2 = probe_file__del_strlist(ufd, ulist); + if (ret2 < 0) + goto error; + } + + if (ret == -ENOENT && ret2 == -ENOENT) + pr_debug("\"%s\" does not hit any event.\n", str); + /* Note that this is silently ignored */ + ret = 0; + +error: + if (kfd >= 0) + close(kfd); + if (ufd >= 0) + close(ufd); +out: + 
strlist__delete(klist); + strlist__delete(ulist); + free(str); + + return ret; +} + static int __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -377,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, "directory", "path to kernel source"), - OPT_CALLBACK('m', "module", NULL, "modname|path", - "target module name (for online) or path (for offline)", - opt_set_target), OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines, "Don't search inlined functions"), #endif @@ -396,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter), OPT_CALLBACK('x', "exec", NULL, "executable|path", "target executable name or path", opt_set_target), + OPT_CALLBACK('m', "module", NULL, "modname|path", + "target module name (for online) or path (for offline)", + opt_set_target), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, "Enable symbol demangling"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, @@ -483,7 +594,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return ret; #endif case 'd': - ret = del_perf_probe_events(params.filter); + ret = perf_del_probe_events(params.filter); if (ret < 0) { pr_err_with_code(" Error: Failed to delete events.", ret); return ret; @@ -496,7 +607,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(probe_usage, options); } - ret = add_perf_probe_events(params.events, params.nevents); + ret = perf_add_probe_events(params.events, params.nevents); if (ret < 0) { pr_err_with_code(" Error: Failed to add events.", ret); return ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 142eeb341b29..24ace2f318c1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,7 +49,7 @@ struct record { int realtime_prio; bool 
no_buildid; bool no_buildid_cache; - long samples; + unsigned long long samples; }; static int record__write(struct record *rec, void *bf, size_t size) @@ -636,8 +636,29 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) /* * Let the child rip */ - if (forks) + if (forks) { + union perf_event *event; + + event = malloc(sizeof(event->comm) + machine->id_hdr_size); + if (event == NULL) { + err = -ENOMEM; + goto out_child; + } + + /* + * Some H/W events are generated before COMM event + * which is emitted during exec(), so perf script + * cannot see a correct process name for those events. + * Synthesize COMM event to prevent it. + */ + perf_event__synthesize_comm(tool, event, + rec->evlist->workload.pid, + process_synthesized_event, + machine); + free(event); + perf_evlist__start_workload(rec->evlist); + } if (opts->initial_delay) { usleep(opts->initial_delay * 1000); @@ -646,7 +667,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) auxtrace_snapshot_enabled = 1; for (;;) { - int hits = rec->samples; + unsigned long long hits = rec->samples; if (record__mmap_read_all(rec) < 0) { auxtrace_snapshot_enabled = 0; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 62b285e32aa5..3b23b25d1589 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,6 +62,7 @@ struct report { float min_percent; u64 nr_entries; u64 queue_size; + int socket_filter; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -162,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) goto out_put; - if (sort__mode == SORT_MODE__BRANCH) + if (sort__mode == SORT_MODE__BRANCH) { + /* + * A non-synthesized event might not have a branch stack if + * branch stacks have been synthesized (using itrace options). 
+ */ + if (!sample->branch_stack) + goto out_put; iter.ops = &hist_iter_branch; - else if (rep->mem_mode) + } else if (rep->mem_mode) { iter.ops = &hist_iter_mem; - else if (symbol_conf.cumulate_callchain) + } else if (symbol_conf.cumulate_callchain) { iter.ops = &hist_iter_cumulative; - else + } else { iter.ops = &hist_iter_normal; + } if (al.map != NULL) al.map->dso->hit = 1; @@ -213,6 +221,15 @@ static int report__setup_sample_type(struct report *rep) u64 sample_type = perf_evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data_file__is_pipe(session->file); + if (session->itrace_synth_opts->callchain || + (!is_pipe && + perf_header__has_feat(&session->header, HEADER_AUXTRACE) && + !session->itrace_synth_opts->set)) + sample_type |= PERF_SAMPLE_CALLCHAIN; + + if (session->itrace_synth_opts->last_branch) + sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " @@ -286,6 +303,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report struct perf_evsel *evsel = hists_to_evsel(hists); char buf[512]; size_t size = sizeof(buf); + int socked_id = hists->socket_filter; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -326,6 +344,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report ret += fprintf(fp, "\n# Sort order : %s", sort_order ? 
: default_mem_sort_order); } else ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); + + if (socked_id > -1) + ret += fprintf(fp, "\n# Processor Socket: %d", socked_id); + return ret + fprintf(fp, "\n#\n"); } @@ -365,7 +387,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, static void report__warn_kptr_restrict(const struct report *rep) { - struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION]; + struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; if (kernel_map == NULL || @@ -450,6 +472,8 @@ static void report__collapse_hists(struct report *rep) if (pos->idx == 0) hists->symbol_filter_str = rep->symbol_filter_str; + hists->socket_filter = rep->socket_filter; + hists__collapse_resort(hists, &prog); /* Non-group events are considered as leader */ @@ -609,7 +633,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) bool has_br_stack = false; int branch_mode = -1; bool branch_call_mode = false; - char callchain_default_opt[] = "fractal,0.5,callee"; + char callchain_default_opt[] = "graph,0.5,caller"; const char * const report_usage[] = { "perf report [<options>]", NULL @@ -635,6 +659,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) }, .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", + .socket_filter = -1, }; const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", @@ -676,7 +701,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. 
" - "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + "Default: graph,0.5,caller", &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, @@ -747,6 +772,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Show full source file name path for source lines"), OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, "Show callgraph from reference event"), + OPT_INTEGER(0, "socket-filter", &report.socket_filter, + "only show processor socket that match with this filter"), OPT_END() }; struct perf_data_file file = { @@ -782,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; + if (itrace_synth_opts.callchain && + (int)itrace_synth_opts.callchain_sz > report.max_stack) + report.max_stack = itrace_synth_opts.callchain_sz; + if (!input_name || !strlen(input_name)) { if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) input_name = "-"; @@ -809,6 +840,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (itrace_synth_opts.last_branch) + has_br_stack = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 284a76e04628..8ce1c6bbfa45 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,9 +29,12 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; +static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -415,7 +418,10 @@ static void print_sample_start(struct perf_sample *sample, secs = nsecs / NSECS_PER_SEC; nsecs -= secs * NSECS_PER_SEC; usecs = nsecs / NSECS_PER_USEC; - printf("%5lu.%06lu: ", secs, usecs); + if (nanosecs) + printf("%5lu.%09llu: ", secs, nsecs); + else + printf("%5lu.%06lu: ", secs, usecs); } } @@ -471,7 +477,7 @@ static void print_sample_bts(union perf_event *event, } } perf_evsel__print_ip(evsel, sample, al, print_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } /* print branch_to information */ @@ -548,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, perf_evsel__print_ip(evsel, sample, al, output[attr->type].print_ip_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } if (PRINT_FIELD(IREGS)) @@ -1695,6 +1701,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN(0, "ns", &nanosecs, + "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), @@ -1740,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (itrace_synth_opts.callchain && + itrace_synth_opts.callchain_sz > 
scripting_max_stack) + scripting_max_stack = itrace_synth_opts.callchain_sz; + /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d46dbb1bc65d..5ef88f760b12 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -215,7 +215,7 @@ static void read_counters(bool close_counters) evlist__for_each(evsel_list, counter) { if (read_counter(counter)) - pr_warning("failed to read counter %s\n", counter->name); + pr_debug("failed to read counter %s\n", counter->name); if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); @@ -1179,7 +1179,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 100)"), + "print counts at regular interval in ms (>= 10)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, @@ -1332,9 +1332,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) thread_map__read_comms(evsel_list->threads); if (interval && interval < 100) { - pr_err("print interval must be >= 100ms\n"); - parse_options_usage(stat_usage, options, "I", 1); - goto out; + if (interval < 10) { + pr_err("print interval must be >= 10ms\n"); + parse_options_usage(stat_usage, options, "I", 1); + goto out; + } else + pr_warning("print interval < 100ms. " + "The overhead percentage could be high in some cases. 
" + "Please proceed with caution.\n"); } if (perf_evlist__alloc_stats(evsel_list, interval)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8c465c83aabf..6f641fd68296 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -655,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) { const char *name = sym->name; - if (!map->dso->kernel) + if (!__map__is_kernel(map)) return 0; /* * ppc64 uses function descriptors and appends a '.' to the @@ -857,9 +857,12 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) * TODO: we don't process guest user from host side * except simple counting. */ - /* Fall thru */ - default: goto next_event; + default: + if (event->header.type == PERF_RECORD_SAMPLE) + goto next_event; + machine = &session->machines.host; + break; } @@ -952,7 +955,7 @@ static int __cmd_top(struct perf_top *top) machines__set_symbol_filter(&top->session->machines, symbol_filter); if (!objdump_path) { - ret = perf_session_env__lookup_objdump(&top->session->header.env); + ret = perf_env__lookup_objdump(&top->session->header.env); if (ret) goto out_delete; } @@ -961,8 +964,18 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; + if (perf_session__register_idle_thread(top->session) == NULL) + goto out_delete; + machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, opts->proc_map_timeout); + + if (sort__has_socket) { + ret = perf_env__read_cpu_topology_map(&perf_env); + if (ret < 0) + goto out_err_cpu_topo; + } + ret = perf_top__start_counters(top); if (ret) goto out_delete; @@ -1020,6 +1033,14 @@ out_delete: top->session = NULL; return ret; + +out_err_cpu_topo: { + char errbuf[BUFSIZ]; + const char *err = strerror_r(-ret, errbuf, sizeof(errbuf)); + + ui__error("Could not read the CPU topology map: %s\n", err); + goto out_delete; +} } static int diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 
4e3abba03062..93b80f12f35e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -17,6 +17,7 @@ */ #include <traceevent/event-parse.h> +#include <api/fs/tracing_path.h> #include "builtin.h" #include "util/color.h" #include "util/debug.h" @@ -37,6 +38,7 @@ #include <stdlib.h> #include <sys/mman.h> #include <linux/futex.h> +#include <linux/err.h> /* For older distros: */ #ifndef MAP_STACK @@ -244,13 +246,14 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ - if (evsel == NULL) + if (IS_ERR(evsel)) evsel = perf_evsel__newtp("syscalls", direction); - if (evsel) { - if (perf_evsel__init_syscall_tp(evsel, handler)) - goto out_delete; - } + if (IS_ERR(evsel)) + return NULL; + + if (perf_evsel__init_syscall_tp(evsel, handler)) + goto out_delete; return evsel; @@ -1704,12 +1707,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); sc->tp_format = trace_event__tp_format("syscalls", tp_name); - if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { + if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); sc->tp_format = trace_event__tp_format("syscalls", tp_name); } - if (sc->tp_format == NULL) + if (IS_ERR(sc->tp_format)) return -1; sc->args = sc->tp_format->format.fields; @@ -2389,7 +2392,8 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); - if (evsel == NULL) + + if (IS_ERR(evsel)) return false; if (perf_evsel__field(evsel, "pathname") == NULL) { @@ -2686,11 +2690,11 @@ out_delete_evlist: char errbuf[BUFSIZ]; out_error_sched_stat_runtime: - debugfs__strerror_open_tp(errno, 
errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); goto out_error; out_error_raw_syscalls: - debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; out_error_mmap: diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 38a08539f4bf..ab09adaabc9c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -109,6 +109,10 @@ endif # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile +ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET + CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET +endif + include $(src-perf)/config/utilities.mak ifeq ($(call get-executable,$(FLEX)),) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 07dbff5c0e60..543713422d14 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/env.h" #include "util/exec_cmd.h" #include "util/cache.h" #include "util/quote.h" @@ -15,7 +16,7 @@ #include "util/parse-events.h" #include "util/parse-options.h" #include "util/debug.h" -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include <pthread.h> const char perf_usage_string[] = @@ -161,6 +162,20 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) break; /* + * Shortcut for '-h' and '-v' options to invoke help + * and version command. + */ + if (!strcmp(cmd, "-h")) { + (*argv)[0] = "--help"; + break; + } + + if (!strcmp(cmd, "-v")) { + (*argv)[0] = "--version"; + break; + } + + /* * Check remaining flags. 
*/ if (!prefixcmp(cmd, CMD_EXEC_PATH)) { @@ -214,7 +229,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "No directory given for --debugfs-dir.\n"); usage(perf_usage_string); } - perf_debugfs_set_path((*argv)[1]); + tracing_path_set((*argv)[1]); if (envchanged) *envchanged = 1; (*argv)++; @@ -230,7 +245,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argv)++; (*argc)--; } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { - perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); + tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); fprintf(stderr, "dir: %s\n", tracing_path); if (envchanged) *envchanged = 1; @@ -369,6 +384,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) status = p->fn(argc, argv, prefix); exit_browser(status); + perf_env__exit(&perf_env); if (status) return status & 0xff; @@ -517,8 +533,10 @@ int main(int argc, const char **argv) cmd = perf_extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; - /* get debugfs mount point from /proc/mounts */ - perf_debugfs_mount(NULL); + + /* get debugfs/tracefs mount point from /proc/mounts */ + tracing_path_mount(); + /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 84a32037a80f..1b02cdc0cab6 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -61,6 +61,142 @@ import datetime # # An example of using the database is provided by the script # call-graph-from-postgresql.py. Refer to that script for details. +# +# Tables: +# +# The tables largely correspond to perf tools' data structures. They are largely self-explanatory. +# +# samples +# +# 'samples' is the main table. It represents what instruction was executing at a point in time +# when something (a selected event) happened. 
The memory address is the instruction pointer or 'ip'. +# +# calls +# +# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'. +# 'calls' is only created when the 'calls' option to this script is specified. +# +# call_paths +# +# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'. +# 'call_paths' is only created when the 'calls' option to this script is specified. +# +# branch_types +# +# 'branch_types' provides descriptions for each type of branch. +# +# comm_threads +# +# 'comm_threads' shows how 'comms' relates to 'threads'. +# +# comms +# +# 'comms' contains a record for each 'comm' - the name given to the executable that is running. +# +# dsos +# +# 'dsos' contains a record for each executable file or library. +# +# machines +# +# 'machines' can be used to distinguish virtual machines if virtualization is supported. +# +# selected_events +# +# 'selected_events' contains a record for each kind of event that has been sampled. +# +# symbols +# +# 'symbols' contains a record for each symbol. Only symbols that have samples are present. +# +# threads +# +# 'threads' contains a record for each thread. +# +# Views: +# +# Most of the tables have views for more friendly display. The views are: +# +# calls_view +# call_paths_view +# comm_threads_view +# dsos_view +# machines_view +# samples_view +# symbols_view +# threads_view +# +# More examples of browsing the database with psql: +# Note that some of the examples are not the most optimal SQL queries. +# Note that call information is only available if the script's 'calls' option has been used. 
+# +# Top 10 function calls (not aggregated by symbol): +# +# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10; +# +# Top 10 function calls (aggregated by symbol): +# +# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol, +# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count +# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10; +# +# Note that the branch count gives a rough estimation of cpu usage, so functions +# that took a long time but have a relatively low branch count must have spent time +# waiting. +# +# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'): +# +# SELECT * FROM symbols_view WHERE name LIKE '%alloc%'; +# +# Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187): +# +# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10; +# +# Show function calls made by a function in the same context (i.e. same call path) (e.g. one with call_path_id 254): +# +# SELECT * FROM calls_view WHERE parent_call_path_id = 254; +# +# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670): +# +# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%'; +# +# Show transactions: +# +# SELECT * FROM samples_view WHERE event = 'transactions'; +# +# Note transaction start has 'in_tx' true, whereas transaction end has 'in_tx' false. +# Transaction aborts have branch_type_name 'transaction abort'. +# +# Show transaction aborts: +# +# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort'; +# +# Printing a call stack requires walking the call_paths table. 
For example this python script: +# #!/usr/bin/python2 +# +# import sys +# from PySide.QtSql import * +# +# if __name__ == '__main__': +# if (len(sys.argv) < 3): +# print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>" +# raise Exception("Too few arguments") +# dbname = sys.argv[1] +# call_path_id = sys.argv[2] +# db = QSqlDatabase.addDatabase('QPSQL') +# db.setDatabaseName(dbname) +# if not db.open(): +# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) +# query = QSqlQuery(db) +# print " id ip symbol_id symbol dso_id dso_short_name" +# while call_path_id != 0 and call_path_id != 1: +# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id)) +# if not ret: +# raise Exception("Query failed: " + query.lastError().text()) +# if not query.next(): +# raise Exception("Query failed") +# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) +# call_path_id = query.value(6) from PySide.QtSql import * @@ -244,6 +380,91 @@ if perf_db_export_calls: 'parent_call_path_id bigint,' 'flags integer)') +do_query(query, 'CREATE VIEW machines_view AS ' + 'SELECT ' + 'id,' + 'pid,' + 'root_dir,' + 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest' + ' FROM machines') + +do_query(query, 'CREATE VIEW dsos_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'short_name,' + 'long_name,' + 'build_id' + ' FROM dsos') + +do_query(query, 'CREATE VIEW symbols_view AS ' + 'SELECT ' + 'id,' + 'name,' + '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,' + 'dso_id,' + 'sym_start,' + 'sym_end,' + 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding' + ' FROM symbols') + +do_query(query, 'CREATE VIEW threads_view AS ' + 'SELECT ' + 'id,' 
+ 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'process_id,' + 'pid,' + 'tid' + ' FROM threads') + +do_query(query, 'CREATE VIEW comm_threads_view AS ' + 'SELECT ' + 'comm_id,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid' + ' FROM comm_threads') + +if perf_db_export_calls: + do_query(query, 'CREATE VIEW call_paths_view AS ' + 'SELECT ' + 'c.id,' + 'to_hex(c.ip) AS ip,' + 'c.symbol_id,' + '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,' + '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,' + '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,' + 'c.parent_id,' + 'to_hex(p.ip) AS parent_ip,' + 'p.symbol_id AS parent_symbol_id,' + '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,' + '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' + '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' + ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') + do_query(query, 'CREATE VIEW calls_view AS ' + 'SELECT ' + 'calls.id,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'call_path_id,' + 'to_hex(ip) AS ip,' + 'symbol_id,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'call_time,' + 'return_time,' + 'return_time - call_time AS elapsed_time,' + 'branch_count,' + 'call_id,' + 'return_id,' + 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'parent_call_path_id' + ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') + do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' 'id,' diff --git 
a/tools/perf/tests/Build b/tools/perf/tests/Build index c1518bdd0f1b..50de2253cff6 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -8,7 +8,6 @@ perf-y += openat-syscall-all-cpus.o perf-y += openat-syscall-tp-fields.o perf-y += mmap-basic.o perf-y += perf-record.o -perf-y += rdpmc.o perf-y += evsel-roundtrip-name.o perf-y += evsel-tp-sched.o perf-y += fdarray.o @@ -33,8 +32,7 @@ perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o perf-y += llvm.o - -perf-$(CONFIG_X86) += perf-time-to-tsc.o +perf-y += topology.o ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 136cd934be66..66f72d3d6677 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,10 +14,13 @@ #include "parse-options.h" #include "symbol.h" -static struct test { - const char *desc; - int (*func)(void); -} tests[] = { +struct test __weak arch_tests[] = { + { + .func = NULL, + }, +}; + +static struct test generic_tests[] = { { .desc = "vmlinux symtab matches kallsyms", .func = test__vmlinux_matches_kallsyms, @@ -38,12 +41,6 @@ static struct test { .desc = "parse events tests", .func = test__parse_events, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "x86 rdpmc test", - .func = test__rdpmc, - }, -#endif { .desc = "Validate PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, @@ -104,12 +101,6 @@ static struct test { .desc = "Test software clock events have valid period values", .func = test__sw_clock_freq, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "Test converting perf time to TSC", - .func = test__perf_time_to_tsc, - }, -#endif { .desc = "Test object code reading", .func = test__code_reading, @@ -126,14 +117,6 @@ static struct test { .desc = "Test parsing with no sample_id_all bit set", .func = test__parse_no_sample_id_all, }, -#if 
defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) -#ifdef HAVE_DWARF_UNWIND_SUPPORT - { - .desc = "Test dwarf unwind", - .func = test__dwarf_unwind, - }, -#endif -#endif { .desc = "Test filtering hist entries", .func = test__hists_filter, @@ -179,11 +162,20 @@ static struct test { .func = test__llvm, }, { + .desc = "Test topology in session", + .func = test_session_topology, + }, + { .func = NULL, }, }; -static bool perf_test__matches(int curr, int argc, const char *argv[]) +static struct test *tests[] = { + generic_tests, + arch_tests, +}; + +static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[]) { int i; @@ -200,7 +192,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[]) continue; } - if (strstr(tests[curr].desc, argv[i])) + if (strstr(test->desc, argv[i])) return true; } @@ -237,27 +229,31 @@ static int run_test(struct test *test) return err; } +#define for_each_test(j, t) \ + for (j = 0; j < ARRAY_SIZE(tests); j++) \ + for (t = &tests[j][0]; t->func; t++) + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { + struct test *t; + unsigned int j; int i = 0; int width = 0; - while (tests[i].func) { - int len = strlen(tests[i].desc); + for_each_test(j, t) { + int len = strlen(t->desc); if (width < len) width = len; - ++i; } - i = 0; - while (tests[i].func) { + for_each_test(j, t) { int curr = i++, err; - if (!perf_test__matches(curr, argc, argv)) + if (!perf_test__matches(t, curr, argc, argv)) continue; - pr_info("%2d: %-*s:", i, width, tests[curr].desc); + pr_info("%2d: %-*s:", i, width, t->desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -265,8 +261,8 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } pr_debug("\n--- start ---\n"); - err = run_test(&tests[curr]); - pr_debug("---- end ----\n%s:", tests[curr].desc); + err = run_test(t); + pr_debug("---- 
end ----\n%s:", t->desc); switch (err) { case TEST_OK: @@ -287,15 +283,15 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) static int perf_test__list(int argc, const char **argv) { + unsigned int j; + struct test *t; int i = 0; - while (tests[i].func) { - int curr = i++; - - if (argc > 1 && !strstr(tests[curr].desc, argv[1])) + for_each_test(j, t) { + if (argc > 1 && !strstr(t->desc, argv[1])) continue; - pr_info("%2d: %s\n", i, tests[curr].desc); + pr_info("%2d: %s\n", ++i, t->desc); } return 0; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 39c784a100a9..49b1959dda41 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -33,20 +33,20 @@ static unsigned int hex(char c) return c - 'A' + 10; } -static void read_objdump_line(const char *line, size_t line_len, void **buf, - size_t *len) +static size_t read_objdump_line(const char *line, size_t line_len, void *buf, + size_t len) { const char *p; - size_t i; + size_t i, j = 0; /* Skip to a colon */ p = strchr(line, ':'); if (!p) - return; + return 0; i = p + 1 - line; /* Read bytes */ - while (*len) { + while (j < len) { char c1, c2; /* Skip spaces */ @@ -65,20 +65,26 @@ static void read_objdump_line(const char *line, size_t line_len, void **buf, if (i < line_len && line[i] && !isspace(line[i])) break; /* Store byte */ - *(unsigned char *)*buf = (hex(c1) << 4) | hex(c2); - *buf += 1; - *len -= 1; + *(unsigned char *)buf = (hex(c1) << 4) | hex(c2); + buf += 1; + j++; } + /* return number of successfully read bytes */ + return j; } -static int read_objdump_output(FILE *f, void **buf, size_t *len) +static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr) { char *line = NULL; - size_t line_len; + size_t line_len, off_last = 0; ssize_t ret; int err = 0; + u64 addr, last_addr = start_addr; + + while (off_last < *len) { + size_t off, read_bytes, written_bytes; + unsigned char tmp[BUFSZ]; - while (1) { 
ret = getline(&line, &line_len, f); if (feof(f)) break; @@ -87,9 +93,33 @@ static int read_objdump_output(FILE *f, void **buf, size_t *len) err = -1; break; } - read_objdump_line(line, ret, buf, len); + + /* read objdump data into temporary buffer */ + read_bytes = read_objdump_line(line, ret, tmp, sizeof(tmp)); + if (!read_bytes) + continue; + + if (sscanf(line, "%"PRIx64, &addr) != 1) + continue; + if (addr < last_addr) { + pr_debug("addr going backwards, read beyond section?\n"); + break; + } + last_addr = addr; + + /* copy it from temporary buffer to 'buf' according + * to address on current objdump line */ + off = addr - start_addr; + if (off >= *len) + break; + written_bytes = MIN(read_bytes, *len - off); + memcpy(buf + off, tmp, written_bytes); + off_last = off + written_bytes; } + /* len returns number of bytes that could not be read */ + *len -= off_last; + free(line); return err; @@ -103,7 +133,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, FILE *f; int ret; - fmt = "%s -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; + fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, filename); if (ret <= 0 || (size_t)ret >= sizeof(cmd)) @@ -120,7 +150,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, return -1; } - ret = read_objdump_output(f, &buf, &len); + ret = read_objdump_output(f, buf, &len, addr); if (len) { pr_debug("objdump read too few bytes\n"); if (!ret) @@ -132,6 +162,18 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, return ret; } +static void dump_buf(unsigned char *buf, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + pr_debug("0x%02x ", buf[i]); + if (i % 16 == 15) + pr_debug("\n"); + } + pr_debug("\n"); +} + static int read_object_code(u64 addr, size_t len, u8 cpumode, struct thread *thread, struct state *state) { @@ -234,6 +276,10 @@ static int 
read_object_code(u64 addr, size_t len, u8 cpumode, /* The results should be identical */ if (memcmp(buf1, buf2, len)) { pr_debug("Bytes read differ from those read by objdump\n"); + pr_debug("buf1 (dso):\n"); + dump_buf(buf1, len); + pr_debug("buf2 (objdump):\n"); + dump_buf(buf2, len); return -1; } pr_debug("Bytes read match those read by objdump\n"); @@ -427,7 +473,7 @@ static int do_test_code_reading(bool try_kcore) symbol_conf.kallsyms_name = "/proc/kallsyms"; /* Load kernel map */ - map = machine->vmlinux_maps[MAP__FUNCTION]; + map = machine__kernel_map(machine); ret = map__load(map, NULL); if (ret < 0) { pr_debug("map__load failed\n"); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 40b36c462427..07221793a3ac 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,10 @@ #include "thread.h" #include "callchain.h" +#if defined (__x86_64__) || defined (__i386__) +#include "arch-tests.h" +#endif + /* For bsearch. We try to unwind functions in shared object. 
*/ #include <stdlib.h> diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 52162425c969..790e413d9a1f 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include <traceevent/event-parse.h> #include "evsel.h" #include "tests.h" @@ -36,8 +37,8 @@ int test__perf_evsel__tp_sched_test(void) struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); int ret = 0; - if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + if (IS_ERR(evsel)) { + pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } @@ -66,6 +67,11 @@ int test__perf_evsel__tp_sched_test(void) evsel = perf_evsel__newtp("sched", "sched_wakeup"); + if (IS_ERR(evsel)) { + pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + return -1; + } + if (perf_evsel__test_field(evsel, "comm", 16, true)) ret = -1; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index ce48775e6ada..818acf875dd0 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -16,30 +16,31 @@ struct sample { struct thread *thread; struct map *map; struct symbol *sym; + int socket; }; /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, .socket = 0 }, /* perf [perf] main() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* perf [libc] malloc() */ - { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, .socket = 0 }, /* perf [perf] main() */ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* will be merged */ /* perf [perf] cmd_record() 
*/ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, .socket = 1 }, /* perf [kernel] page_fault() */ - { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 1 }, /* bash [bash] main() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, .socket = 2 }, /* bash [bash] xmalloc() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, .socket = 2 }, /* bash [libc] malloc() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, .socket = 3 }, /* bash [kernel] page_fault() */ - { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 3 }, }; static int add_hist_entries(struct perf_evlist *evlist, @@ -83,6 +84,7 @@ static int add_hist_entries(struct perf_evlist *evlist, &sample) < 0) goto out; + al.socket = fake_samples[i].socket; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, NULL) < 0) { addr_location__put(&al); @@ -253,6 +255,39 @@ int test__hists_filter(void) TEST_ASSERT_VAL("Unmatched total period for symbol filter", hists->stats.total_non_filtered_period == 300); + /* remove symbol filter first */ + hists->symbol_filter_str = NULL; + hists__filter_by_symbol(hists); + + /* now applying socket filters */ + hists->socket_filter = 2; + hists__filter_by_socket(hists); + + if (verbose > 2) { + pr_info("Histogram for socket filters\n"); + print_hists_out(hists); + } + + /* normal stats should be invariant */ + TEST_ASSERT_VAL("Invalid nr samples", + hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + TEST_ASSERT_VAL("Invalid nr hist entries", + hists->nr_entries == 9); + TEST_ASSERT_VAL("Invalid total period", + hists->stats.total_period == 1000); + + /* but filter stats are 
changed */ + TEST_ASSERT_VAL("Unmatched nr samples for socket filter", + hists->stats.nr_non_filtered_samples == 2); + TEST_ASSERT_VAL("Unmatched nr hist entries for socket filter", + hists->nr_non_filtered_entries == 2); + TEST_ASSERT_VAL("Unmatched total period for socket filter", + hists->stats.total_non_filtered_period == 200); + + /* remove socket filter first */ + hists->socket_filter = -1; + hists__filter_by_socket(hists); + /* now applying all filters at once. */ hists->thread_filter = fake_samples[1].thread; hists->dso_filter = fake_samples[1].map->dso; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 666b67a4df9d..4495493c9431 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -3,6 +3,7 @@ #include "thread_map.h" #include "cpumap.h" #include "tests.h" +#include <linux/err.h> /* * This test will generate random numbers of calls to some getpid syscalls, @@ -65,7 +66,7 @@ int test__basic_mmap(void) snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); evsels[i] = perf_evsel__newtp("syscalls", name); - if (evsels[i] == NULL) { + if (IS_ERR(evsels[i])) { pr_debug("perf_evsel__new\n"); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index a572f87e9c8d..9e104a2e973d 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,3 +1,5 @@ +#include <api/fs/fs.h> +#include <linux/err.h> #include "evsel.h" #include "tests.h" #include "thread_map.h" @@ -14,6 +16,7 @@ int test__openat_syscall_event_on_all_cpus(void) cpu_set_t cpu_set; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; + char errbuf[BUFSIZ]; if (threads == NULL) { pr_debug("thread_map__new\n"); @@ -29,13 +32,9 @@ int test__openat_syscall_event_on_all_cpus(void) CPU_ZERO(&cpu_set); evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) 
{ - if (tracefs_configured()) - pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); - else if (debugfs_configured()) - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - else - pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); + if (IS_ERR(evsel)) { + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); + pr_err("%s\n", errbuf); goto out_thread_map_delete; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 01a19626c846..473d3869727e 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include "perf.h" #include "evlist.h" #include "evsel.h" @@ -30,7 +31,7 @@ int test__syscall_openat_tp_fields(void) } evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) { + if (IS_ERR(evsel)) { pr_debug("%s: perf_evsel__newtp\n", __func__); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index c9a37bc6b33a..7b1db8306098 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -1,3 +1,5 @@ +#include <api/fs/tracing_path.h> +#include <linux/err.h> #include "thread_map.h" #include "evsel.h" #include "debug.h" @@ -10,6 +12,7 @@ int test__openat_syscall_event(void) unsigned int nr_openat_calls = 111, i; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; + char errbuf[BUFSIZ]; if (threads == NULL) { pr_debug("thread_map__new\n"); @@ -17,13 +20,9 @@ int test__openat_syscall_event(void) } evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); - if (evsel == NULL) { - if (tracefs_configured()) - pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); - else if (debugfs_configured()) - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - else - pr_debug("Neither tracefs or debugfs is enabled in 
this kernel\n"); + if (IS_ERR(evsel)) { + tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); + pr_err("%s\n", errbuf); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9b6b2b6324a1..0648b84a9171 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,11 +3,10 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> -#include <api/fs/tracefs.h> -#include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" #include <linux/hw_breakpoint.h> +#include <api/fs/fs.h> #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) @@ -1260,25 +1259,24 @@ test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) return test__checkevent_breakpoint_rw(evlist); } +static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_TASK_CLOCK == evsel->attr.config); + return 0; +} + static int count_tracepoints(void) { - char events_path[PATH_MAX]; struct dirent *events_ent; - const char *mountpoint; DIR *events_dir; int cnt = 0; - mountpoint = tracefs_find_mountpoint(); - if (mountpoint) { - scnprintf(events_path, PATH_MAX, "%s/events", - mountpoint); - } else { - mountpoint = debugfs_find_mountpoint(); - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - mountpoint); - } - - events_dir = opendir(events_path); + events_dir = opendir(tracing_events_path); TEST_ASSERT_VAL("Can't open events dir", events_dir); @@ -1295,7 +1293,7 @@ static int count_tracepoints(void) continue; scnprintf(sys_path, PATH_MAX, "%s/%s", - events_path, events_ent->d_name); + tracing_events_path, events_ent->d_name); sys_dir = 
opendir(sys_path); TEST_ASSERT_VAL("Can't open sys dir", sys_dir); @@ -1575,6 +1573,11 @@ static struct evlist_test test__events[] = { .check = test__checkevent_exclude_idle_modifier_1, .id = 46, }, + { + .name = "task-clock:P,cycles", + .check = test__checkevent_precise_max_modifier, + .id = 47, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index bf113a247987..c80486969f83 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -24,13 +24,17 @@ enum { TEST_SKIP = -2, }; +struct test { + const char *desc; + int (*func)(void); +}; + /* Tests */ int test__vmlinux_matches_kallsyms(void); int test__openat_syscall_event(void); int test__openat_syscall_event_on_all_cpus(void); int test__basic_mmap(void); int test__PERF_RECORD(void); -int test__rdpmc(void); int test__perf_evsel__roundtrip_name_test(void); int test__perf_evsel__tp_sched_test(void); int test__syscall_openat_tp_fields(void); @@ -46,7 +50,6 @@ int test__bp_signal(void); int test__bp_signal_overflow(void); int test__task_exit(void); int test__sw_clock_freq(void); -int test__perf_time_to_tsc(void); int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); @@ -63,8 +66,9 @@ int test__fdarray__add(void); int test__kmod_path__parse(void); int test__thread_map(void); int test__llvm(void); +int test_session_topology(void); -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; struct perf_sample; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c new file mode 100644 index 000000000000..c3aff53a976a --- /dev/null +++ b/tools/perf/tests/topology.c @@ -0,0 +1,115 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "tests.h" +#include "util.h" +#include "session.h" +#include "evlist.h" +#include "debug.h" + +#define TEMPL 
"/tmp/perf-test-XXXXXX" +#define DATA_SIZE 10 + +static int get_temp(char *path) +{ + int fd; + + strcpy(path, TEMPL); + + fd = mkstemp(path); + if (fd < 0) { + perror("mkstemp failed"); + return -1; + } + + close(fd); + return 0; +} + +static int session_write_header(char *path) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = path, + .mode = PERF_DATA_MODE_WRITE, + }; + + session = perf_session__new(&file, false, NULL); + TEST_ASSERT_VAL("can't get session", session); + + session->evlist = perf_evlist__new_default(); + TEST_ASSERT_VAL("can't get evlist", session->evlist); + + perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); + perf_header__set_feat(&session->header, HEADER_NRCPUS); + + session->header.data_size += DATA_SIZE; + + TEST_ASSERT_VAL("failed to write header", + !perf_session__write_header(session, session->evlist, file.fd, true)); + + perf_session__delete(session); + + return 0; +} + +static int check_cpu_topology(char *path, struct cpu_map *map) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = path, + .mode = PERF_DATA_MODE_READ, + }; + int i; + + session = perf_session__new(&file, false, NULL); + TEST_ASSERT_VAL("can't get session", session); + + for (i = 0; i < session->header.env.nr_cpus_online; i++) { + pr_debug("CPU %d, core %d, socket %d\n", i, + session->header.env.cpu[i].core_id, + session->header.env.cpu[i].socket_id); + } + + for (i = 0; i < map->nr; i++) { + TEST_ASSERT_VAL("Core ID doesn't match", + (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i) & 0xffff))); + + TEST_ASSERT_VAL("Socket ID doesn't match", + (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i))); + } + + perf_session__delete(session); + + return 0; +} + +int test_session_topology(void) +{ + char path[PATH_MAX]; + struct cpu_map *map; + int ret = -1; + + TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); + + pr_debug("templ file: %s\n", path); + 
+ if (session_write_header(path)) + goto free_path; + + map = cpu_map__new(NULL); + if (map == NULL) { + pr_debug("failed to get system cpumap\n"); + goto free_path; + } + + if (check_cpu_topology(path, map)) + goto free_map; + ret = 0; + +free_map: + cpu_map__put(map); +free_path: + unlink(path); + return ret; +} diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index b34c5fc829ae..d677e018e504 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -68,7 +68,7 @@ int test__vmlinux_matches_kallsyms(void) * to see if the running kernel was relocated by checking if it has the * same value in the vmlinux file we load. */ - kallsyms_map = machine__kernel_map(&kallsyms, type); + kallsyms_map = machine__kernel_map(&kallsyms); /* * Step 5: @@ -80,7 +80,7 @@ int test__vmlinux_matches_kallsyms(void) goto out; } - vmlinux_map = machine__kernel_map(&vmlinux, type); + vmlinux_map = machine__kernel_map(&vmlinux); /* * Step 6: diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file index 62378a899d79..722e25d200bf 100644 --- a/tools/perf/trace/strace/groups/file +++ b/tools/perf/trace/strace/groups/file @@ -9,6 +9,7 @@ mkdir open openat quotactl +read readlink rename rmdir @@ -16,3 +17,4 @@ stat statfs symlink unlink +write diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index c6c7e5189214..e9703c0829f1 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -393,6 +393,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) if (browser->use_navkeypressed && !browser->navkeypressed) { if (key == K_DOWN || key == K_UP || + (browser->columns && (key == K_LEFT || key == K_RIGHT)) || key == K_PGDN || key == K_PGUP || key == K_HOME || key == K_END || key == ' ') { @@ -421,6 +422,18 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -1, SEEK_CUR); } break; + case K_RIGHT: + if (!browser->columns) + 
goto out; + if (browser->horiz_scroll < browser->columns - 1) + ++browser->horiz_scroll; + break; + case K_LEFT: + if (!browser->columns) + goto out; + if (browser->horiz_scroll != 0) + --browser->horiz_scroll; + break; case K_PGDN: case ' ': if (browser->top_idx + browser->rows > browser->nr_entries - 1) @@ -459,6 +472,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -offset, SEEK_END); break; default: + out: return key; } } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index f3cef564de02..01781de59532 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -14,7 +14,7 @@ struct ui_browser { u64 index, top_idx; void *top, *entries; - u16 y, x, width, height, rows; + u16 y, x, width, height, rows, columns, horiz_scroll; int current_color; void *priv; const char *title; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index c04c60d4863c..9b7346a881cf 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -784,11 +784,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, .size = sizeof(s), .ptr = &arg, }; + int column = 0; hist_browser__gotorc(browser, row, 0); perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; if (current_entry && browser->b.navkeypressed) { @@ -861,14 +862,16 @@ static int advance_hpp_check(struct perf_hpp *hpp, int inc) return hpp->size <= 0; } -static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists) +static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size) { + struct hists *hists = browser->hists; struct perf_hpp dummy_hpp = { .buf = buf, .size = size, }; struct perf_hpp_fmt *fmt; size_t ret = 0; + int column = 0; if (symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); @@ -877,7 +880,7 @@ static int hists__scnprintf_headers(char 
*buf, size_t size, struct hists *hists) } perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); @@ -896,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists__scnprintf_headers(headers, sizeof(headers), browser->hists); + hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); @@ -1261,6 +1264,7 @@ static int hists__browser_title(struct hists *hists, int printed; const struct dso *dso = hists->dso_filter; const struct thread *thread = hists->thread_filter; + int socket_id = hists->socket_filter; unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct perf_evsel *evsel = hists_to_evsel(hists); @@ -1314,6 +1318,9 @@ static int hists__browser_title(struct hists *hists, if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); + if (socket_id > -1) + printed += scnprintf(bf + printed, size - printed, + ", Processor Socket: %d", socket_id); if (!is_report_browser(hbt)) { struct perf_top *top = hbt->arg; @@ -1425,6 +1432,7 @@ struct popup_action { struct thread *thread; struct dso *dso; struct map_symbol ms; + int socket; int (*fn)(struct hist_browser *browser, struct popup_action *act); }; @@ -1437,7 +1445,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!objdump_path && perf_session_env__lookup_objdump(browser->env)) + if (!objdump_path && perf_env__lookup_objdump(browser->env)) return 0; notes = symbol__annotation(act->ms.sym); @@ -1522,7 +1530,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, static int 
do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { - struct dso *dso = act->dso; + struct map *map = act->ms.map; if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); @@ -1530,11 +1538,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) browser->hists->dso_filter = NULL; ui_helpline__pop(); } else { - if (dso == NULL) + if (map == NULL) return 0; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - browser->hists->dso_filter = dso; + __map__is_kernel(map) ? "the Kernel" : map->dso->short_name); + browser->hists->dso_filter = map->dso; perf_hpp__set_elide(HISTC_DSO, true); pstack__push(browser->pstack, &browser->hists->dso_filter); } @@ -1546,17 +1554,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, - char **optstr, struct dso *dso) + char **optstr, struct map *map) { - if (dso == NULL) + if (map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", browser->hists->dso_filter ? "out of" : "into", - dso->kernel ? "the Kernel" : dso->short_name) < 0) + __map__is_kernel(map) ? 
"the Kernel" : map->dso->short_name) < 0) return 0; - act->dso = dso; + act->ms.map = map; + act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1672,6 +1681,41 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, return 1; } +static int +do_zoom_socket(struct hist_browser *browser, struct popup_action *act) +{ + if (browser->hists->socket_filter > -1) { + pstack__remove(browser->pstack, &browser->hists->socket_filter); + browser->hists->socket_filter = -1; + perf_hpp__set_elide(HISTC_SOCKET, false); + } else { + browser->hists->socket_filter = act->socket; + perf_hpp__set_elide(HISTC_SOCKET, true); + pstack__push(browser->pstack, &browser->hists->socket_filter); + } + + hists__filter_by_socket(browser->hists); + hist_browser__reset(browser); + return 0; +} + +static int +add_socket_opt(struct hist_browser *browser, struct popup_action *act, + char **optstr, int socket_id) +{ + if (socket_id < 0) + return 0; + + if (asprintf(optstr, "Zoom %s Processor Socket %d", + (browser->hists->socket_filter > -1) ? 
"out of" : "into", + socket_id) < 0) + return 0; + + act->socket = socket_id; + act->fn = do_zoom_socket; + return 1; +} + static void hist_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; @@ -1725,6 +1769,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "S Zoom into current Processor Socket\n" \ /* help messages are sorted by lexical order of the hotkey */ const char report_help[] = HIST_BROWSER_HELP_COMMON @@ -1755,7 +1800,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, hist_browser__update_nr_entries(browser); } - browser->pstack = pstack__new(2); + browser->pstack = pstack__new(3); if (browser->pstack == NULL) goto out; @@ -1764,8 +1809,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) + perf_hpp__for_each_format(fmt) { perf_hpp__reset_width(fmt, hists); + /* + * This is done just once, and activates the horizontal scrolling + * code in the ui_browser code, it would be better to have a the + * counter in the perf_hpp code, but I couldn't find doing it here + * works, FIXME by setting this in hist_browser__new, for now, be + * clever 8-) + */ + ++browser->b.columns; + } if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); @@ -1773,7 +1827,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; struct dso *dso = NULL; + struct map *map = NULL; int choice = 0; + int socked_id = -1; nr_options = 0; @@ -1781,7 +1837,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? 
browser->selection->map->dso : NULL; + map = browser->selection->map; + if (map) + dso = map->dso; + socked_id = browser->he_selection->socket; } switch (key) { case K_TAB: @@ -1824,6 +1883,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, actions->thread = thread; do_zoom_thread(browser, actions); continue; + case 'S': + actions->socket = socked_id; + do_zoom_socket(browser, actions); + continue; case '/': if (ui_browser__input_window("Symbol to show", "Please enter the name of symbol you want to see", @@ -1899,9 +1962,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, * Ditto for thread below. */ do_zoom_dso(browser, actions); - } - if (top == &browser->hists->thread_filter) + } else if (top == &browser->hists->thread_filter) { do_zoom_thread(browser, actions); + } else if (top == &browser->hists->socket_filter) { + do_zoom_socket(browser, actions); + } continue; } case 'q': @@ -1965,12 +2030,14 @@ skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], &options[nr_options], thread); nr_options += add_dso_opt(browser, &actions[nr_options], - &options[nr_options], dso); + &options[nr_options], map); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], browser->selection ? 
browser->selection->map : NULL); - + nr_options += add_socket_opt(browser, &actions[nr_options], + &options[nr_options], + socked_id); /* perf script support */ if (browser->he_selection) { nr_options += add_script_opt(browser, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e5f18a288b74..9217119c4108 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -5,6 +5,7 @@ libperf-y += build-id.o libperf-y += config.o libperf-y += ctype.o libperf-y += db-export.o +libperf-y += env.o libperf-y += environment.o libperf-y += event.o libperf-y += evlist.o diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e9996092a093..cea323d9ee7e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -122,7 +122,7 @@ struct annotated_source { struct list_head source; struct source_line *lines; int nr_histograms; - int sizeof_sym_hist; + size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a980e7c50ee0..7f10430af39c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -926,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_PERIOD 100000 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16 #define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024 +#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 +#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) { @@ -936,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; + synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; } /* @@ -950,6 +953,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, const char *p; char *endptr; bool 
period_type_set = false; + bool period_set = false; synth_opts->set = true; @@ -971,6 +975,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, p += 1; if (isdigit(*p)) { synth_opts->period = strtoull(p, &endptr, 10); + period_set = true; p = endptr; while (*p == ' ' || *p == ',') p += 1; @@ -1041,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'l': + synth_opts->last_branch = true; + synth_opts->last_branch_sz = + PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + unsigned int val; + + val = strtoul(p, &endptr, 10); + p = endptr; + if (!val || + val > PERF_ITRACE_MAX_LAST_BRANCH_SZ) + goto out_err; + synth_opts->last_branch_sz = val; + } + break; case ' ': case ',': break; @@ -1053,7 +1075,7 @@ out: if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - if (!synth_opts->period) + if (!period_set) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bf72b77a588a..b86f90db1352 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -63,7 +63,9 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @last_branch: add branch context to 'instruction' events * @callchain_sz: maximum callchain size + * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type */ @@ -79,7 +81,9 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool last_branch; unsigned int callchain_sz; + unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; }; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c 
index 3667e2123e5b..c51c29fd0732 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -225,32 +225,32 @@ void cpu_map__put(struct cpu_map *map) cpu_map__delete(map); } -int cpu_map__get_socket(struct cpu_map *map, int idx) +static int cpu__get_topology_int(int cpu, const char *name, int *value) { - FILE *fp; - const char *mnt; char path[PATH_MAX]; - int cpu, ret; - if (idx > map->nr) - return -1; + snprintf(path, PATH_MAX, + "devices/system/cpu/cpu%d/topology/%s", cpu, name); - cpu = map->map[idx]; + return sysfs__read_int(path, value); +} - mnt = sysfs__mountpoint(); - if (!mnt) - return -1; +int cpu_map__get_socket_id(int cpu) +{ + int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); + return ret ?: value; +} - snprintf(path, PATH_MAX, - "%s/devices/system/cpu/cpu%d/topology/physical_package_id", - mnt, cpu); +int cpu_map__get_socket(struct cpu_map *map, int idx) +{ + int cpu; - fp = fopen(path, "r"); - if (!fp) + if (idx > map->nr) return -1; - ret = fscanf(fp, "%d", &cpu); - fclose(fp); - return ret == 1 ? 
cpu : -1; + + cpu = map->map[idx]; + + return cpu_map__get_socket_id(cpu); } static int cmp_ids(const void *a, const void *b) @@ -289,33 +289,22 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, return 0; } +int cpu_map__get_core_id(int cpu) +{ + int value, ret = cpu__get_topology_int(cpu, "core_id", &value); + return ret ?: value; +} + int cpu_map__get_core(struct cpu_map *map, int idx) { - FILE *fp; - const char *mnt; - char path[PATH_MAX]; - int cpu, ret, s; + int cpu, s; if (idx > map->nr) return -1; cpu = map->map[idx]; - mnt = sysfs__mountpoint(); - if (!mnt) - return -1; - - snprintf(path, PATH_MAX, - "%s/devices/system/cpu/cpu%d/topology/core_id", - mnt, cpu); - - fp = fopen(path, "r"); - if (!fp) - return -1; - ret = fscanf(fp, "%d", &cpu); - fclose(fp); - if (ret != 1) - return -1; + cpu = cpu_map__get_core_id(cpu); s = cpu_map__get_socket(map, idx); if (s == -1) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 0af9cecb4c51..8982d538da83 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -18,7 +18,9 @@ struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); +int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx); +int cpu_map__get_core_id(int cpu); int cpu_map__get_core(struct cpu_map *map, int idx); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c new file mode 100644 index 000000000000..6af4f7c36820 --- /dev/null +++ b/tools/perf/util/env.c @@ -0,0 +1,86 @@ +#include "cpumap.h" +#include "env.h" +#include "util.h" + +struct perf_env perf_env; + +void perf_env__exit(struct perf_env *env) +{ + zfree(&env->hostname); + zfree(&env->os_release); + 
zfree(&env->version); + zfree(&env->arch); + zfree(&env->cpu_desc); + zfree(&env->cpuid); + zfree(&env->cmdline); + zfree(&env->cmdline_argv); + zfree(&env->sibling_cores); + zfree(&env->sibling_threads); + zfree(&env->numa_nodes); + zfree(&env->pmu_mappings); + zfree(&env->cpu); +} + +int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) +{ + int i; + + /* + * If env->cmdline_argv has already been set, do not override it. This allows + * a command to set the cmdline, parse args and then call another + * builtin function that implements a command -- e.g, cmd_kvm calling + * cmd_record. + */ + if (env->cmdline_argv != NULL) + return 0; + + /* do not include NULL termination */ + env->cmdline_argv = calloc(argc, sizeof(char *)); + if (env->cmdline_argv == NULL) + goto out_enomem; + + /* + * Must copy argv contents because it gets moved around during option + * parsing: + */ + for (i = 0; i < argc ; i++) { + env->cmdline_argv[i] = argv[i]; + if (env->cmdline_argv[i] == NULL) + goto out_free; + } + + env->nr_cmdline = argc; + + return 0; +out_free: + zfree(&env->cmdline_argv); +out_enomem: + return -ENOMEM; +} + +int perf_env__read_cpu_topology_map(struct perf_env *env) +{ + int cpu, nr_cpus; + + if (env->cpu != NULL) + return 0; + + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); + + nr_cpus = env->nr_cpus_avail; + if (nr_cpus == -1) + return -EINVAL; + + env->cpu = calloc(nr_cpus, sizeof(env->cpu[0])); + if (env->cpu == NULL) + return -ENOMEM; + + for (cpu = 0; cpu < nr_cpus; ++cpu) { + env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); + env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); + } + + env->nr_cpus_avail = nr_cpus; + return 0; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h new file mode 100644 index 000000000000..0132b9557c02 --- /dev/null +++ b/tools/perf/util/env.h @@ -0,0 +1,44 @@ +#ifndef __PERF_ENV_H +#define __PERF_ENV_H + +struct cpu_topology_map { + int socket_id; + int 
core_id; +}; + +struct perf_env { + char *hostname; + char *os_release; + char *version; + char *arch; + int nr_cpus_online; + int nr_cpus_avail; + char *cpu_desc; + char *cpuid; + unsigned long long total_mem; + unsigned int msr_pmu_type; + + int nr_cmdline; + int nr_sibling_cores; + int nr_sibling_threads; + int nr_numa_nodes; + int nr_pmu_mappings; + int nr_groups; + char *cmdline; + const char **cmdline_argv; + char *sibling_cores; + char *sibling_threads; + char *numa_nodes; + char *pmu_mappings; + struct cpu_topology_map *cpu; +}; + +extern struct perf_env perf_env; + +void perf_env__exit(struct perf_env *env); + +int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); + +int perf_env__read_cpu_topology_map(struct perf_env *env); + +#endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7ff61274ed57..cb98b5af9e17 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -167,7 +167,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, return 0; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, +pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine) @@ -378,7 +378,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (pos->dso->kernel) + if (__map__is_kernel(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); @@ -649,12 +649,12 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, size_t size; const char *mmap_name; char name_buff[PATH_MAX]; - struct map *map; + struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; union perf_event *event; - if (machine->vmlinux_maps[0] == NULL) + if (map == NULL) return -1; /* @@ -680,7 +680,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 
event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - map = machine->vmlinux_maps[MAP__FUNCTION]; kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; @@ -1008,7 +1007,7 @@ int perf_event__preprocess_sample(const union perf_event *event, * it now. */ if (cpumode == PERF_RECORD_MISC_KERNEL && - machine->vmlinux_maps[MAP__FUNCTION] == NULL) + machine__kernel_map(machine) == NULL) machine__create_kernel_maps(machine); thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); @@ -1021,6 +1020,14 @@ int perf_event__preprocess_sample(const union perf_event *event, al->sym = NULL; al->cpu = sample->cpu; + al->socket = -1; + + if (al->cpu >= 0) { + struct perf_env *env = machine->env; + + if (env && env->cpu) + al->socket = env->cpu[al->cpu].socket_id; + } if (al->map) { struct dso *dso = al->map->dso; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f729df5e25e6..a0dbcbd4f6d8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -257,6 +257,7 @@ struct events_stats { u64 total_non_filtered_period; u64 total_lost; u64 total_lost_samples; + u64 total_aux_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; @@ -478,6 +479,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, const struct perf_sample *sample, bool swapped); +pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine); + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c8fc8a258f42..d1392194a9a9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include <linux/bitops.h> #include <linux/hash.h> #include <linux/log2.h> +#include <linux/err.h> 
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); @@ -164,6 +165,13 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) __perf_evlist__propagate_maps(evlist, entry); } +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + evsel->evlist = NULL; + list_del_init(&evsel->node); + evlist->nr_entries -= 1; +} + void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list) { @@ -197,6 +205,20 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) } } +void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) +{ + attr->precise_ip = 3; + + while (attr->precise_ip != 0) { + int fd = sys_perf_event_open(attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + break; + } + --attr->precise_ip; + } +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ -207,13 +229,15 @@ int perf_evlist__add_default(struct perf_evlist *evlist) event_attr_init(&attr); + perf_event_attr__set_max_precise_ip(&attr); + evsel = perf_evsel__new(&attr); if (evsel == NULL) goto error; - /* use strdup() because free(evsel) assumes name is allocated */ - evsel->name = strdup("cycles"); - if (!evsel->name) + /* use asprintf() because free(evsel) assumes name is allocated */ + if (asprintf(&evsel->name, "cycles%.*s", + attr.precise_ip ? 
attr.precise_ip + 1 : 0, ":ppp") < 0) goto error_free; perf_evlist__add(evlist, evsel); @@ -293,7 +317,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, { struct perf_evsel *evsel = perf_evsel__newtp(sys, name); - if (evsel == NULL) + if (IS_ERR(evsel)) return -1; evsel->handler = handler; @@ -616,6 +640,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id) +{ + struct perf_sample_id *sid; + + if (!id) + return NULL; + + sid = perf_evlist__id2sid(evlist, id); + if (sid) + return sid->evsel; + + return NULL; +} + static int perf_evlist__event2id(struct perf_evlist *evlist, union perf_event *event, u64 *id) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 115d8b53c601..a459fe71b452 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -73,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); int perf_evlist__add_default(struct perf_evlist *evlist); int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); @@ -104,6 +105,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas int perf_evlist__poll(struct perf_evlist *evlist, int timeout); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); @@ -287,4 +290,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); + +void 
perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5410483d5219..8be867ccefe9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -9,10 +9,11 @@ #include <byteswap.h> #include <linux/bitops.h> -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <linux/err.h> #include <sys/resource.h> #include "asm/bug.h" #include "callchain.h" @@ -225,11 +226,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } +/* + * Returns pointer with encoded error via <linux/err.h> interface. + */ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); + int err = -ENOMEM; - if (evsel != NULL) { + if (evsel == NULL) { + goto out_err; + } else { struct perf_event_attr attr = { .type = PERF_TYPE_TRACEPOINT, .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | @@ -240,8 +247,10 @@ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int goto out_free; evsel->tp_format = trace_event__tp_format(sys, name); - if (evsel->tp_format == NULL) + if (IS_ERR(evsel->tp_format)) { + err = PTR_ERR(evsel->tp_format); goto out_free; + } event_attr_init(&attr); attr.config = evsel->tp_format->id; @@ -254,7 +263,8 @@ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int out_free: zfree(&evsel->name); free(evsel); - return NULL; +out_err: + return ERR_PTR(err); } const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { @@ -872,6 +882,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->clockid = opts->clockid; } + if (evsel->precise_max) + perf_event_attr__set_max_precise_ip(attr); + /* * Apply event specific term settings, * it overloads any global 
configuration. @@ -1168,7 +1181,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), + bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ef8925f7211a..02a5fed8d924 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -111,6 +111,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + bool precise_max; /* parse modifier helper */ int exclude_GH; int nr_members; @@ -130,7 +131,6 @@ union u64_swap { struct cpu_map; struct target; struct thread_map; -struct perf_evlist; struct record_opts; static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) @@ -162,6 +162,9 @@ static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx); +/* + * Returns pointer with encoded error via <linux/err.h> interface. 
+ */ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name) { return perf_evsel__newtp_idx(sys, name, 0); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fce6634aebe2..43838003c1a1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -24,9 +24,6 @@ #include "build-id.h" #include "data.h" -static u32 header_argc; -static const char **header_argv; - /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -88,6 +85,9 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) return err; } +#define string_size(str) \ + (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) + static int do_write_string(int fd, const char *str) { u32 len, olen; @@ -135,37 +135,6 @@ static char *do_read_string(int fd, struct perf_header *ph) return NULL; } -int -perf_header__set_cmdline(int argc, const char **argv) -{ - int i; - - /* - * If header_argv has already been set, do not override it. - * This allows a command to set the cmdline, parse args and - * then call another builtin function that implements a - * command -- e.g, cmd_kvm calling cmd_record. 
- */ - if (header_argv) - return 0; - - header_argc = (u32)argc; - - /* do not include NULL termination */ - header_argv = calloc(argc, sizeof(char *)); - if (!header_argv) - return -ENOMEM; - - /* - * must copy argv contents because it gets moved - * around during option parsing - */ - for (i = 0; i < argc ; i++) - header_argv[i] = argv[i]; - - return 0; -} - static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { @@ -402,8 +371,8 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, { char buf[MAXPATHLEN]; char proc[32]; - u32 i, n; - int ret; + u32 n; + int i, ret; /* * actual atual path to perf binary @@ -417,7 +386,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, buf[ret] = '\0'; /* account for binary path */ - n = header_argc + 1; + n = perf_env.nr_cmdline + 1; ret = do_write(fd, &n, sizeof(n)); if (ret < 0) @@ -427,8 +396,8 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return ret; - for (i = 0 ; i < header_argc; i++) { - ret = do_write_string(fd, header_argv[i]); + for (i = 0 ; i < perf_env.nr_cmdline; i++) { + ret = do_write_string(fd, perf_env.cmdline_argv[i]); if (ret < 0) return ret; } @@ -441,6 +410,7 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" struct cpu_topo { + u32 cpu_nr; u32 core_sib; u32 thread_sib; char **core_siblings; @@ -551,7 +521,7 @@ static struct cpu_topo *build_cpu_topology(void) return NULL; tp = addr; - + tp->cpu_nr = nr; addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; @@ -574,7 +544,7 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, { struct cpu_topo *tp; u32 i; - int ret; + int ret, j; tp = build_cpu_topology(); if (!tp) @@ -598,6 +568,21 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, if (ret < 0) break; } + + ret = 
perf_env__read_cpu_topology_map(&perf_env); + if (ret < 0) + goto done; + + for (j = 0; j < perf_env.nr_cpus_avail; j++) { + ret = do_write(fd, &perf_env.cpu[j].core_id, + sizeof(perf_env.cpu[j].core_id)); + if (ret < 0) + return ret; + ret = do_write(fd, &perf_env.cpu[j].socket_id, + sizeof(perf_env.cpu[j].socket_id)); + if (ret < 0) + return ret; + } done: free_cpu_topo(tp); return ret; @@ -938,6 +923,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, { int nr, i; char *str; + int cpu_nr = ph->env.nr_cpus_online; nr = ph->env.nr_sibling_cores; str = ph->env.sibling_cores; @@ -954,6 +940,13 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, fprintf(fp, "# sibling threads : %s\n", str); str += strlen(str) + 1; } + + if (ph->env.cpu != NULL) { + for (i = 0; i < cpu_nr; i++) + fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, + ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); + } else + fprintf(fp, "# Core ID and Socket ID information is not available\n"); } static void free_event_desc(struct perf_evsel *events) @@ -1582,7 +1575,7 @@ error: return -1; } -static int process_cpu_topology(struct perf_file_section *section __maybe_unused, +static int process_cpu_topology(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { @@ -1590,15 +1583,22 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused u32 nr, i; char *str; struct strbuf sb; + int cpu_nr = ph->env.nr_cpus_online; + u64 size = 0; + + ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); + if (!ph->env.cpu) + return -1; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) - return -1; + goto free_cpu; if (ph->needs_swap) nr = bswap_32(nr); ph->env.nr_sibling_cores = nr; + size += sizeof(u32); strbuf_init(&sb, 128); for (i = 0; i < nr; i++) { @@ -1608,6 +1608,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused /* include a NULL character 
at the end */ strbuf_add(&sb, str, strlen(str) + 1); + size += string_size(str); free(str); } ph->env.sibling_cores = strbuf_detach(&sb, NULL); @@ -1620,6 +1621,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused nr = bswap_32(nr); ph->env.nr_sibling_threads = nr; + size += sizeof(u32); for (i = 0; i < nr; i++) { str = do_read_string(fd, ph); @@ -1628,13 +1630,57 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused /* include a NULL character at the end */ strbuf_add(&sb, str, strlen(str) + 1); + size += string_size(str); free(str); } ph->env.sibling_threads = strbuf_detach(&sb, NULL); + + /* + * The header may be from old perf, + * which doesn't include core id and socket id information. + */ + if (section->size <= size) { + zfree(&ph->env.cpu); + return 0; + } + + for (i = 0; i < (u32)cpu_nr; i++) { + ret = readn(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto free_cpu; + + if (ph->needs_swap) + nr = bswap_32(nr); + + if (nr > (u32)cpu_nr) { + pr_debug("core_id number is too big." + "You may need to upgrade the perf tool.\n"); + goto free_cpu; + } + ph->env.cpu[i].core_id = nr; + + ret = readn(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto free_cpu; + + if (ph->needs_swap) + nr = bswap_32(nr); + + if (nr > (u32)cpu_nr) { + pr_debug("socket_id number is too big." 
+ "You may need to upgrade the perf tool.\n"); + goto free_cpu; + } + + ph->env.cpu[i].socket_id = nr; + } + return 0; error: strbuf_release(&sb); +free_cpu: + zfree(&ph->env.cpu); return -1; } @@ -1737,6 +1783,9 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused /* include a NULL character at the end */ strbuf_add(&sb, "", 1); + if (!strcmp(name, "msr")) + ph->env.msr_pmu_type = type; + free(name); pmu_num--; } @@ -2515,6 +2564,7 @@ int perf_session__read_header(struct perf_session *session) return -ENOMEM; session->evlist->env = &header->env; + session->machines.host.env = &header->env; if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 396e4965f0c9..05f27cb6b7e3 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -7,7 +7,7 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "event.h" - +#include "env.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -66,31 +66,6 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); -struct perf_env { - char *hostname; - char *os_release; - char *version; - char *arch; - int nr_cpus_online; - int nr_cpus_avail; - char *cpu_desc; - char *cpuid; - unsigned long long total_mem; - - int nr_cmdline; - int nr_sibling_cores; - int nr_sibling_threads; - int nr_numa_nodes; - int nr_pmu_mappings; - int nr_groups; - char *cmdline; - const char **cmdline_argv; - char *sibling_cores; - char *sibling_threads; - char *numa_nodes; - char *pmu_mappings; -}; - struct perf_header { enum perf_header_version version; bool needs_swap; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 08b6cd945f1e..4fd37d6708cb 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -15,6 +15,8 @@ static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he); static bool 
hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -130,6 +132,18 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); } + + if (h->mem_info->iaddr.sym) { + symlen = (int)h->mem_info->iaddr.sym->namelen + 4 + + unresolved_col_width + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } + if (h->mem_info->daddr.map) { symlen = dso__name_len(h->mem_info->daddr.map->dso); hists__new_col_len(hists, HISTC_MEM_DADDR_DSO, @@ -141,9 +155,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen); hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CPU, 3); + hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); hists__new_col_len(hists, HISTC_MEM_TLB, 22); hists__new_col_len(hists, HISTC_MEM_SNOOP, 12); @@ -452,6 +469,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .map = al->map, .sym = al->sym, }, + .socket = al->socket, .cpu = al->cpu, .cpumode = al->cpumode, .ip = al->addr, @@ -690,7 +708,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, } static int -iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, +iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; @@ -702,7 +720,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, * cumulated only one time to prevent entries more than 
100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); if (he_cache == NULL) return -ENOMEM; @@ -863,6 +881,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; + iter->max_stack = max_stack_depth; + err = iter->ops->prepare_entry(iter, al); if (err) goto out; @@ -1024,6 +1044,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_dso(hists, he); hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); + hists__filter_entry_by_socket(hists, he); } void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -1143,7 +1164,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct perf_evsel *evsel = hists_to_evsel(hists); bool use_callchain; - if (evsel && !symbol_conf.show_ref_callgraph) + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; else use_callchain = symbol_conf.use_callchain; @@ -1292,6 +1313,37 @@ void hists__filter_by_symbol(struct hists *hists) } } +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he) +{ + if ((hists->socket_filter > -1) && + (he->socket != hists->socket_filter)) { + he->filtered |= (1 << HIST_FILTER__SOCKET); + return true; + } + + return false; +} + +void hists__filter_by_socket(struct hists *hists) +{ + struct rb_node *nd; + + hists->stats.nr_non_filtered_samples = 0; + + hists__reset_filter_stats(hists); + hists__reset_col_len(hists); + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (hists__filter_entry_by_socket(hists, h)) + continue; + + hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + } +} + void events_stats__inc(struct events_stats 
*stats, u32 type) { ++stats->nr_events[0]; @@ -1517,6 +1569,7 @@ static int hists_evsel__init(struct perf_evsel *evsel) hists->entries_collapsed = RB_ROOT; hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); + hists->socket_filter = -1; return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index de6d58e7f0d5..a48a2078d288 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -20,6 +20,7 @@ enum hist_filter { HIST_FILTER__SYMBOL, HIST_FILTER__GUEST, HIST_FILTER__HOST, + HIST_FILTER__SOCKET, }; enum hist_column { @@ -29,6 +30,7 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_SOCKET, HISTC_SRCLINE, HISTC_SRCFILE, HISTC_MISPREDICT, @@ -47,6 +49,7 @@ enum hist_column { HISTC_MEM_LVL, HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, + HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ @@ -70,6 +73,7 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; + int socket_filter; }; struct hist_entry_iter; @@ -87,6 +91,7 @@ struct hist_entry_iter { int curr; bool hide_unresolved; + int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; @@ -144,11 +149,12 @@ size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); +void hists__filter_by_socket(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str; + hists->symbol_filter_str || (hists->socket_filter > -1); } u16 hists__col_len(struct hists *hists, enum hist_column col); diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 8f149655f497..07c644ed64c4 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -5,4 +5,12 @@ const char 
*get_arch_regstr(unsigned int n); #endif +#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET +/* + * Arch should support fetching the offset of a register in pt_regs + * by its name. See kernel's regs_query_register_offset in + * arch/xxx/kernel/ptrace.c. + */ +int regs_query_register_offset(const char *name); +#endif #endif diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 2386322ece4f..0611d619a42e 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -7,6 +7,17 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ -$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c +$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ + diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ + diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ + diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ + diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ + || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) 
CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 22ba50224319..9409d014b46c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -650,7 +650,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) if (data->from_mtc && timestamp < data->timestamp && data->timestamp - timestamp < decoder->tsc_slip) return 1; - while (timestamp < data->timestamp) + if (timestamp < data->timestamp) timestamp += (1ULL << 56); if (pkt_info->last_packet_type != INTEL_PT_CYC) { if (data->from_mtc) @@ -1191,7 +1191,7 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) timestamp); timestamp = decoder->timestamp; } - while (timestamp < decoder->timestamp) { + if (timestamp < decoder->timestamp) { intel_pt_log_to("Wraparound timestamp", timestamp); timestamp += (1ULL << 56); decoder->tsc_timestamp = timestamp; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index d09c7d9f9050..319bef33a64b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -29,18 +29,18 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; -static bool enable_logging; +bool intel_pt_enable_logging; void intel_pt_log_enable(void) { - enable_logging = true; + intel_pt_enable_logging = true; } void intel_pt_log_disable(void) { if (f) fflush(f); - enable_logging = false; + intel_pt_enable_logging = false; } void intel_pt_log_set_name(const char *name) @@ -80,7 +80,7 @@ static void intel_pt_print_no_data(uint64_t pos, int indent) static int intel_pt_log_open(void) { - if (!enable_logging) + if (!intel_pt_enable_logging) return -1; if (f) @@ -91,15 +91,15 @@ static int intel_pt_log_open(void) f = fopen(log_name, "w+"); if (!f) { - 
enable_logging = false; + intel_pt_enable_logging = false; return -1; } return 0; } -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf) +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) { char desc[INTEL_PT_PKT_DESC_MAX]; @@ -111,7 +111,7 @@ void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, fprintf(f, "%s\n", desc); } -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; size_t len = intel_pt_insn->length; @@ -128,7 +128,8 @@ void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; @@ -142,7 +143,7 @@ void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log(const char *fmt, ...) +void __intel_pt_log(const char *fmt, ...) 
{ va_list args; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index db3942f83677..debe751dc3d6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -25,20 +25,46 @@ void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf); +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); struct intel_pt_insn; -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, - uint64_t ip); +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); __attribute__((format(printf, 1, 2))) -void intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...); + +#define intel_pt_log(fmt, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log(fmt, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_packet(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_packet(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn_no_data(arg, ...) 
\ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \ + } while (0) #define x64_fmt "0x%" PRIx64 +extern bool intel_pt_enable_logging; + static inline void intel_pt_log_at(const char *msg, uint64_t u) { intel_pt_log("%s at " x64_fmt "\n", msg, u); diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 816488c0b97e..d388de72eaca 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -353,8 +353,12 @@ AVXcode: 1 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: -1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv -1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv +# Intel SDM opcode map does not list MPX instructions. For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte. 
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: 1d: 1e: @@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -790,6 +800,7 @@ AVXcode: 3 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) EndTable @@ -874,7 +885,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: +5: rdpkru (110),(11B) | wrpkru (111),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -888,6 +899,9 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable @@ -932,8 +946,8 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE 5: XRSTOR | lfence (11B) -6: XSAVEOPT | mfence (11B) -7: clflush | sfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) EndTable GrpTable: Grp16 diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 535d86f8e4d1..97f963a3dcb9 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -22,6 +22,7 @@ #include "../perf.h" #include "session.h" #include "machine.h" +#include "sort.h" #include "tool.h" #include "event.h" #include "evlist.h" @@ -63,6 +64,7 @@ struct 
intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool mispred_all; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -115,6 +117,9 @@ struct intel_pt_queue { void *decoder; const struct intel_pt_state *state; struct ip_callchain *chain; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -675,6 +680,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, goto out_free; } + if (pt->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += pt->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + ptq->last_branch = zalloc(sz); + if (!ptq->last_branch) + goto out_free; + ptq->last_branch_rb = zalloc(sz); + if (!ptq->last_branch_rb) + goto out_free; + } + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!ptq->event_buf) goto out_free; @@ -720,7 +738,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (!params.period) { params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; - params.period = 1000; + params.period = 1; } } @@ -732,6 +750,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -746,6 +766,8 @@ static void intel_pt_free_queue(void *priv) thread__zput(ptq->thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -876,6 +898,58 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } +static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) +{ + struct branch_stack *bs_src = ptq->last_branch_rb; + struct branch_stack *bs_dst = ptq->last_branch; + size_t nr = 0; + + bs_dst->nr = bs_src->nr; + + if (!bs_src->nr) + return; + + nr = ptq->pt->synth_opts.last_branch_sz 
- ptq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[ptq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * ptq->last_branch_pos); + } +} + +static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) +{ + ptq->last_branch_pos = 0; + ptq->last_branch_rb->nr = 0; +} + +static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct branch_stack *bs = ptq->last_branch_rb; + struct branch_entry *be; + + if (!ptq->last_branch_pos) + ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; + + ptq->last_branch_pos -= 1; + + be = &bs->entries[ptq->last_branch_pos]; + be->from = state->from_ip; + be->to = state->to_ip; + be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); + be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; + + if (bs->nr < ptq->pt->synth_opts.last_branch_sz) + bs->nr += 1; +} + static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type, bool swapped) @@ -890,6 +964,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; @@ -909,8 +990,20 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; - if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) - return 0; + /* + * perf report cannot handle events without 
a branch stack when using + * SORT_MODE__BRANCH so make a dummy one. + */ + if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, @@ -961,6 +1054,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->instructions_sample_type, @@ -974,6 +1072,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", ret); + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1008,6 +1109,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->transactions_sample_type, @@ -1021,6 +1127,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); + if (pt->synth_opts.callchain) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1116,6 +1225,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } + if (pt->synth_opts.last_branch) + intel_pt_update_last_branch_rb(ptq); + if (!pt->sync_switch) return 0; @@ -1145,16 +1257,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return 0; } -static u64 intel_pt_switch_ip(struct machine *machine, u64 
*ptss_ip) +static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) { + struct machine *machine = pt->machine; struct map *map; struct symbol *sym, *start; u64 ip, switch_ip = 0; + const char *ptss; if (ptss_ip) *ptss_ip = 0; - map = machine__kernel_map(machine, MAP__FUNCTION); + map = machine__kernel_map(machine); if (!map) return 0; @@ -1177,8 +1291,13 @@ static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip) if (!switch_ip || !ptss_ip) return 0; + if (pt->have_sched_switch == 1) + ptss = "perf_trace_sched_switch"; + else + ptss = "__perf_event_task_sched_out"; + for (sym = start; sym; sym = dso__next_symbol(sym)) { - if (!strcmp(sym->name, "perf_trace_sched_switch")) { + if (!strcmp(sym->name, ptss)) { ip = map->unmap_ip(map, sym->start); if (ip >= map->start && ip < map->end) { *ptss_ip = ip; @@ -1198,11 +1317,11 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (!pt->kernel_start) { pt->kernel_start = machine__kernel_start(pt->machine); - if (pt->per_cpu_mmaps && pt->have_sched_switch && + if (pt->per_cpu_mmaps && + (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && !pt->sampling_mode) { - pt->switch_ip = intel_pt_switch_ip(pt->machine, - &pt->ptss_ip); + pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", pt->switch_ip, pt->ptss_ip); @@ -1387,31 +1506,18 @@ static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) return NULL; } -static int intel_pt_process_switch(struct intel_pt *pt, - struct perf_sample *sample) +static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, + u64 timestamp) { struct intel_pt_queue *ptq; - struct perf_evsel *evsel; - pid_t tid; - int cpu, err; - - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); - if (evsel != pt->switch_evsel) - return 0; - - tid = perf_evsel__intval(evsel, 
sample, "next_pid"); - cpu = sample->cpu; - - intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", - cpu, tid, sample->time, perf_time_to_tsc(sample->time, - &pt->tc)); + int err; if (!pt->sync_switch) - goto out; + return 1; ptq = intel_pt_cpu_to_ptq(pt, cpu); if (!ptq) - goto out; + return 1; switch (ptq->switch_state) { case INTEL_PT_SS_NOT_TRACING: @@ -1424,7 +1530,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, return 0; case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: if (!ptq->on_heap) { - ptq->timestamp = perf_time_to_tsc(sample->time, + ptq->timestamp = perf_time_to_tsc(timestamp, &pt->tc); err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, ptq->timestamp); @@ -1441,10 +1547,76 @@ static int intel_pt_process_switch(struct intel_pt *pt, default: break; } -out: + + return 1; +} + +static int intel_pt_process_switch(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct perf_evsel *evsel; + pid_t tid; + int cpu, ret; + + evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); + if (evsel != pt->switch_evsel) + return 0; + + tid = perf_evsel__intval(evsel, sample, "next_pid"); + cpu = sample->cpu; + + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + return machine__set_current_tid(pt->machine, cpu, -1, tid); } +static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, + struct perf_sample *sample) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + pid_t pid, tid; + int cpu, ret; + + cpu = sample->cpu; + + if (pt->have_sched_switch == 3) { + if (!out) + return 0; + if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { + pr_err("Expecting CPU-wide context switch event\n"); + return -EINVAL; + } + pid = event->context_switch.next_prev_pid; + tid = event->context_switch.next_prev_tid; + } else 
{ + if (out) + return 0; + pid = sample->pid; + tid = sample->tid; + } + + if (tid == -1) { + pr_err("context_switch event has no tid\n"); + return -EINVAL; + } + + intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", + cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, + &pt->tc)); + + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); + if (ret <= 0) + return ret; + + return machine__set_current_tid(pt->machine, cpu, pid, tid); +} + static int intel_pt_process_itrace_start(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -1515,6 +1687,9 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_SWITCH || + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) + err = intel_pt_context_switch(pt, event, sample); intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", perf_event__name(event->header.type), event->header.type, @@ -1700,6 +1875,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->instructions_sample_period = attr.sample_period; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1719,6 +1896,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, 
&attr, id); @@ -1745,6 +1924,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; attr.sample_type |= PERF_SAMPLE_ADDR; attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1777,6 +1957,28 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) return NULL; } +static bool intel_pt_find_switch(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.context_switch) + return true; + } + + return false; +} + +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; + + if (!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -1821,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + perf_config(intel_pt_perf_config, pt); + err = auxtrace_queues__init(&pt->queues); if (err) goto err_free; @@ -1888,6 +2092,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pr_err("%s: missing sched_switch event\n", __func__); goto err_delete_thread; } + } else if (pt->have_sched_switch == 2 && + !intel_pt_find_switch(session->evlist)) { + pr_err("%s: missing context_switch attribute flag\n", __func__); + goto err_delete_thread; } if (session->itrace_synth_opts && session->itrace_synth_opts->set) { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6309f7ceb08f..5ef90be2a249 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -35,6 +35,7 @@ int machine__init(struct machine *machine, const char 
*root_dir, pid_t pid) machine->last_match = NULL; machine->vdso_info = NULL; + machine->env = NULL; machine->pid = pid; @@ -624,7 +625,7 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) { int i; size_t printed = 0; - struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso; + struct dso *kdso = machine__kernel_map(machine)->dso; if (kdso->has_build_id) { char filename[PATH_MAX]; @@ -740,6 +741,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; + struct map *map; machine->vmlinux_maps[type] = map__new2(start, kernel, type); if (machine->vmlinux_maps[type] == NULL) @@ -748,13 +750,13 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) machine->vmlinux_maps[type]->map_ip = machine->vmlinux_maps[type]->unmap_ip = identity__map_ip; - kmap = map__kmap(machine->vmlinux_maps[type]); + map = __machine__kernel_map(machine, type); + kmap = map__kmap(map); if (!kmap) return -1; kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, - machine->vmlinux_maps[type]); + map_groups__insert(&machine->kmaps, map); } return 0; @@ -766,13 +768,13 @@ void machine__destroy_kernel_maps(struct machine *machine) for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; + struct map *map = __machine__kernel_map(machine, type); - if (machine->vmlinux_maps[type] == NULL) + if (map == NULL) continue; - kmap = map__kmap(machine->vmlinux_maps[type]); - map_groups__remove(&machine->kmaps, - machine->vmlinux_maps[type]); + kmap = map__kmap(map); + map_groups__remove(&machine->kmaps, map); if (kmap && kmap->ref_reloc_sym) { /* * ref_reloc_sym is shared among all maps, so free just @@ -866,7 +868,7 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter) { - struct map *map = 
machine->vmlinux_maps[type]; + struct map *map = machine__kernel_map(machine); int ret = dso__load_kallsyms(map->dso, filename, map, filter); if (ret > 0) { @@ -885,7 +887,7 @@ int machine__load_kallsyms(struct machine *machine, const char *filename, int machine__load_vmlinux_path(struct machine *machine, enum map_type type, symbol_filter_t filter) { - struct map *map = machine->vmlinux_maps[type]; + struct map *map = machine__kernel_map(machine); int ret = dso__load_vmlinux_path(map->dso, map, filter); if (ret > 0) @@ -1243,8 +1245,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, /* * preload dso of guest kernel and modules */ - dso__load(kernel, machine->vmlinux_maps[MAP__FUNCTION], - NULL); + dso__load(kernel, machine__kernel_map(machine), NULL); } } return 0; @@ -1830,7 +1831,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > PERF_MAX_STACK_DEPTH) { + if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) { pr_warning("corrupted callchain. 
skipping...\n"); return 0; } @@ -1996,7 +1997,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, int machine__get_kernel_start(struct machine *machine) { - struct map *map = machine__kernel_map(machine, MAP__FUNCTION); + struct map *map = machine__kernel_map(machine); int err = 0; /* diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index ea5cb4a621db..2c2b443df5ba 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -34,6 +34,7 @@ struct machine { struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; + struct perf_env *env; struct dsos dsos; struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; @@ -47,11 +48,17 @@ struct machine { }; static inline -struct map *machine__kernel_map(struct machine *machine, enum map_type type) +struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { return machine->vmlinux_maps[type]; } +static inline +struct map *machine__kernel_map(struct machine *machine) +{ + return __machine__kernel_map(machine, MAP__FUNCTION); +} + int machine__get_kernel_start(struct machine *machine); static inline u64 machine__kernel_start(struct machine *machine) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b1c475d9b240..4e38c396a897 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -235,7 +235,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) */ bool __map__is_kernel(const struct map *map) { - return map->groups->machine->vmlinux_maps[map->type] == map; + return __machine__kernel_map(map->groups->machine, map->type) == map; } static void map__exit(struct map *map) @@ -553,13 +553,9 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg, return NULL; } -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, - const char *name, - struct map **mapp, - symbol_filter_t filter) +struct symbol 
*maps__find_symbol_by_name(struct maps *maps, const char *name, + struct map **mapp, symbol_filter_t filter) { - struct maps *maps = &mg->maps[type]; struct symbol *sym; struct rb_node *nd; @@ -583,6 +579,17 @@ out: return sym; } +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, + enum map_type type, + const char *name, + struct map **mapp, + symbol_filter_t filter) +{ + struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp, filter); + + return sym; +} + int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) { if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 57829e89b78b..7309d64ce39e 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -190,6 +190,8 @@ void maps__remove(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); struct map *maps__first(struct maps *maps); struct map *map__next(struct map *map); +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, + struct map **mapp, symbol_filter_t filter); void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct map_groups *mg, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 21ed6ee63da9..991bbd469bea 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,5 @@ #include <linux/hw_breakpoint.h> +#include <linux/err.h> #include "util.h" #include "../perf.h" #include "evlist.h" @@ -11,7 +12,7 @@ #include "cache.h" #include "header.h" #include "debug.h" -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include "parse-events-bison.h" #define YY_EXTRA_TYPE int #include "parse-events-flex.h" @@ -26,6 +27,8 @@ extern int parse_events_debug; #endif int parse_events_parse(void *data, void *scanner); +static int get_config_terms(struct 
list_head *head_config, + struct list_head *head_terms __maybe_unused); static struct perf_pmu_event_symbol *perf_pmu_events_list; /* @@ -386,32 +389,72 @@ int parse_events_add_cache(struct list_head *list, int *idx, return add_event(list, idx, &attr, name, NULL); } +static void tracepoint_error(struct parse_events_error *e, int err, + char *sys, char *name) +{ + char help[BUFSIZ]; + + /* + * We get error directly from syscall errno ( > 0), + * or from encoded pointer's error ( < 0). + */ + err = abs(err); + + switch (err) { + case EACCES: + e->str = strdup("can't access trace events"); + break; + case ENOENT: + e->str = strdup("unknown tracepoint"); + break; + default: + e->str = strdup("failed to add tracepoint"); + break; + } + + tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); + e->help = strdup(help); +} + static int add_tracepoint(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { struct perf_evsel *evsel; evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++); - if (!evsel) - return -ENOMEM; + if (IS_ERR(evsel)) { + tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); + return PTR_ERR(evsel); + } - list_add_tail(&evsel->node, list); + if (head_config) { + LIST_HEAD(config_terms); + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + list_splice(&config_terms, &evsel->config_terms); + } + list_add_tail(&evsel->node, list); return 0; } static int add_tracepoint_multi_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; DIR *evt_dir; - int ret = 0; + int ret = 0, found = 0; snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); evt_dir = opendir(evt_path); if (!evt_dir) { - perror("Can't open event dir"); 
+ tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -425,7 +468,15 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, if (!strglobmatch(evt_ent->d_name, evt_name)) continue; - ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name); + found++; + + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, + err, head_config); + } + + if (!found) { + tracepoint_error(err, ENOENT, sys_name, evt_name); + ret = -1; } closedir(evt_dir); @@ -433,15 +484,21 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, } static int add_tracepoint_event(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { return strpbrk(evt_name, "*?") ? - add_tracepoint_multi_event(list, idx, sys_name, evt_name) : - add_tracepoint(list, idx, sys_name, evt_name); + add_tracepoint_multi_event(list, idx, sys_name, evt_name, + err, head_config) : + add_tracepoint(list, idx, sys_name, evt_name, + err, head_config); } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, - char *sys_name, char *evt_name) + char *sys_name, char *evt_name, + struct parse_events_error *err, + struct list_head *head_config) { struct dirent *events_ent; DIR *events_dir; @@ -449,7 +506,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, events_dir = opendir(tracing_events_path); if (!events_dir) { - perror("Can't open event dir"); + tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -465,22 +522,13 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, continue; ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name); + evt_name, err, head_config); } closedir(events_dir); return ret; } -int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event) -{ - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(list, idx, sys, event); 
- else - return add_tracepoint_event(list, idx, sys, event); -} - static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) { @@ -565,9 +613,13 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -static int config_term(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); + +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) { #define CHECK_TYPE_VAL(type) \ do { \ @@ -576,12 +628,6 @@ do { \ } while (0) switch (term->type_term) { - case PARSE_EVENTS__TERM_TYPE_USER: - /* - * Always succeed for sysfs terms, as we dont know - * at this point what type they need to have. - */ - return 0; case PARSE_EVENTS__TERM_TYPE_CONFIG: CHECK_TYPE_VAL(NUM); attr->config = term->val.num; @@ -624,6 +670,9 @@ do { \ CHECK_TYPE_VAL(STR); break; default: + err->str = strdup("unknown term"); + err->idx = term->err_term; + err->help = parse_events_formats_error_string(NULL); return -EINVAL; } @@ -631,9 +680,44 @@ do { \ #undef CHECK_TYPE_VAL } +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + /* + * Always succeed for sysfs terms, as we dont know + * at this point what type they need to have. 
+ */ + return 0; + else + return config_term_common(attr, term, err); +} + +static int config_term_tracepoint(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + return config_term_common(attr, term, err); + default: + if (err) { + err->idx = term->err_term; + err->str = strdup("unknown term"); + err->help = strdup("valid terms: call-graph,stack-size\n"); + } + return -EINVAL; + } + + return 0; +} + static int config_attr(struct perf_event_attr *attr, struct list_head *head, - struct parse_events_error *err) + struct parse_events_error *err, + config_term_func_t config_term) { struct parse_events_term *term; @@ -688,6 +772,27 @@ do { \ return 0; } +int parse_events_add_tracepoint(struct list_head *list, int *idx, + char *sys, char *event, + struct parse_events_error *err, + struct list_head *head_config) +{ + if (head_config) { + struct perf_event_attr attr; + + if (config_attr(&attr, head_config, err, + config_term_tracepoint)) + return -EINVAL; + } + + if (strpbrk(sys, "*?")) + return add_tracepoint_multi_sys(list, idx, sys, event, + err, head_config); + else + return add_tracepoint_event(list, idx, sys, event, + err, head_config); +} + int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, @@ -701,7 +806,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, attr.config = config; if (head_config) { - if (config_attr(&attr, head_config, data->error)) + if (config_attr(&attr, head_config, data->error, + config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -761,7 +867,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - if (config_attr(&attr, head_config, data->error)) + if 
(config_attr(&attr, head_config, data->error, config_term_pmu)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -793,6 +899,11 @@ void parse_events__set_leader(char *name, struct list_head *list) { struct perf_evsel *leader; + if (list_empty(list)) { + WARN_ONCE(true, "WARNING: failed to set leader: empty list"); + return; + } + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; @@ -819,6 +930,7 @@ struct event_modifier { int eG; int eI; int precise; + int precise_max; int exclude_GH; int sample_read; int pinned; @@ -834,6 +946,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eG = evsel ? evsel->attr.exclude_guest : 0; int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; + int precise_max = 0; int sample_read = 0; int pinned = evsel ? evsel->attr.pinned : 0; @@ -870,6 +983,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, /* use of precise requires exclude_guest */ if (!exclude_GH) eG = 1; + } else if (*str == 'P') { + precise_max = 1; } else if (*str == 'S') { sample_read = 1; } else if (*str == 'D') { @@ -900,6 +1015,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eG = eG; mod->eI = eI; mod->precise = precise; + mod->precise_max = precise_max; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; @@ -916,7 +1032,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. 
*/ - if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) return -1; while (*p) { @@ -955,6 +1071,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; + evsel->precise_max = mod.precise_max; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; @@ -1142,6 +1259,11 @@ int parse_events(struct perf_evlist *evlist, const char *str, if (!ret) { struct perf_evsel *last; + if (list_empty(&data.list)) { + WARN_ONCE(true, "WARNING: event parser found nothing"); + return -1; + } + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; last = perf_evlist__last(evlist); @@ -1251,6 +1373,12 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist, struct perf_evsel *last = NULL; int err; + /* + * Don't return when list_empty, give func a chance to report + * error when it found last == NULL. + * + * So no need to WARN here, let *func do this. 
+ */ if (evlist->nr_entries > 0) last = perf_evlist__last(evlist); @@ -1419,7 +1547,7 @@ restart: printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[PERF_TYPE_TRACEPOINT]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1575,7 +1703,7 @@ restart: printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[PERF_TYPE_HW_CACHE]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1648,7 +1776,7 @@ restart: } printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); } - if (evt_num) + if (evt_num && pager_in_use()) printf("\n"); out_free: @@ -1689,13 +1817,14 @@ void print_events(const char *event_glob, bool name_only) printf(" %-50s [%s]\n", "cpu/t1=v1[,t2=v2,t3 ...]/modifier", event_type_descriptors[PERF_TYPE_RAW]); - printf(" (see 'man perf-list' on how to encode it)\n"); - printf("\n"); + if (pager_in_use()) + printf(" (see 'man perf-list' on how to encode it)\n\n"); printf(" %-50s [%s]\n", "mem:<addr>[/len][:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); - printf("\n"); + if (pager_in_use()) + printf("\n"); } print_tracepoint_events(NULL, NULL, name_only); @@ -1811,3 +1940,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data, err->str = strdup(str); WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } + +/* + * Return string contains valid config terms of an event. + * @additional_terms: For terms such as PMU sysfs terms. 
+ */ +char *parse_events_formats_error_string(char *additional_terms) +{ + char *str; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; + + /* valid terms */ + if (additional_terms) { + if (!asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms)) + goto fail; + } else { + if (!asprintf(&str, "valid terms: %s", static_terms)) + goto fail; + } + return str; + +fail: + return NULL; +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a09b0e210997..f13d3ccda444 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -118,7 +118,9 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event); + char *sys, char *event, + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, @@ -155,5 +157,6 @@ int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); +char *parse_events_formats_error_string(char *additional_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 936d566f48d8..be244573a02e 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -122,7 +122,7 @@ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSDI]+ +modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} %% @@ -174,7 +174,7 @@ modifier_bp [rwx]{1,3} 
<config>{ /* - * Please update formats_error_string any time + * Please update parse_events_formats_error_string any time * new static term is added. */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 9cd70819c795..ae6af269f9c9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -67,6 +67,7 @@ static inc_group_count(struct list_head *list, %type <head> event_legacy_cache %type <head> event_legacy_mem %type <head> event_legacy_tracepoint +%type <tracepoint_name> tracepoint_name %type <head> event_legacy_numeric %type <head> event_legacy_raw %type <head> event_def @@ -84,6 +85,10 @@ static inc_group_count(struct list_head *list, u64 num; struct list_head *head; struct parse_events_term *term; + struct tracepoint_name { + char *sys; + char *event; + } tracepoint_name; } %% @@ -368,36 +373,60 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -PE_NAME '-' PE_NAME ':' PE_NAME +tracepoint_name { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; - char sys_name[128]; - snprintf(&sys_name, 128, "%s-%s", $1, $3); ALLOC_LIST(list); - ABORT_ON(parse_events_add_tracepoint(list, &data->idx, &sys_name, $5)); + if (error) + error->idx = @1.first_column; + + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, NULL)) + return -1; + $$ = list; } | -PE_NAME ':' PE_NAME +tracepoint_name '/' event_config '/' { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) { - struct parse_events_error *error = data->error; + if (error) + error->idx = @1.first_column; - if (error) { - error->idx = @1.first_column; - error->str = strdup("unknown tracepoint"); - } + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, 
$1.event, + error, $3)) return -1; - } + $$ = list; } +tracepoint_name: +PE_NAME '-' PE_NAME ':' PE_NAME +{ + char sys_name[128]; + struct tracepoint_name tracepoint; + + snprintf(&sys_name, 128, "%s-%s", $1, $3); + tracepoint.sys = &sys_name; + tracepoint.event = $5; + + $$ = tracepoint; +} +| +PE_NAME ':' PE_NAME +{ + struct tracepoint_name tracepoint = {$1, $3}; + + $$ = tracepoint; +} + event_legacy_numeric: PE_VALUE ':' PE_VALUE { diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 01626be2a8eb..9a38b05f0273 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -496,7 +496,7 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o { struct parse_opt_ctx_t ctx; - perf_header__set_cmdline(argc, argv); + perf_env__set_cmdline(&perf_env, argc, argv); /* build usage string if it's not provided */ if (subcommands && !usagestr[0]) { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 89c91a1a67e7..e4b173dec4b9 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -626,38 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } -static char *formats_error_string(struct list_head *formats) +static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *err, *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + char *str; + struct strbuf buf; unsigned i = 0; - if (!asprintf(&str, "valid terms:")) + if (!formats) return NULL; + strbuf_init(&buf, 0); /* sysfs exported terms */ - list_for_each_entry(format, formats, list) { - char c = i++ ? ',' : ' '; - - err = str; - if (!asprintf(&str, "%s%c%s", err, c, format->name)) - goto fail; - free(err); - } + list_for_each_entry(format, formats, list) + strbuf_addf(&buf, i++ ? 
",%s" : "%s", + format->name); - /* static terms */ - err = str; - if (!asprintf(&str, "%s,%s", err, static_terms)) - goto fail; + str = strbuf_detach(&buf, NULL); + strbuf_release(&buf); - free(err); return str; -fail: - free(err); - return NULL; } /* @@ -693,9 +681,12 @@ static int pmu_config_term(struct list_head *formats, if (verbose) printf("Invalid event/parameter '%s'\n", term->config); if (err) { + char *pmu_term = pmu_formats_string(formats); + err->idx = term->err_term; err->str = strdup("unknown term"); - err->help = formats_error_string(formats); + err->help = parse_events_formats_error_string(pmu_term); + free(pmu_term); } return -EINVAL; } @@ -1017,7 +1008,8 @@ void print_pmu_events(const char *event_glob, bool name_only) goto out_enomem; j++; } - if (pmu->selectable) { + if (pmu->selectable && + (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { char *s; if (asprintf(&s, "%s//", pmu->name) < 0) goto out_enomem; @@ -1035,7 +1027,7 @@ void print_pmu_events(const char *event_glob, bool name_only) printf(" %-50s [Kernel PMU event]\n", aliases[j]); printed++; } - if (printed) + if (printed && pager_in_use()) printf("\n"); out_free: for (j = 0; j < len; j++) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c6f9af78f6f5..b51a8bfb40f9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,8 +40,7 @@ #include "color.h" #include "symbol.h" #include "thread.h" -#include <api/fs/debugfs.h> -#include <api/fs/tracefs.h> +#include <api/fs/fs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -72,7 +71,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp); static struct machine *host_machine; /* Initialize symbol maps and path of vmlinux/modules */ -static int init_symbol_maps(bool user_only) +int init_probe_symbol_maps(bool user_only) { int ret; @@ -102,7 +101,7 @@ out: return ret; } -static void 
exit_symbol_maps(void) +void exit_probe_symbol_maps(void) { if (host_machine) { machine__delete(host_machine); @@ -127,17 +126,19 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) { /* kmap->ref_reloc_sym should be set if host_machine is initialized */ struct kmap *kmap; + struct map *map = machine__kernel_map(host_machine); - if (map__load(host_machine->vmlinux_maps[MAP__FUNCTION], NULL) < 0) + if (map__load(map, NULL) < 0) return NULL; - kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + kmap = map__kmap(map); if (!kmap) return NULL; return kmap->ref_reloc_sym; } -static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) +static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, + bool reloc, bool reladdr) { struct ref_reloc_sym *reloc_sym; struct symbol *sym; @@ -146,12 +147,14 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) /* ref_reloc_sym is just a label. Need a special fix*/ reloc_sym = kernel_get_ref_reloc_sym(); if (reloc_sym && strcmp(name, reloc_sym->name) == 0) - return (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; + *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; else { sym = __find_kernel_function_by_name(name, &map); - if (sym) - return map->unmap_ip(map, sym->start) - - ((reloc) ? 0 : map->reloc); + if (!sym) + return -ENOENT; + *addr = map->unmap_ip(map, sym->start) - + ((reloc) ? 0 : map->reloc) - + ((reladdr) ? 
map->start : 0); } return 0; } @@ -245,12 +248,14 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) static bool kprobe_blacklist__listed(unsigned long address); static bool kprobe_warn_out_range(const char *symbol, unsigned long address) { - u64 etext_addr; + u64 etext_addr = 0; + int ret; /* Get the address of _etext for checking non-probable text symbol */ - etext_addr = kernel_get_symbol_address_by_name("_etext", false); + ret = kernel_get_symbol_address_by_name("_etext", &etext_addr, + false, false); - if (etext_addr != 0 && etext_addr < address) + if (ret == 0 && etext_addr < address) pr_warning("%s is out of .text, skip it.\n", symbol); else if (kprobe_blacklist__listed(address)) pr_warning("%s is blacklisted function, skip it.\n", symbol); @@ -282,7 +287,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) return -ENOENT; } - map = host_machine->vmlinux_maps[MAP__FUNCTION]; + map = machine__kernel_map(host_machine); dso = map->dso; vmlinux_name = symbol_conf.vmlinux_name; @@ -436,19 +441,22 @@ static char *debuginfo_cache_path; static struct debuginfo *debuginfo_cache__open(const char *module, bool silent) { - if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) || - (!debuginfo_cache_path && !module && debuginfo_cache)) + const char *path = module; + + /* If the module is NULL, it should be the kernel. 
*/ + if (!module) + path = "kernel"; + + if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path)) goto out; /* Copy module path */ free(debuginfo_cache_path); - if (module) { - debuginfo_cache_path = strdup(module); - if (!debuginfo_cache_path) { - debuginfo__delete(debuginfo_cache); - debuginfo_cache = NULL; - goto out; - } + debuginfo_cache_path = strdup(path); + if (!debuginfo_cache_path) { + debuginfo__delete(debuginfo_cache); + debuginfo_cache = NULL; + goto out; } debuginfo_cache = open_debuginfo(module, silent); @@ -517,8 +525,10 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, goto error; addr += stext; } else if (tp->symbol) { - addr = kernel_get_symbol_address_by_name(tp->symbol, false); - if (addr == 0) + /* If the module is given, this returns relative address */ + ret = kernel_get_symbol_address_by_name(tp->symbol, &addr, + false, !!tp->module); + if (ret != 0) goto error; addr += tp->offset; } @@ -861,11 +871,11 @@ int show_line_range(struct line_range *lr, const char *module, bool user) { int ret; - ret = init_symbol_maps(user); + ret = init_probe_symbol_maps(user); if (ret < 0) return ret; ret = __show_line_range(lr, module, user); - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } @@ -943,7 +953,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, int i, ret = 0; struct debuginfo *dinfo; - ret = init_symbol_maps(pevs->uprobes); + ret = init_probe_symbol_maps(pevs->uprobes); if (ret < 0) return ret; @@ -960,7 +970,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, debuginfo__delete(dinfo); out: - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } @@ -1884,8 +1894,12 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, goto out; sym = map__find_symbol(map, addr, NULL); } else { - if (tp->symbol) - addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (tp->symbol && !addr) { + ret = 
kernel_get_symbol_address_by_name(tp->symbol, + &addr, true, false); + if (ret < 0) + goto out; + } if (addr) { addr += tp->offset; sym = __find_kernel_function(addr, &map); @@ -2055,7 +2069,7 @@ static void kprobe_blacklist__delete(struct list_head *blacklist) static int kprobe_blacklist__load(struct list_head *blacklist) { struct kprobe_blacklist_node *node; - const char *__debugfs = debugfs_find_mountpoint(); + const char *__debugfs = debugfs__mountpoint(); char buf[PATH_MAX], *p; FILE *fp; int ret; @@ -2181,9 +2195,9 @@ out: } /* Show an event */ -static int show_perf_probe_event(const char *group, const char *event, - struct perf_probe_event *pev, - const char *module, bool use_stdout) +int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout) { struct strbuf buf = STRBUF_INIT; int ret; @@ -2264,7 +2278,7 @@ int show_perf_probe_events(struct strfilter *filter) setup_pager(); - ret = init_symbol_maps(false); + ret = init_probe_symbol_maps(false); if (ret < 0) return ret; @@ -2280,7 +2294,7 @@ int show_perf_probe_events(struct strfilter *filter) close(kp_fd); if (up_fd > 0) close(up_fd); - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } @@ -2289,36 +2303,41 @@ static int get_new_event_name(char *buf, size_t len, const char *base, struct strlist *namelist, bool allow_suffix) { int i, ret; - char *p; + char *p, *nbase; if (*base == '.') base++; + nbase = strdup(base); + if (!nbase) + return -ENOMEM; + + /* Cut off the dot suffixes (e.g. .const, .isra)*/ + p = strchr(nbase, '.'); + if (p && p != nbase) + *p = '\0'; - /* Try no suffix */ - ret = e_snprintf(buf, len, "%s", base); + /* Try no suffix number */ + ret = e_snprintf(buf, len, "%s", nbase); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); - return ret; + goto out; } - /* Cut off the postfixes (e.g. 
.const, .isra)*/ - p = strchr(buf, '.'); - if (p && p != buf) - *p = '\0'; if (!strlist__has_entry(namelist, buf)) - return 0; + goto out; if (!allow_suffix) { pr_warning("Error: event \"%s\" already exists. " - "(Use -f to force duplicates.)\n", base); - return -EEXIST; + "(Use -f to force duplicates.)\n", buf); + ret = -EEXIST; + goto out; } /* Try to add suffix */ for (i = 1; i < MAX_EVENT_INDEX; i++) { - ret = e_snprintf(buf, len, "%s_%d", base, i); + ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); - return ret; + goto out; } if (!strlist__has_entry(namelist, buf)) break; @@ -2328,6 +2347,8 @@ static int get_new_event_name(char *buf, size_t len, const char *base, ret = -ERANGE; } +out: + free(nbase); return ret; } @@ -2400,7 +2421,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, { int i, fd, ret; struct probe_trace_event *tev = NULL; - const char *event = NULL, *group = NULL; struct strlist *namelist; fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0)); @@ -2416,7 +2436,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } ret = 0; - pr_info("Added new event%s\n", (ntevs > 1) ? 
"s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; /* Skip if the symbol is out of .text or blacklisted */ @@ -2433,13 +2452,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret < 0) break; - /* We use tev's name for showing new events */ - show_perf_probe_event(tev->group, tev->event, pev, - tev->point.module, false); - /* Save the last valid name */ - event = tev->event; - group = tev->group; - /* * Probes after the first probe which comes from same * user input are always allowed to add suffix, because @@ -2451,13 +2463,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret == -EINVAL && pev->uprobes) warn_uprobe_event_compat(tev); - /* Note that it is possible to skip all events because of blacklist */ - if (ret >= 0 && event) { - /* Show how to use the event. */ - pr_info("\nYou can now use it in all perf tools, such as:\n\n"); - pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); - } - strlist__delete(namelist); close_out: close(fd); @@ -2538,7 +2543,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, goto out; } - if (!pev->uprobes && !pp->retprobe) { + /* Note that the symbols in the kmodule are not relocated */ + if (!pev->uprobes && !pp->retprobe && !pev->target) { reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -2575,8 +2581,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Add one probe point */ tp->address = map->unmap_ip(map, sym->start) + pp->offset; - /* If we found a wrong one, mark it by NULL symbol */ - if (!pev->uprobes && + + /* Check the kprobe (not in module) is within .text */ + if (!pev->uprobes && !pev->target && kprobe_warn_out_range(sym->name, tp->address)) { tp->symbol = NULL; /* Skip it */ skipped++; @@ -2760,63 +2767,71 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, return find_probe_trace_events_from_map(pev, 
tevs); } -struct __event_package { - struct perf_probe_event *pev; - struct probe_trace_event *tevs; - int ntevs; -}; - -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) +int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs) { - int i, j, ret; - struct __event_package *pkgs; - - ret = 0; - pkgs = zalloc(sizeof(struct __event_package) * npevs); - - if (pkgs == NULL) - return -ENOMEM; - - ret = init_symbol_maps(pevs->uprobes); - if (ret < 0) { - free(pkgs); - return ret; - } + int i, ret; /* Loop 1: convert all events */ for (i = 0; i < npevs; i++) { - pkgs[i].pev = &pevs[i]; /* Init kprobe blacklist if needed */ - if (!pkgs[i].pev->uprobes) + if (!pevs[i].uprobes) kprobe_blacklist__init(); /* Convert with or without debuginfo */ - ret = convert_to_probe_trace_events(pkgs[i].pev, - &pkgs[i].tevs); + ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs); if (ret < 0) - goto end; - pkgs[i].ntevs = ret; + return ret; + pevs[i].ntevs = ret; } /* This just release blacklist only if allocated */ kprobe_blacklist__release(); + return 0; +} + +int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs) +{ + int i, ret = 0; + /* Loop 2: add all events */ for (i = 0; i < npevs; i++) { - ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, - pkgs[i].ntevs, + ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs, + pevs[i].ntevs, probe_conf.force_add); if (ret < 0) break; } -end: + return ret; +} + +void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) +{ + int i, j; + /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { - for (j = 0; j < pkgs[i].ntevs; j++) - clear_probe_trace_event(&pkgs[i].tevs[j]); - zfree(&pkgs[i].tevs); + for (j = 0; j < pevs[i].ntevs; j++) + clear_probe_trace_event(&pevs[i].tevs[j]); + zfree(&pevs[i].tevs); + pevs[i].ntevs = 0; + clear_perf_probe_event(&pevs[i]); } - free(pkgs); - exit_symbol_maps(); +} +int add_perf_probe_events(struct perf_probe_event 
*pevs, int npevs) +{ + int ret; + + ret = init_probe_symbol_maps(pevs->uprobes); + if (ret < 0) + return ret; + + ret = convert_perf_probe_events(pevs, npevs); + if (ret == 0) + ret = apply_perf_probe_events(pevs, npevs); + + cleanup_perf_probe_events(pevs, npevs); + + exit_probe_symbol_maps(); return ret; } @@ -2828,8 +2843,6 @@ int del_perf_probe_events(struct strfilter *filter) if (!str) return -EINVAL; - pr_debug("Delete filter: \'%s\'\n", str); - /* Get current event names */ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); if (ret < 0) @@ -2844,9 +2857,6 @@ int del_perf_probe_events(struct strfilter *filter) ret = ret2; goto error; } - if (ret == -ENOENT && ret2 == -ENOENT) - pr_debug("\"%s\" does not hit any event.\n", str); - /* Note that this is silently ignored */ ret = 0; error: @@ -2881,7 +2891,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, struct map *map; int ret; - ret = init_symbol_maps(user); + ret = init_probe_symbol_maps(user); if (ret < 0) return ret; @@ -2911,7 +2921,7 @@ end: if (user) { map__put(map); } - exit_symbol_maps(); + exit_probe_symbol_maps(); return ret; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 6e7ec68a4aa8..ba926c30f8cd 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -87,6 +87,8 @@ struct perf_probe_event { bool uprobes; /* Uprobe event flag */ char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ + struct probe_trace_event *tevs; + int ntevs; }; /* Line range */ @@ -108,6 +110,8 @@ struct variable_list { }; struct map; +int init_probe_symbol_maps(bool user_only); +void exit_probe_symbol_maps(void); /* Command string to events */ extern int parse_perf_probe_command(const char *cmd, @@ -138,7 +142,14 @@ extern void line_range__clear(struct line_range *lr); extern int line_range__init(struct line_range *lr); extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern int 
convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); extern int del_perf_probe_events(struct strfilter *filter); + +extern int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout); extern int show_perf_probe_events(struct strfilter *filter); extern int show_line_range(struct line_range *lr, const char *module, bool user); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index bbb243717ec8..89dbeb92c68e 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -22,8 +22,7 @@ #include "color.h" #include "symbol.h" #include "thread.h" -#include <api/fs/debugfs.h> -#include <api/fs/tracefs.h> +#include <api/fs/tracing_path.h> #include "probe-event.h" #include "probe-file.h" #include "session.h" @@ -73,21 +72,11 @@ static void print_both_open_warning(int kerr, int uerr) static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; - const char *__debugfs; const char *tracing_dir = ""; int ret; - __debugfs = tracefs_find_mountpoint(); - if (__debugfs == NULL) { - tracing_dir = "tracing/"; - - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; - } - ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", - __debugfs, tracing_dir, trace_file); + tracing_path, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -267,7 +256,6 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) goto error; } - pr_info("Removed event: %s\n", ent->s); return 0; error: pr_warning("Failed to delete event: %s\n", @@ -275,7 +263,8 @@ error: return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) +int probe_file__get_events(int fd, struct 
strfilter *filter, + struct strlist *plist) { struct strlist *namelist; struct str_node *ent; @@ -290,12 +279,43 @@ int probe_file__del_events(int fd, struct strfilter *filter) p = strchr(ent->s, ':'); if ((p && strfilter__compare(filter, p + 1)) || strfilter__compare(filter, ent->s)) { - ret = __del_trace_probe_event(fd, ent); - if (ret < 0) - break; + strlist__add(plist, ent->s); + ret = 0; } } strlist__delete(namelist); return ret; } + +int probe_file__del_strlist(int fd, struct strlist *namelist) +{ + int ret = 0; + struct str_node *ent; + + strlist__for_each(ent, namelist) { + ret = __del_trace_probe_event(fd, ent); + if (ret < 0) + break; + } + return ret; +} + +int probe_file__del_events(int fd, struct strfilter *filter) +{ + struct strlist *namelist; + int ret; + + namelist = strlist__new(NULL, NULL); + if (!namelist) + return -ENOMEM; + + ret = probe_file__get_events(fd, filter, namelist); + if (ret < 0) + return ret; + + ret = probe_file__del_strlist(fd, namelist); + strlist__delete(namelist); + + return ret; +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index ada94a242a17..18ac9cf51c34 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -14,5 +14,9 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); int probe_file__del_events(int fd, struct strfilter *filter); +int probe_file__get_events(int fd, struct strfilter *filter, + struct strlist *plist); +int probe_file__del_strlist(int fd, struct strlist *namelist); + #endif diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 29c43c0680a8..bd8f03de5e40 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -70,6 +70,7 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dwfl) goto error; + dwfl_report_begin(dbg->dwfl); dbg->mod = 
dwfl_report_offline(dbg->dwfl, "", "", fd); if (!dbg->mod) goto error; @@ -78,6 +79,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dbg) goto error; + dwfl_report_end(dbg->dwfl, NULL, NULL); + return 0; error: if (dbg->dwfl) @@ -591,6 +594,7 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) /* Convert subprogram DIE to trace point */ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, Dwarf_Addr paddr, bool retprobe, + const char *function, struct probe_trace_point *tp) { Dwarf_Addr eaddr, highaddr; @@ -634,8 +638,10 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, /* Return probe must be on the head of a subprogram */ if (retprobe) { if (eaddr != paddr) { - pr_warning("Return probe must be on the head of" - " a real function.\n"); + pr_warning("Failed to find \"%s%%return\",\n" + " because %s is an inlined function and" + " has no return point.\n", function, + function); return -EINVAL; } tp->retprobe = true; @@ -1175,6 +1181,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { struct trace_event_finder *tf = container_of(pf, struct trace_event_finder, pf); + struct perf_probe_point *pp = &pf->pev->point; struct probe_trace_event *tev; struct perf_probe_arg *args; int ret, i; @@ -1189,7 +1196,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) /* Trace point should be converted from subprogram DIE */ ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr, - pf->pev->point.retprobe, &tev->point); + pp->retprobe, pp->function, &tev->point); if (ret < 0) return ret; @@ -1319,6 +1326,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) { struct available_var_finder *af = container_of(pf, struct available_var_finder, pf); + struct perf_probe_point *pp = &pf->pev->point; struct variable_list *vl; Dwarf_Die die_mem; int ret; @@ -1332,7 +1340,7 @@ static int add_available_vars(Dwarf_Die 
*sc_die, struct probe_finder *pf) /* Trace point should be converted from subprogram DIE */ ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr, - pf->pev->point.retprobe, &vl->point); + pp->retprobe, pp->function, &vl->point); if (ret < 0) return ret; @@ -1399,6 +1407,41 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? ret : af.nvls; } +/* For the kernel module, we need a special code to get a DIE */ +static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + } + } + return 0; +} + /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, struct perf_probe_point *ppt) @@ -1407,9 +1450,16 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, Dwarf_Addr _addr = 0, baseaddr = 0; const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; + bool reloc = false; +retry: /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { + if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) { + addr += baseaddr; + reloc = true; + goto retry; + } pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c 
b/tools/perf/util/scripting-engines/trace-event-perl.c index 1bd593bbf7a5..544509c159ce 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -221,6 +221,7 @@ static void define_event_symbols(struct event_format *event, break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: case PRINT_STRING: case PRINT_BITMASK: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ace2484985cb..a8e825fca42a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -251,6 +251,7 @@ static void define_event_symbols(struct event_format *event, /* gcc warns for these? */ case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: case PRINT_FUNC: case PRINT_BITMASK: /* we should warn... */ @@ -318,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fc3f7c922f99..428149bc64d2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -138,6 +138,8 @@ struct perf_session *perf_session__new(struct perf_data_file *file, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } + } else { + session->machines.host.env = &perf_env; } if (!file || perf_data_file__is_write(file)) { @@ -170,30 +172,13 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session_env__exit(struct perf_env *env) -{ - zfree(&env->hostname); - zfree(&env->os_release); - zfree(&env->version); - zfree(&env->arch); - zfree(&env->cpu_desc); - zfree(&env->cpuid); - - zfree(&env->cmdline); - zfree(&env->cmdline_argv); - zfree(&env->sibling_cores); - zfree(&env->sibling_threads); - zfree(&env->numa_nodes); - zfree(&env->pmu_mappings); -} - void perf_session__delete(struct perf_session *session) { auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_session_env__exit(&session->header.env); + perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->file) perf_data_file__close(session->file); @@ -1079,11 +1064,11 @@ static int machines__deliver_event(struct machines *machines, switch (event->header.type) { case PERF_RECORD_SAMPLE: - dump_sample(evsel, event, sample); if (evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } + dump_sample(evsel, event, sample); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; return 0; @@ -1116,6 +1101,9 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: + if (tool->aux == perf_event__process_aux && + 
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) + evlist->stats.total_aux_lost += 1; return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1323,7 +1311,7 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, -1, pid); } -static struct thread *perf_session__register_idle_thread(struct perf_session *session) +struct thread *perf_session__register_idle_thread(struct perf_session *session) { struct thread *thread; @@ -1361,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session) } } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_lost != 0) { + ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n", + stats->total_aux_lost, + stats->nr_events[PERF_RECORD_AUX]); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " @@ -1805,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + stack_depth) != 0) { if (verbose) error("Failed to resolve callchain. 
Skipping\n"); return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b44afc75d1cc..3e900c0efc73 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -89,6 +89,8 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_ } struct thread *perf_session__findnew(struct perf_session *session, pid_t pid); +struct thread *perf_session__register_idle_thread(struct perf_session *session); + size_t perf_session__fprintf(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7e3871606df3..ee94b728fca4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -21,6 +21,7 @@ int sort__need_collapse = 0; int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; +int sort__has_socket = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; @@ -328,8 +329,8 @@ static char *get_srcfile(struct hist_entry *e) char *sf, *p; struct map *map = e->ms.map; - sf = get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, true); + sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), + e->ms.sym, false, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); @@ -421,6 +422,27 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort socket */ + +static int64_t +sort__socket_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->socket - left->socket; +} + +static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket); +} + +struct sort_entry sort_socket = { + .se_header = "Socket", + .se_cmp = sort__socket_cmp, + .se_snprintf = hist_entry__socket_snprintf, + .se_width_idx = HISTC_SOCKET, +}; + /* sort keys for branch stacks */ static int64_t @@ -633,6 +655,35 @@ static int 
hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, } static int64_t +sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->iaddr.addr; + if (right->mem_info) + r = right->mem_info->iaddr.addr; + + return (int64_t)(r - l); +} + +static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + uint64_t addr = 0; + struct map *map = NULL; + struct symbol *sym = NULL; + + if (he->mem_info) { + addr = he->mem_info->iaddr.addr; + map = he->mem_info->iaddr.map; + sym = he->mem_info->iaddr.sym; + } + return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size, + width); +} + +static int64_t sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right) { struct map *map_l = NULL; @@ -1055,6 +1106,13 @@ struct sort_entry sort_mem_daddr_sym = { .se_width_idx = HISTC_MEM_DADDR_SYMBOL, }; +struct sort_entry sort_mem_iaddr_sym = { + .se_header = "Code Symbol", + .se_cmp = sort__iaddr_cmp, + .se_snprintf = hist_entry__iaddr_snprintf, + .se_width_idx = HISTC_MEM_IADDR_SYMBOL, +}; + struct sort_entry sort_mem_daddr_dso = { .se_header = "Data Object", .se_cmp = sort__dso_daddr_cmp, @@ -1248,6 +1306,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SYM, "symbol", sort_sym), DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_SOCKET, "socket", sort_socket), DIM(SORT_SRCLINE, "srcline", sort_srcline), DIM(SORT_SRCFILE, "srcfile", sort_srcfile), DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), @@ -1276,6 +1335,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym), + DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym), DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso), DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked), 
DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), @@ -1550,6 +1610,8 @@ int sort_dimension__add(const char *tok) } else if (sd->entry == &sort_dso) { sort__has_dso = 1; + } else if (sd->entry == &sort_socket) { + sort__has_socket = 1; } return __sort_dimension__add(sd); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3c2a399f8f5b..33b3d30e18d3 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -34,6 +34,7 @@ extern int have_ignore_callees; extern int sort__need_collapse; extern int sort__has_parent; extern int sort__has_sym; +extern int sort__has_socket; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; @@ -90,6 +91,7 @@ struct hist_entry { struct comm *comm; u64 ip; u64 transaction; + s32 socket; s32 cpu; u8 cpumode; @@ -172,6 +174,7 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_SOCKET, SORT_SRCLINE, SORT_SRCFILE, SORT_LOCAL_WEIGHT, @@ -198,6 +201,7 @@ enum sort_type { SORT_MEM_LVL, SORT_MEM_SNOOP, SORT_MEM_DCACHELINE, + SORT_MEM_IADDR_SYMBOL, }; /* diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index fc08248f08ca..b4db3f48e3b0 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -149,8 +149,11 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } +#define MAX_INLINE_NEST 1024 + static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line, struct dso *dso) + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines) { int ret = 0; struct a2l_data *a2l = dso->a2l; @@ -170,6 +173,15 @@ static int addr2line(const char *dso_name, u64 addr, bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); + if (a2l->found && unwind_inlines) { + int cnt = 0; + + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, + &a2l->funcname, &a2l->line) && + cnt++ < MAX_INLINE_NEST) + ; + } + if (a2l->found && a2l->filename) { *file = strdup(a2l->filename); *line = a2l->line; @@ -197,7 +209,8 
@@ void dso__free_a2l(struct dso *dso) static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, - struct dso *dso __maybe_unused) + struct dso *dso __maybe_unused, + bool unwind_inlines __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -254,8 +267,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym) +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool unwind_inlines) { char *file = NULL; unsigned line = 0; @@ -276,7 +289,7 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!strncmp(dso_name, "/tmp/perf-", 10)) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso)) + if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines)) goto out; if (asprintf(&srcline, "%s:%u", @@ -310,3 +323,9 @@ void free_srcline(char *srcline) if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0) free(srcline); } + +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym) +{ + return __get_srcline(dso, addr, sym, show_sym, false); +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1f97ffb158a6..bcda43bee4d4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -624,7 +624,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * symbols, setting length to 0, and rely on * symbols__fixup_end() to fix it up. 
*/ - sym = symbol__new(start, 0, kallsyms2elf_type(type), name); + sym = symbol__new(start, 0, kallsyms2elf_binding(type), name); if (sym == NULL) return -ENOMEM; /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 440ba8ae888f..40073c60b83d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -191,6 +191,7 @@ struct addr_location { u8 filtered; u8 cpumode; s32 cpu; + s32 socket; }; struct symsrc { diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 22245986e59e..d995743cb673 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -38,7 +38,7 @@ #include "../perf.h" #include "trace-event.h" -#include <api/fs/debugfs.h> +#include <api/fs/tracing_path.h> #include "evsel.h" #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index b90e646c7a91..802bb868d446 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -7,7 +7,9 @@ #include <sys/stat.h> #include <fcntl.h> #include <linux/kernel.h> +#include <linux/err.h> #include <traceevent/event-parse.h> +#include <api/fs/tracing_path.h> #include "trace-event.h" #include "machine.h" #include "util.h" @@ -65,6 +67,9 @@ void trace_event__cleanup(struct trace_event *t) pevent_free(t->pevent); } +/* + * Returns pointer with encoded error via <linux/err.h> interface. 
+ */ static struct event_format* tp_format(const char *sys, const char *name) { @@ -73,12 +78,14 @@ tp_format(const char *sys, const char *name) char path[PATH_MAX]; size_t size; char *data; + int err; scnprintf(path, PATH_MAX, "%s/%s/%s/format", tracing_events_path, sys, name); - if (filename__read_str(path, &data, &size)) - return NULL; + err = filename__read_str(path, &data, &size); + if (err) + return ERR_PTR(err); pevent_parse_format(pevent, &event, data, size, sys); @@ -86,11 +93,14 @@ tp_format(const char *sys, const char *name) return event; } +/* + * Returns pointer with encoded error via <linux/err.h> interface. + */ struct event_format* trace_event__tp_format(const char *sys, const char *name) { if (!tevent_initialized && trace_event__init2()) - return NULL; + return ERR_PTR(-ENOMEM); return tp_format(sys, name); } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index da6cc4cc2a4f..b85ee55cca0c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -78,6 +78,8 @@ struct scripting_ops { int (*generate_script) (struct pevent *pevent, const char *outfile); }; +extern unsigned int scripting_max_stack; + int script_spec_register(const char *spec, struct scripting_ops *ops); void setup_perl_scripting(void); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index c2cd9bf2348b..c1bf9ff210b0 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,9 +17,9 @@ #include "callchain.h" struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_REL, + .mode = CHAIN_GRAPH_ABS, .min_percent = 0.5, - .order = ORDER_CALLEE, + .order = ORDER_CALLER, .key = CCKEY_FUNCTION }; @@ -34,9 +34,6 @@ bool test_attr__enabled; bool perf_host = true; bool perf_guest = false; -char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; -char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; - void event_attr_init(struct perf_event_attr *attr) { if (!perf_host) @@ -390,73 +387,6 
@@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *tracing, const char *mountpoint) -{ - snprintf(tracing_path, sizeof(tracing_path), "%s/%s", - mountpoint, tracing); - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", - mountpoint, tracing, "events"); -} - -static const char *__perf_tracefs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = tracefs_mount(mountpoint); - if (!mnt) - return NULL; - - set_tracing_events_path("", mnt); - - return mnt; -} - -static const char *__perf_debugfs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = debugfs_mount(mountpoint); - if (!mnt) - return NULL; - - set_tracing_events_path("tracing/", mnt); - - return mnt; -} - -const char *perf_debugfs_mount(const char *mountpoint) -{ - const char *mnt; - - mnt = __perf_tracefs_mount(mountpoint); - if (mnt) - return mnt; - - mnt = __perf_debugfs_mount(mountpoint); - - return mnt; -} - -void perf_debugfs_set_path(const char *mntpt) -{ - set_tracing_events_path("tracing/", mntpt); -} - -char *get_tracing_file(const char *name) -{ - char *file; - - if (asprintf(&file, "%s/%s", tracing_path, name) < 0) - return NULL; - - return file; -} - -void put_tracing_file(char *file) -{ - free(file); -} - int parse_nsec_time(const char *str, u64 *ptime) { u64 time_sec, time_nsec; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 291be1d84bc3..3d5b01e8978f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -74,8 +74,7 @@ #include <linux/magic.h> #include <linux/types.h> #include <sys/ttydefaults.h> -#include <api/fs/debugfs.h> -#include <api/fs/tracefs.h> +#include <api/fs/tracing_path.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -83,12 +82,6 @@ extern const char *graph_line; extern const char *graph_dotted_line; extern char buildid_dir[]; -extern char tracing_path[]; -extern char tracing_events_path[]; -extern void 
perf_debugfs_set_path(const char *mountpoint); -const char *perf_debugfs_mount(const char *mountpoint); -char *get_tracing_file(const char *name); -void put_tracing_file(char *file); /* On most systems <limits.h> would have given us this, but * not on some systems (e.g. GNU/Hurd). @@ -321,6 +314,8 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym); +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); int filename__read_str(const char *filename, char **buf, size_t *sizep); diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 7f73fa32a590..bcf5ec760eb9 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -42,7 +42,7 @@ #include <sys/mman.h> #include "../../include/uapi/linux/magic.h" #include "../../include/uapi/linux/kernel-page-flags.h" -#include <api/fs/debugfs.h> +#include <api/fs/fs.h> #ifndef MAX_PATH # define MAX_PATH 256 @@ -188,7 +188,7 @@ static int kpageflags_fd; static int opt_hwpoison; static int opt_unpoison; -static char *hwpoison_debug_fs; +static const char *hwpoison_debug_fs; static int hwpoison_inject_fd; static int hwpoison_forget_fd; @@ -487,7 +487,7 @@ static void prepare_hwpoison_fd(void) { char buf[MAX_PATH + 1]; - hwpoison_debug_fs = debugfs_mount(NULL); + hwpoison_debug_fs = debugfs__mount(); if (!hwpoison_debug_fs) { perror("mount debugfs"); exit(EXIT_FAILURE); |