From 57fb188fcaabe72f0a210301e3554c39af9936de Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 3 Sep 2015 08:25:46 -0400 Subject: [PATCH] Bring in build fixes for perf on non-x86 machines This is literally Ingo's perf-urgent-for-linus branch right now. I guess we need it more urgently than he wants to push it to Linus --- kernel.spec | 1 + perf-build-fixes-for-non-x86.patch | 951 +++++++++++++++++++++++++++++ 2 files changed, 952 insertions(+) create mode 100644 perf-build-fixes-for-non-x86.patch diff --git a/kernel.spec b/kernel.spec index 347c34dc1..e0836c0c5 100644 --- a/kernel.spec +++ b/kernel.spec @@ -605,6 +605,7 @@ Patch518: drm-vmwgfx-Allow-dropped-masters-render-node-like-ac.patch Patch519: security-device_cgroup-fix-RCU-lockdep-splat.patch Patch520: ARM-dts-Fix-Makefile-target-for-sun4i-a10-itead-itea.patch +Patch521: perf-build-fixes-for-non-x86.patch Patch904: kdbus.patch diff --git a/perf-build-fixes-for-non-x86.patch b/perf-build-fixes-for-non-x86.patch new file mode 100644 index 000000000..2f3ba1a2d --- /dev/null +++ b/perf-build-fixes-for-non-x86.patch @@ -0,0 +1,951 @@ +From 97db62062ac76e314c8bda4dc5b63f0ea906d15f Mon Sep 17 00:00:00 2001 +From: Adrian Hunter +Date: Mon, 31 Aug 2015 21:39:44 +0300 +Subject: [PATCH 1/9] perf tools: Fix build on powerpc broken by pt/bts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It is theoretically possible to process perf.data files created on x86 +and that contain Intel PT or Intel BTS data, on any other architecture, +which is why it is possible for there to be build errors on powerpc +caused by pt/bts. 
+ +The errors were: + + util/intel-pt-decoder/intel-pt-insn-decoder.c: In function ‘intel_pt_insn_decoder’: + util/intel-pt-decoder/intel-pt-insn-decoder.c:138:3: error: switch missing default case [-Werror=switch-default] + switch (insn->immediate.nbytes) { + ^ + cc1: all warnings being treated as errors + + linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_synth_branch_sample': + sources/linux-acme.git/tools/perf/util/intel-pt.c:871: undefined reference to `tsc_to_perf_time' + linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_sample': + sources/linux-acme.git/tools/perf/util/intel-pt.c:915: undefined reference to `tsc_to_perf_time' + sources/linux-acme.git/tools/perf/util/intel-pt.c:962: undefined reference to `tsc_to_perf_time' + linux-acme.git/tools/perf/perf-obj/libperf.a(libperf-in.o): In function `intel_pt_process_event': + sources/linux-acme.git/tools/perf/util/intel-pt.c:1454: undefined reference to `perf_time_to_tsc' + +Signed-off-by: Adrian Hunter +Cc: Jiri Olsa +Cc: Sukadev Bhattiprolu +Cc: Wang Nan +Cc: Zefan Li +Cc: pi3orama@163.com +Link: http://lkml.kernel.org/r/1441046384-28663-1-git-send-email-adrian.hunter@intel.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/util/Build | 1 + + tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/tools/perf/util/Build b/tools/perf/util/Build +index e912856cc4e5..e79e4522368a 100644 +--- a/tools/perf/util/Build ++++ b/tools/perf/util/Build +@@ -75,6 +75,7 @@ libperf-y += record.o + libperf-y += srcline.o + libperf-y += data.o + libperf-$(CONFIG_X86) += tsc.o ++libperf-$(CONFIG_AUXTRACE) += tsc.o + libperf-y += cloexec.o + libperf-y += thread-stack.o + libperf-$(CONFIG_AUXTRACE) += auxtrace.o +diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +index 9e4eb8fcd559..d23138c06665 100644 +--- 
a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c ++++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +@@ -146,6 +146,9 @@ static void intel_pt_insn_decoder(struct insn *insn, + case 4: + intel_pt_insn->rel = bswap_32(insn->immediate.value); + break; ++ default: ++ intel_pt_insn->rel = 0; ++ break; + } + #else + intel_pt_insn->rel = insn->immediate.value; +-- +2.4.3 + + +From acf860ae7c53cc8b0c5d372c218332aac3eeba4f Mon Sep 17 00:00:00 2001 +From: Wang Nan +Date: Thu, 27 Aug 2015 02:30:55 +0000 +Subject: [PATCH 2/9] bpf tools: New API to get name from a BPF object + +Before this patch there was no way to connect a loaded bpf object +to its source file. However, when applying perf's '--filter' to a BPF +object, the lack of this connection makes things harder, because perf loads +all programs together, but the '--filter' setting is per object. + +The API of bpf_object__open_buffer() is changed to allow passing a name. +Fortunately, at this time there's only one user of it (perf test LLVM), +so we change it together.
+ +Signed-off-by: Wang Nan +Cc: Alexei Starovoitov +Cc: Brendan Gregg +Cc: Daniel Borkmann +Cc: David Ahern +Cc: He Kuang +Cc: Jiri Olsa +Cc: Kaixu Xia +Cc: Masami Hiramatsu +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Zefan Li +Cc: pi3orama@163.com +Link: http://lkml.kernel.org/r/1440742821-44548-2-git-send-email-wangnan0@huawei.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++--- + tools/lib/bpf/libbpf.h | 4 +++- + tools/perf/tests/llvm.c | 2 +- + 3 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c +index 4fa4bc4505f5..4252fc22f78f 100644 +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path) + } + + struct bpf_object *bpf_object__open_buffer(void *obj_buf, +- size_t obj_buf_sz) ++ size_t obj_buf_sz, ++ const char *name) + { ++ char tmp_name[64]; ++ + /* param validation */ + if (!obj_buf || obj_buf_sz <= 0) + return NULL; + +- pr_debug("loading object from buffer\n"); ++ if (!name) { ++ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", ++ (unsigned long)obj_buf, ++ (unsigned long)obj_buf_sz); ++ tmp_name[sizeof(tmp_name) - 1] = '\0'; ++ name = tmp_name; ++ } ++ pr_debug("loading object '%s' from buffer\n", ++ name); + +- return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz); ++ return __bpf_object__open(name, obj_buf, obj_buf_sz); + } + + int bpf_object__unload(struct bpf_object *obj) +@@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev) + return next; + } + ++const char * ++bpf_object__get_name(struct bpf_object *obj) ++{ ++ if (!obj) ++ return NULL; ++ return obj->path; ++} ++ + struct bpf_program * + bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) + { +diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h +index ea8adc206b62..f16170c95ffd 100644 +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -28,12 +28,14 @@ 
struct bpf_object; + + struct bpf_object *bpf_object__open(const char *path); + struct bpf_object *bpf_object__open_buffer(void *obj_buf, +- size_t obj_buf_sz); ++ size_t obj_buf_sz, ++ const char *name); + void bpf_object__close(struct bpf_object *object); + + /* Load/unload object into/from kernel */ + int bpf_object__load(struct bpf_object *obj); + int bpf_object__unload(struct bpf_object *obj); ++const char *bpf_object__get_name(struct bpf_object *obj); + + struct bpf_object *bpf_object__next(struct bpf_object *prev); + #define bpf_object__for_each_safe(pos, tmp) \ +diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c +index a337356fd979..52d55971f66f 100644 +--- a/tools/perf/tests/llvm.c ++++ b/tools/perf/tests/llvm.c +@@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) + { + struct bpf_object *obj; + +- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz); ++ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); + if (!obj) + return -1; + bpf_object__close(obj); +-- +2.4.3 + + +From d988d5ee647861706bc7a391ddbc29429b50f00e Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Fri, 21 Aug 2015 02:23:14 -0400 +Subject: [PATCH 3/9] perf evlist: Open event on evsel cpus and threads + +An evsel may have different cpus and threads than the evlist it is in. + +Use its own cpus and threads when opening the evsel in 'perf record'.
+ +Signed-off-by: Kan Liang +Cc: Jiri Olsa +Link: http://lkml.kernel.org/r/1440138194-17001-1-git-send-email-kan.liang@intel.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/builtin-record.c | 2 +- + tools/perf/util/evlist.c | 4 ++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c +index a660022f2c92..1d14f382f614 100644 +--- a/tools/perf/builtin-record.c ++++ b/tools/perf/builtin-record.c +@@ -279,7 +279,7 @@ static int record__open(struct record *rec) + + evlist__for_each(evlist, pos) { + try_again: +- if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { ++ if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { + if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (verbose) + ui__warning("%s\n", msg); +diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c +index 8d00039d6a20..d51a5200c8af 100644 +--- a/tools/perf/util/evlist.c ++++ b/tools/perf/util/evlist.c +@@ -1181,6 +1181,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e + if (evsel->filter == NULL) + continue; + ++ /* ++ * filters only work for tracepoint event, which doesn't have cpu limit. ++ * So evlist and evsel should always be same. ++ */ + err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); + if (err) { + *err_evsel = evsel; +-- +2.4.3 + + +From fc36f9485aee3a62b22be1f561543a31bce6d48e Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 31 Aug 2015 18:41:10 +0200 +Subject: [PATCH 4/9] perf script: Enable printing of interrupted machine state + +This patch adds the output of the interrupted machine state (iregs) to +perf script. It presents them as NAME:VALUE so this is easy to parse +during post processing. + +To capture the interrupted machine state: + $ perf record -I .... 
+ +to display iregs, use the -F option: + + $ perf script -F ip,iregs + 40afc2 AX:0x6c5770 BX:0x1e CX:0x5f4d80a DX:0x101010101010101 SI:0x1 + +Signed-off-by: Stephane Eranian +Tested-by: Arnaldo Carvalho de Melo +Cc: Adrian Hunter +Cc: Andi Kleen +Cc: David Ahern +Cc: Jiri Olsa +Cc: Kan Liang +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/1441039273-16260-2-git-send-email-eranian@google.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/Documentation/perf-script.txt | 2 +- + tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- + 2 files changed, 31 insertions(+), 2 deletions(-) + +diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt +index 614b2c7b0293..dc3ec783b7bd 100644 +--- a/tools/perf/Documentation/perf-script.txt ++++ b/tools/perf/Documentation/perf-script.txt +@@ -116,7 +116,7 @@ OPTIONS + --fields:: + Comma separated list of fields to print. Options are: + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, +- srcline, period, flags. ++ srcline, period, iregs, flags. + Field list can be prepended with the type, trace, sw or hw, + to indicate to which event type the field list applies. 
+ e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace +diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c +index 4430340292c0..eb51325e8ad9 100644 +--- a/tools/perf/builtin-script.c ++++ b/tools/perf/builtin-script.c +@@ -6,6 +6,7 @@ + #include "util/exec_cmd.h" + #include "util/header.h" + #include "util/parse-options.h" ++#include "util/perf_regs.h" + #include "util/session.h" + #include "util/tool.h" + #include "util/symbol.h" +@@ -46,6 +47,7 @@ enum perf_output_field { + PERF_OUTPUT_SYMOFFSET = 1U << 11, + PERF_OUTPUT_SRCLINE = 1U << 12, + PERF_OUTPUT_PERIOD = 1U << 13, ++ PERF_OUTPUT_IREGS = 1U << 14, + }; + + struct output_option { +@@ -66,6 +68,7 @@ struct output_option { + {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, + {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, + {.str = "period", .field = PERF_OUTPUT_PERIOD}, ++ {.str = "iregs", .field = PERF_OUTPUT_IREGS}, + }; + + /* default set to maintain compatibility with current format */ +@@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, + PERF_OUTPUT_PERIOD)) + return -EINVAL; + ++ if (PRINT_FIELD(IREGS) && ++ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", ++ PERF_OUTPUT_IREGS)) ++ return -EINVAL; ++ + return 0; + } + +@@ -352,6 +360,24 @@ out: + return 0; + } + ++static void print_sample_iregs(union perf_event *event __maybe_unused, ++ struct perf_sample *sample, ++ struct thread *thread __maybe_unused, ++ struct perf_event_attr *attr) ++{ ++ struct regs_dump *regs = &sample->intr_regs; ++ uint64_t mask = attr->sample_regs_intr; ++ unsigned i = 0, r; ++ ++ if (!regs) ++ return; ++ ++ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { ++ u64 val = regs->regs[i++]; ++ printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); ++ } ++} ++ + static void print_sample_start(struct perf_sample *sample, + struct thread *thread, + struct perf_evsel *evsel) +@@ -525,6 +551,9 @@ static void process_event(union perf_event *event, 
struct perf_sample *sample, + PERF_MAX_STACK_DEPTH); + } + ++ if (PRINT_FIELD(IREGS)) ++ print_sample_iregs(event, sample, thread, attr); ++ + printf("\n"); + } + +@@ -1643,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) + "comma separated output fields prepend with 'type:'. " + "Valid types: hw,sw,trace,raw. " + "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," +- "addr,symoff,period,flags", parse_output_fields), ++ "addr,symoff,period,iregs,flags", parse_output_fields), + OPT_BOOLEAN('a', "all-cpus", &system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", +-- +2.4.3 + + +From c5e991ee9dff0f8136168ed2d0d1a8cc3620dac4 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 31 Aug 2015 18:41:11 +0200 +Subject: [PATCH 5/9] perf/x86: Add list of register names + +This patch adds a way to locate a register identifier (PERF_REG_X86_*) +based on its name, e.g., AX. + +This will be used by a subsequent patch to improve the flexibility of perf +record.
+ +Signed-off-by: Stephane Eranian +Cc: Adrian Hunter +Cc: Andi Kleen +Cc: David Ahern +Cc: Jiri Olsa +Cc: Kan Liang +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/1441039273-16260-3-git-send-email-eranian@google.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/arch/x86/util/Build | 1 + + tools/perf/arch/x86/util/perf_regs.c | 30 ++++++++++++++++++++++++++++++ + tools/perf/util/perf_regs.h | 7 +++++++ + 3 files changed, 38 insertions(+) + create mode 100644 tools/perf/arch/x86/util/perf_regs.c + +diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build +index 2c55e1b336c5..ff63649fa9ac 100644 +--- a/tools/perf/arch/x86/util/Build ++++ b/tools/perf/arch/x86/util/Build +@@ -2,6 +2,7 @@ libperf-y += header.o + libperf-y += tsc.o + libperf-y += pmu.o + libperf-y += kvm-stat.o ++libperf-y += perf_regs.o + + libperf-$(CONFIG_DWARF) += dwarf-regs.o + +diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c +new file mode 100644 +index 000000000000..087c84ef5234 +--- /dev/null ++++ b/tools/perf/arch/x86/util/perf_regs.c +@@ -0,0 +1,30 @@ ++#include "../../perf.h" ++#include "../../util/perf_regs.h" ++ ++#define REG(n, b) { .name = #n, .mask = 1ULL << (b) } ++#define REG_END { .name = NULL } ++const struct sample_reg sample_reg_masks[] = { ++ REG(AX, PERF_REG_X86_AX), ++ REG(BX, PERF_REG_X86_BX), ++ REG(CX, PERF_REG_X86_CX), ++ REG(DX, PERF_REG_X86_DX), ++ REG(SI, PERF_REG_X86_SI), ++ REG(DI, PERF_REG_X86_DI), ++ REG(BP, PERF_REG_X86_BP), ++ REG(SP, PERF_REG_X86_SP), ++ REG(IP, PERF_REG_X86_IP), ++ REG(FLAGS, PERF_REG_X86_FLAGS), ++ REG(CS, PERF_REG_X86_CS), ++ REG(SS, PERF_REG_X86_SS), ++#ifdef HAVE_ARCH_X86_64_SUPPORT ++ REG(R8, PERF_REG_X86_R8), ++ REG(R9, PERF_REG_X86_R9), ++ REG(R10, PERF_REG_X86_R10), ++ REG(R11, PERF_REG_X86_R11), ++ REG(R12, PERF_REG_X86_R12), ++ REG(R13, PERF_REG_X86_R13), ++ REG(R14, PERF_REG_X86_R14), ++ REG(R15, PERF_REG_X86_R15), ++#endif ++ REG_END ++}; 
+diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h +index 980dbf76bc98..92c1fff2153e 100644 +--- a/tools/perf/util/perf_regs.h ++++ b/tools/perf/util/perf_regs.h +@@ -5,6 +5,13 @@ + + struct regs_dump; + ++struct sample_reg { ++ const char *name; ++ uint64_t mask; ++}; ++ ++extern const struct sample_reg sample_reg_masks[]; ++ + #ifdef HAVE_PERF_REGS_SUPPORT + #include + +-- +2.4.3 + + +From bcc84ec65ad1bd9f777a1fade6f8e5e0c5808fa5 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Mon, 31 Aug 2015 18:41:12 +0200 +Subject: [PATCH 6/9] perf record: Add ability to name registers to record + +This patch modifies the -I/--intr-regs option to enable passing the names +of the registers to sample on interrupt. Registers can be specified by +their symbolic names. For instance on x86, --intr-regs=ax,si. + +The motivation is to reduce the size of the perf.data file and the +overhead of sampling by only collecting the registers useful to a +specific analysis. For instance, for value profiling, sampling only the +registers used to pass arguments to functions. + +With no parameter, the --intr-regs still records all possible registers +based on the architecture. + +To name registers, it is necessary to use the long form of the option, +i.e., --intr-regs: + + $ perf record --intr-regs=si,di,r8,r9 ..... + +To record any possible registers: + + $ perf record -I ..... + $ perf report --intr-regs ... + +To display the register, one can use perf report -D + +To list the available registers: + + $ perf record --intr-regs=\?
+ available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 + +Signed-off-by: Stephane Eranian +Tested-by: Arnaldo Carvalho de Melo +Cc: Adrian Hunter +Cc: Andi Kleen +Cc: David Ahern +Cc: Jiri Olsa +Cc: Kan Liang +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/1441039273-16260-4-git-send-email-eranian@google.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/Documentation/perf-record.txt | 6 ++- + tools/perf/builtin-record.c | 7 +++- + tools/perf/perf.h | 2 +- + tools/perf/util/Build | 1 + + tools/perf/util/evsel.c | 2 +- + tools/perf/util/parse-regs-options.c | 71 ++++++++++++++++++++++++++++++++ + tools/perf/util/parse-regs-options.h | 5 +++ + 7 files changed, 89 insertions(+), 5 deletions(-) + create mode 100644 tools/perf/util/parse-regs-options.c + create mode 100644 tools/perf/util/parse-regs-options.h + +diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt +index 347a27322ed8..2e9ce77b5e14 100644 +--- a/tools/perf/Documentation/perf-record.txt ++++ b/tools/perf/Documentation/perf-record.txt +@@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different. + --intr-regs:: + Capture machine state (registers) at interrupt, i.e., on counter overflows for + each sample. List of captured registers depends on the architecture. This option +-is off by default. ++is off by default. It is possible to select the registers to sample using their ++symbolic names, e.g. on x86, ax, si. To list the available registers use ++--intr-regs=\?. To name registers, pass a comma separated list such as ++--intr-regs=ax,bx. The list of register is architecture dependent. 
++ + + --running-time:: + Record running and enabled time for read events (:S) +diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c +index 1d14f382f614..142eeb341b29 100644 +--- a/tools/perf/builtin-record.c ++++ b/tools/perf/builtin-record.c +@@ -27,8 +27,10 @@ + #include "util/cpumap.h" + #include "util/thread_map.h" + #include "util/data.h" ++#include "util/perf_regs.h" + #include "util/auxtrace.h" + #include "util/parse-branch-options.h" ++#include "util/parse-regs-options.h" + + #include + #include +@@ -1080,8 +1082,9 @@ struct option __record_options[] = { + "sample transaction flags (special events only)"), + OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, + "use per-thread mmaps"), +- OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, +- "Sample machine registers on interrupt"), ++ OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", ++ "sample selected machine registers on interrupt," ++ " use -I ? to list register names", parse_regs), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), + OPT_CALLBACK('k', "clockid", &record.opts, +diff --git a/tools/perf/perf.h b/tools/perf/perf.h +index cccb4cf575d3..90129accffbe 100644 +--- a/tools/perf/perf.h ++++ b/tools/perf/perf.h +@@ -54,7 +54,6 @@ struct record_opts { + bool sample_time_set; + bool callgraph_set; + bool period; +- bool sample_intr_regs; + bool running_time; + bool full_auxtrace; + bool auxtrace_snapshot_mode; +@@ -64,6 +63,7 @@ struct record_opts { + unsigned int auxtrace_mmap_pages; + unsigned int user_freq; + u64 branch_stack; ++ u64 sample_intr_regs; + u64 default_interval; + u64 user_interval; + size_t auxtrace_snapshot_size; +diff --git a/tools/perf/util/Build b/tools/perf/util/Build +index e79e4522368a..349bc96ca1fe 100644 +--- a/tools/perf/util/Build ++++ b/tools/perf/util/Build +@@ -83,6 +83,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ + 
libperf-$(CONFIG_AUXTRACE) += intel-pt.o + libperf-$(CONFIG_AUXTRACE) += intel-bts.o + libperf-y += parse-branch-options.o ++libperf-y += parse-regs-options.o + + libperf-$(CONFIG_LIBELF) += symbol-elf.o + libperf-$(CONFIG_LIBELF) += probe-file.o +diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c +index bac25f41a751..c53f79123b37 100644 +--- a/tools/perf/util/evsel.c ++++ b/tools/perf/util/evsel.c +@@ -787,7 +787,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) + perf_evsel__config_callgraph(evsel, opts, &callchain_param); + + if (opts->sample_intr_regs) { +- attr->sample_regs_intr = PERF_REGS_MASK; ++ attr->sample_regs_intr = opts->sample_intr_regs; + perf_evsel__set_sample_bit(evsel, REGS_INTR); + } + +diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c +new file mode 100644 +index 000000000000..4f2c1c255d81 +--- /dev/null ++++ b/tools/perf/util/parse-regs-options.c +@@ -0,0 +1,71 @@ ++#include "perf.h" ++#include "util/util.h" ++#include "util/debug.h" ++#include "util/parse-options.h" ++#include "util/parse-regs-options.h" ++ ++int ++parse_regs(const struct option *opt, const char *str, int unset) ++{ ++ uint64_t *mode = (uint64_t *)opt->value; ++ const struct sample_reg *r; ++ char *s, *os = NULL, *p; ++ int ret = -1; ++ ++ if (unset) ++ return 0; ++ ++ /* ++ * cannot set it twice ++ */ ++ if (*mode) ++ return -1; ++ ++ /* str may be NULL in case no arg is passed to -I */ ++ if (str) { ++ /* because str is read-only */ ++ s = os = strdup(str); ++ if (!s) ++ return -1; ++ ++ for (;;) { ++ p = strchr(s, ','); ++ if (p) ++ *p = '\0'; ++ ++ if (!strcmp(s, "?")) { ++ fprintf(stderr, "available registers: "); ++ for (r = sample_reg_masks; r->name; r++) { ++ fprintf(stderr, "%s ", r->name); ++ } ++ fputc('\n', stderr); ++ /* just printing available regs */ ++ return -1; ++ } ++ for (r = sample_reg_masks; r->name; r++) { ++ if (!strcasecmp(s, r->name)) ++ break; ++ } ++ if (!r->name) { 
++ ui__warning("unknown register %s," ++ " check man page\n", s); ++ goto error; ++ } ++ ++ *mode |= r->mask; ++ ++ if (!p) ++ break; ++ ++ s = p + 1; ++ } ++ } ++ ret = 0; ++ ++ /* default to all possible regs */ ++ if (*mode == 0) ++ *mode = PERF_REGS_MASK; ++error: ++ free(os); ++ return ret; ++} +diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h +new file mode 100644 +index 000000000000..7d762b188007 +--- /dev/null ++++ b/tools/perf/util/parse-regs-options.h +@@ -0,0 +1,5 @@ ++#ifndef _PERF_PARSE_REGS_OPTIONS_H ++#define _PERF_PARSE_REGS_OPTIONS_H 1 ++struct option; ++int parse_regs(const struct option *opt, const char *str, int unset); ++#endif /* _PERF_PARSE_REGS_OPTIONS_H */ +-- +2.4.3 + + +From 3b27d13940c3710a1128527c43719cb0bb05d73b Mon Sep 17 00:00:00 2001 +From: Wang Nan +Date: Tue, 1 Sep 2015 03:29:44 +0000 +Subject: [PATCH 7/9] perf dwarf: Fix potential array out of bounds access + +There is a problem in the dwarf-regs.c files for sh, sparc and x86 where +it is possible to make an out-of-bounds array access when searching for +register names. + +This patch fixes it by replacing '<=' to '<', so when register (number +== XXX_MAX_REGS), get_arch_regstr() will return NULL. + +Signed-off-by: Wang Nan +Reviewed-by: Matt Fleming +Acked-by: Jiri Olsa +Acked-by: Masami Hiramatsu +Cc: David S. 
Miller +Cc: Zefan Li +Cc: pi3orama@huawei.com +Link: http://lkml.kernel.org/r/1441078184-105038-1-git-send-email-wangnan0@huawei.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/arch/sh/util/dwarf-regs.c | 2 +- + tools/perf/arch/sparc/util/dwarf-regs.c | 2 +- + tools/perf/arch/x86/util/dwarf-regs.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c +index 0d0897f57a10..f8dfa89696f4 100644 +--- a/tools/perf/arch/sh/util/dwarf-regs.c ++++ b/tools/perf/arch/sh/util/dwarf-regs.c +@@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = { + /* Return architecture dependent register string (for kprobe-tracer) */ + const char *get_arch_regstr(unsigned int n) + { +- return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; ++ return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; + } +diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c +index 92eda412fed3..b704fdb9237a 100644 +--- a/tools/perf/arch/sparc/util/dwarf-regs.c ++++ b/tools/perf/arch/sparc/util/dwarf-regs.c +@@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = { + */ + const char *get_arch_regstr(unsigned int n) + { +- return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; ++ return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; + } +diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c +index be22dd463232..a08de0a35b83 100644 +--- a/tools/perf/arch/x86/util/dwarf-regs.c ++++ b/tools/perf/arch/x86/util/dwarf-regs.c +@@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = { + /* Return architecture dependent register string (for kprobe-tracer) */ + const char *get_arch_regstr(unsigned int n) + { +- return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; ++ return (n < ARCH_MAX_REGS) ? 
arch_regs_table[n] : NULL; + } +-- +2.4.3 + + +From 04aa90b529ee45c5ee88997bc214202e07b26979 Mon Sep 17 00:00:00 2001 +From: Wang Nan +Date: Tue, 1 Sep 2015 05:56:45 +0000 +Subject: [PATCH 8/9] perf build: Fix Intel PT instruction decoder dependency + problem + +I hit the following build error randomly: + + ... + /bin/sh: /path/to/kernel/buildperf/util/intel-pt-decoder/inat-tables.c: No such file or directory + ... + LINK /path/to/kernel/buildperf/plugin_mac80211.so + LINK /path/to/kernel/buildperf/plugin_kmem.so + LINK /path/to/kernel/buildperf/plugin_xen.so + LINK /path/to/kernel/buildperf/plugin_hrtimer.so + In file included from util/intel-pt-decoder/intel-pt-insn-decoder.c:25:0: + util/intel-pt-decoder/inat.c:24:25: fatal error: inat-tables.c: No such file or directory + #include "inat-tables.c" + ^ + compilation terminated. + make[4]: *** [/path/to/kernel/buildperf/util/intel-pt-decoder/intel-pt-insn-decoder.o] Error 1 + make[4]: *** Waiting for unfinished jobs.... + LINK /path/to/kernel/buildperf/plugin_function.so + +This is caused by tools/perf/util/intel-pt-decoder/Build, which tries +to generate $(OUTPUT)util/intel-pt-decoder/inat-tables.c automatically +but forgets to ensure the existence of the $(OUTPUT)util/intel-pt-decoder +directory. + +This patch fixes it by adding $(call rule_mkdir) like other similar rules.
+ +Signed-off-by: Wang Nan +Acked-by: Adrian Hunter +Acked-by: Jiri Olsa +Cc: Zefan Li +Cc: pi3orama@163.com +Link: http://lkml.kernel.org/r/1441087005-107540-1-git-send-email-wangnan0@huawei.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/util/intel-pt-decoder/Build | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build +index 240730d682c1..2386322ece4f 100644 +--- a/tools/perf/util/intel-pt-decoder/Build ++++ b/tools/perf/util/intel-pt-decoder/Build +@@ -4,6 +4,7 @@ inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk + inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt + + $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) ++ $(call rule_mkdir) + @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ + + $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c +-- +2.4.3 + + +From af4aeadd8c04303c0aa2d112145c3627e2ebd026 Mon Sep 17 00:00:00 2001 +From: Stephane Eranian +Date: Tue, 1 Sep 2015 11:30:14 +0200 +Subject: [PATCH 9/9] perf tools: Fix link time error with sample_reg_masks on + non x86 + +This patch makes perf compile on non x86 platforms by defining a weak +symbol for sample_reg_masks[] in util/perf_regs.c. + +The patch also moves the REG() and REG_END() macros into the +util/perf_regs.h header file. The macros are renamed to +SMPL_REG/SMPL_REG_END to avoid clashes with other header files.
+ +Signed-off-by: Stephane Eranian +Acked-by: Jiri Olsa +Cc: Adrian Hunter +Cc: Andi Kleen +Cc: David Ahern +Cc: Kan Liang +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: http://lkml.kernel.org/r/1441099814-26783-1-git-send-email-eranian@google.com +Signed-off-by: Arnaldo Carvalho de Melo +--- + tools/perf/arch/x86/util/perf_regs.c | 44 +++++++++++++++++------------------- + tools/perf/util/perf_regs.c | 4 ++++ + tools/perf/util/perf_regs.h | 2 ++ + 3 files changed, 27 insertions(+), 23 deletions(-) + +diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c +index 087c84ef5234..c5db14f36cc7 100644 +--- a/tools/perf/arch/x86/util/perf_regs.c ++++ b/tools/perf/arch/x86/util/perf_regs.c +@@ -1,30 +1,28 @@ + #include "../../perf.h" + #include "../../util/perf_regs.h" + +-#define REG(n, b) { .name = #n, .mask = 1ULL << (b) } +-#define REG_END { .name = NULL } + const struct sample_reg sample_reg_masks[] = { +- REG(AX, PERF_REG_X86_AX), +- REG(BX, PERF_REG_X86_BX), +- REG(CX, PERF_REG_X86_CX), +- REG(DX, PERF_REG_X86_DX), +- REG(SI, PERF_REG_X86_SI), +- REG(DI, PERF_REG_X86_DI), +- REG(BP, PERF_REG_X86_BP), +- REG(SP, PERF_REG_X86_SP), +- REG(IP, PERF_REG_X86_IP), +- REG(FLAGS, PERF_REG_X86_FLAGS), +- REG(CS, PERF_REG_X86_CS), +- REG(SS, PERF_REG_X86_SS), ++ SMPL_REG(AX, PERF_REG_X86_AX), ++ SMPL_REG(BX, PERF_REG_X86_BX), ++ SMPL_REG(CX, PERF_REG_X86_CX), ++ SMPL_REG(DX, PERF_REG_X86_DX), ++ SMPL_REG(SI, PERF_REG_X86_SI), ++ SMPL_REG(DI, PERF_REG_X86_DI), ++ SMPL_REG(BP, PERF_REG_X86_BP), ++ SMPL_REG(SP, PERF_REG_X86_SP), ++ SMPL_REG(IP, PERF_REG_X86_IP), ++ SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), ++ SMPL_REG(CS, PERF_REG_X86_CS), ++ SMPL_REG(SS, PERF_REG_X86_SS), + #ifdef HAVE_ARCH_X86_64_SUPPORT +- REG(R8, PERF_REG_X86_R8), +- REG(R9, PERF_REG_X86_R9), +- REG(R10, PERF_REG_X86_R10), +- REG(R11, PERF_REG_X86_R11), +- REG(R12, PERF_REG_X86_R12), +- REG(R13, PERF_REG_X86_R13), +- REG(R14, PERF_REG_X86_R14), +- REG(R15, PERF_REG_X86_R15), ++ 
SMPL_REG(R8, PERF_REG_X86_R8), ++ SMPL_REG(R9, PERF_REG_X86_R9), ++ SMPL_REG(R10, PERF_REG_X86_R10), ++ SMPL_REG(R11, PERF_REG_X86_R11), ++ SMPL_REG(R12, PERF_REG_X86_R12), ++ SMPL_REG(R13, PERF_REG_X86_R13), ++ SMPL_REG(R14, PERF_REG_X86_R14), ++ SMPL_REG(R15, PERF_REG_X86_R15), + #endif +- REG_END ++ SMPL_REG_END + }; +diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c +index 43168fb0d9a2..885e8ac83997 100644 +--- a/tools/perf/util/perf_regs.c ++++ b/tools/perf/util/perf_regs.c +@@ -2,6 +2,10 @@ + #include "perf_regs.h" + #include "event.h" + ++const struct sample_reg __weak sample_reg_masks[] = { ++ SMPL_REG_END ++}; ++ + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) + { + int i, idx = 0; +diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h +index 92c1fff2153e..2984dcc54d67 100644 +--- a/tools/perf/util/perf_regs.h ++++ b/tools/perf/util/perf_regs.h +@@ -9,6 +9,8 @@ struct sample_reg { + const char *name; + uint64_t mask; + }; ++#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } ++#define SMPL_REG_END { .name = NULL } + + extern const struct sample_reg sample_reg_masks[]; + +-- +2.4.3 +