Commit e0c4a5fc authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates/fixes from Ingo Molnar:
 "Mostly tooling updates, but also two kernel fixes: a call chain
  handling robustness fix and an x86 PMU driver event definition fix"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/callchain: Force USER_DS when invoking perf_callchain_user()
  tools build: Fixup sched_getcpu feature test
  perf tests kmod-path: Don't fail if compressed modules aren't supported
  perf annotate: Fix AArch64 comment char
  perf tools: Fix spelling mistakes
  perf/x86: Fix Broadwell-EP DRAM RAPL events
  perf config: Refactor a duplicated code for obtaining config file name
  perf symbols: Allow user probes on versioned symbols
  perf symbols: Accept symbols starting at address 0
  tools lib string: Adopt prefixcmp() from perf and subcmd
  perf units: Move parse_tag_value() to units.[ch]
  perf ui gtk: Move gtk .so name to the only place where it is used
  perf tools: Move HAS_BOOL define to where perl headers are used
  perf memswap: Split the byteswap memory range wrappers from util.[ch]
  perf tools: Move event prototypes from util.h to event.h
  perf buildid: Move prototypes from util.h to build-id.h
parents dfcb7b23 88b0193d
......@@ -761,7 +761,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
......
......@@ -229,12 +229,18 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
}
if (regs) {
mm_segment_t fs;
if (crosstask)
goto exit_put;
if (add_mark)
perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
fs = get_fs();
set_fs(USER_DS);
perf_callchain_user(&ctx, regs);
set_fs(fs);
}
}
......
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>
int main(void)
......
......@@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size);
char *str_error_r(int errnum, char *buf, size_t buflen);
int prefixcmp(const char *str, const char *prefix);
#endif /* _LINUX_STRING_H_ */
......@@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size)
}
return ret;
}
int prefixcmp(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 0;
else if (*str != *prefix)
return (unsigned char)*prefix - (unsigned char)*str;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/string.h>
#include <termios.h>
#include <sys/ioctl.h>
#include <sys/types.h>
......
#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/types.h>
#include <stdio.h>
#include <stdlib.h>
......
......@@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add)
free(tmp);
}
static inline int prefixcmp(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 0;
else if (*str != *prefix)
return (unsigned char)*prefix - (unsigned char)*str;
}
#endif /* __SUBCMD_UTIL_H */
......@@ -76,7 +76,7 @@ REPORT OPTIONS
-c::
--coalesce::
Specify sorintg fields for single cacheline display.
Specify sorting fields for single cacheline display.
Following fields are available: tid,pid,iaddr,dso
(see COALESCE)
......@@ -106,7 +106,7 @@ REPORT OPTIONS
-d::
--display::
Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
C2C RECORD
----------
......
......@@ -225,7 +225,7 @@ OPTIONS
the libunwind or libdw library) should be used instead.
Using the "lbr" method doesn't require any compiler options. It
will produce call graphs from the hardware LBR registers. The
main limition is that it is only available on new Intel
main limitation is that it is only available on new Intel
platforms, such as Haswell. It can only get user call chain. It
doesn't work with branch stack sampling at the same time.
......
......@@ -182,7 +182,7 @@ OPTIONS
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
function and searched through the callchain, thus it requires callchain
information recorded. The pattern is in the exteneded regex format and
information recorded. The pattern is in the extended regex format and
defaults to "\^sys_|^do_page_fault", see '--sort parent'.
-x::
......@@ -207,8 +207,8 @@ OPTIONS
-g::
--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
call order, sort key, optional branch and value. Note that ordering of
parameters is not fixed so any parement can be given in an arbitraty order.
call order, sort key, optional branch and value. Note that ordering
is not fixed so any parameter can be given in an arbitrary order.
One exception is the print_limit which should be preceded by threshold.
print_type can be either:
......
......@@ -270,7 +270,7 @@ When the event stream contains multiple events each event is identified
by an ID. This can be either through the PERF_SAMPLE_ID or the
PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
at a fixed offset from the event header, which allows reliable
parsing of the header. Relying on ID may be ambigious.
parsing of the header. Relying on ID may be ambiguous.
IDENTIFIER is only supported by newer Linux kernels.
Perf record specific events:
......@@ -288,7 +288,7 @@ struct attr_event {
uint64_t id[];
};
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* depreceated */
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
#define MAX_EVENT_NAME 64
......
......@@ -23,7 +23,7 @@ For memory address profiling, try: perf mem record / perf mem report
For tracepoint events, try: perf report -s trace_fields
To record callchains for each sample: perf record -g
To record every process run by a user: perf record -u <user>
Skip collecing build-id when recording: perf record -B
Skip collecting build-id when recording: perf record -B
To change sampling frequency to 100 Hz: perf record -F 100
See assembly instructions with percentage: perf annotate <symbol>
If you prefer Intel style assembly, try: perf annotate -M intel
......
......@@ -50,7 +50,7 @@ static int arm64__annotate_init(struct arch *arch)
arch->initialized = true;
arch->priv = arm;
arch->associate_instruction_ops = arm64__associate_instruction_ops;
arch->objdump.comment_char = ';';
arch->objdump.comment_char = '/';
arch->objdump.skip_functions_char = '+';
return 0;
......
......@@ -52,6 +52,18 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
return strcmp(namea, nameb);
}
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n)
{
/* Skip over initial dot */
if (*namea == '.')
namea++;
if (*nameb == '.')
nameb++;
return strncmp(namea, nameb, n);
}
#endif
#if defined(_CALL_ELF) && _CALL_ELF == 2
......
......@@ -49,19 +49,22 @@ static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
char to[PATH_MAX];
const char *name;
u64 addr1 = 0, addr2 = 0;
int i;
int i, err = -1;
scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
addr1 = kallsyms__get_function_start(from, name);
if (addr1)
err = kallsyms__get_function_start(from, name, &addr1);
if (!err)
break;
}
if (name)
addr2 = kallsyms__get_function_start(to, name);
if (err)
return false;
if (kallsyms__get_function_start(to, name, &addr2))
return false;
return addr1 == addr2;
}
......
......@@ -27,6 +27,7 @@
#include "tool.h"
#include "data.h"
#include "sort.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include <asm/bug.h>
......
......@@ -159,6 +159,7 @@ int cmd_config(int argc, const char **argv)
int i, ret = 0;
struct perf_config_set *set;
char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
const char *config_filename;
argc = parse_options(argc, argv, config_options, config_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
......@@ -175,6 +176,11 @@ int cmd_config(int argc, const char **argv)
else if (use_user_config)
config_exclusive_filename = user_config;
if (!config_exclusive_filename)
config_filename = user_config;
else
config_filename = config_exclusive_filename;
/*
* At only 'config' sub-command, individually use the config set
* because of reinitializing with options config file location.
......@@ -192,13 +198,9 @@ int cmd_config(int argc, const char **argv)
parse_options_usage(config_usage, config_options, "l", 1);
} else {
ret = show_config(set);
if (ret < 0) {
const char * config_filename = config_exclusive_filename;
if (!config_exclusive_filename)
config_filename = user_config;
if (ret < 0)
pr_err("Nothing configured, "
"please check your %s \n", config_filename);
}
}
break;
default:
......@@ -221,13 +223,8 @@ int cmd_config(int argc, const char **argv)
if (value == NULL)
ret = show_spec_config(set, var);
else {
const char *config_filename = config_exclusive_filename;
if (!config_exclusive_filename)
config_filename = user_config;
else
ret = set_config(set, config_filename, var, value);
}
free(arg);
}
} else
......
......@@ -27,13 +27,13 @@
#include "util/drv_configs.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/event.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
......
......@@ -21,6 +21,7 @@
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/event.h"
#include "util/evlist.h"
#include <subcmd/exec-cmd.h>
#include "util/machine.h"
......
......@@ -17,6 +17,7 @@
#include <subcmd/parse-options.h>
#include "util/bpf-loader.h"
#include "util/debug.h"
#include "util/event.h"
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <errno.h>
......
#include "perf.h"
#include "util/debug.h"
#include "util/event.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
......
......@@ -3,6 +3,7 @@
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/thread.h"
......
#include "perf.h"
#include "util/debug.h"
#include "util/event.h"
#include "util/symbol.h"
#include "util/sort.h"
#include "util/evsel.h"
......
......@@ -61,6 +61,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
#ifdef HAVE_ZLIB_SUPPORT
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz");
T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz");
......@@ -96,6 +97,7 @@ int test__kmod_path__parse(int subtest __maybe_unused)
M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
M("x.ko.gz", PERF_RECORD_MISC_USER, false);
#endif
/* path alloc_name alloc_ext kmod comp name ext */
T("[test_module]", true , true , true, false, "[test_module]", NULL);
......
#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <util/util.h>
#include <util/evlist.h>
#include <linux/filter.h>
#include "tests.h"
......
......@@ -10,7 +10,10 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
void *perf_gtk_handle;
int use_browser = -1;
#define PERF_GTK_DSO "libperf-gtk.so"
#ifdef HAVE_GTK2_SUPPORT
static int setup_gtk_browser(void)
{
int (*perf_ui_init)(void);
......
......@@ -13,6 +13,7 @@ libperf-y += find_bit.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
libperf-y += llvm-utils.o
libperf-y += memswap.o
libperf-y += parse-events.o
libperf-y += perf_regs.o
libperf-y += path.o
......
......@@ -44,6 +44,10 @@ bool build_id_cache__cached(const char *sbuild_id);
int build_id_cache__add_s(const char *sbuild_id,
const char *name, bool is_kallsyms, bool is_vdso);
int build_id_cache__remove_s(const char *sbuild_id);
extern char buildid_dir[];
void set_buildid_dir(const char *dir);
void disable_buildid_cache(void);
#endif
......@@ -8,6 +8,7 @@
#include <unistd.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
#include <linux/perf_event.h>
#include "event.h"
#include "debug.h"
#include "hist.h"
......@@ -767,15 +768,16 @@ static int find_symbol_cb(void *arg, const char *name, char type,
return 1;
}
u64 kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name)
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr)
{
struct process_symbol_args args = { .name = symbol_name, };
if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
return 0;
return -1;
return args.start;
*addr = args.start;
return 0;
}
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
......
......@@ -229,7 +229,7 @@ struct build_id_event {
enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* depreceated */
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
PERF_RECORD_HEADER_TRACING_DATA = 66,
PERF_RECORD_HEADER_BUILD_ID = 67,
PERF_RECORD_FINISHED_ROUND = 68,
......@@ -675,10 +675,18 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name);
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr);
void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
u16 type, int max);
void event_attr_init(struct perf_event_attr *attr);
int perf_event_paranoid(void);
extern int sysctl_perf_event_max_stack;
extern int sysctl_perf_event_max_contexts_per_stack;
#endif /* __PERF_RECORD_H */
......@@ -21,6 +21,7 @@
#include "asm/bug.h"
#include "callchain.h"
#include "cgroup.h"
#include "event.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
......
......@@ -19,6 +19,7 @@
#include "evlist.h"
#include "evsel.h"
#include "header.h"
#include "memswap.h"
#include "../perf.h"
#include "trace-event.h"
#include "session.h"
......
......@@ -23,6 +23,7 @@
#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "memswap.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
......
......@@ -796,11 +796,11 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
* Returns the name of the start symbol in *symbol_name. Pass in NULL as
* symbol_name if it's not that important.
*/
static u64 machine__get_running_kernel_start(struct machine *machine,
const char **symbol_name)
static int machine__get_running_kernel_start(struct machine *machine,
const char **symbol_name, u64 *start)
{
char filename[PATH_MAX];
int i;
int i, err = -1;
const char *name;
u64 addr = 0;
......@@ -810,21 +810,28 @@ static u64 machine__get_running_kernel_start(struct machine *machine,
return 0;
for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
addr = kallsyms__get_function_start(filename, name);
if (addr)
err = kallsyms__get_function_start(filename, name, &addr);
if (!err)
break;
}
if (err)
return -1;
if (symbol_name)
*symbol_name = name;
return addr;
*start = addr;
return 0;
}
int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
int type;
u64 start = machine__get_running_kernel_start(machine, NULL);
u64 start = 0;
if (machine__get_running_kernel_start(machine, NULL, &start))
return -1;
/* In case of renewal the kernel map, destroy previous one */
machine__destroy_kernel_maps(machine);
......@@ -1185,8 +1192,8 @@ static int machine__create_modules(struct machine *machine)
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
const char *name;
u64 addr;
const char *name = NULL;
u64 addr = 0;
int ret;
if (kernel == NULL)
......@@ -1211,8 +1218,7 @@ int machine__create_kernel_maps(struct machine *machine)
*/
map_groups__fixup_end(&machine->kmaps);
addr = machine__get_running_kernel_start(machine, &name);
if (!addr) {
if (machine__get_running_kernel_start(machine, &name, &addr)) {
} else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
machine__destroy_kernel_maps(machine);
return -1;
......
......@@ -325,11 +325,6 @@ int map__load(struct map *map)
return 0;
}
int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
{
return strcmp(namea, nameb);
}
struct symbol *map__find_symbol(struct map *map, u64 addr)
{
if (map__load(map) < 0)
......
......@@ -130,13 +130,14 @@ struct thread;
*/
#define __map__for_each_symbol_by_name(map, sym_name, pos) \
for (pos = map__find_symbol_by_name(map, sym_name); \
pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \
pos && \
!symbol__match_symbol_name(pos->name, sym_name, \
SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
pos = symbol__next_by_name(pos))
#define map__for_each_symbol_by_name(map, sym_name, pos) \
__map__for_each_symbol_by_name(map, sym_name, (pos))
int arch__compare_symbol_names(const char *namea, const char *nameb);
void map__init(struct map *map, enum map_type type,
u64 start, u64 end, u64 pgoff, struct dso *dso);
struct map *map__new(struct machine *machine, u64 start, u64 len,
......
#include <byteswap.h>
#include "memswap.h"
#include <linux/types.h>
void mem_bswap_32(void *src, int byte_size)
{
u32 *m = src;
while (byte_size > 0) {
*m = bswap_32(*m);
byte_size -= sizeof(u32);
++m;
}
}
void mem_bswap_64(void *src, int byte_size)
{
u64 *m = src;
while (byte_size > 0) {
*m = bswap_64(*m);
byte_size -= sizeof(u64);
++m;
}
}
#ifndef PERF_MEMSWAP_H_
#define PERF_MEMSWAP_H_
void mem_bswap_64(void *src, int byte_size);
void mem_bswap_32(void *src, int byte_size);
#endif /* PERF_MEMSWAP_H_ */
......@@ -28,7 +28,9 @@
#include <linux/bitmap.h>
#include <linux/time64.h>
#include "../util.h"
#include <stdbool.h>
/* perl needs the following define, right after including stdbool.h */
#define HAS_BOOL
#include <EXTERN.h>
#include <perl.h>
......
......@@ -11,6 +11,7 @@
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
......
......@@ -3,15 +3,6 @@
#include <linux/kernel.h>
#include <errno.h>
int prefixcmp(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 0;
else if (*str != *prefix)
return (unsigned char)*prefix - (unsigned char)*str;
}
/*
* Used as the default ->buf value, so that people can always assume