diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 865dea55454b52d89bbaf1136bd2c0e7bbbee47a..190cc886ab9105ea6be58ad58caf8e940274c4a9 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2398,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 				break;
 			*val = left + right;
 			break;
+		case '~':
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = ~right;
+			break;
 		default:
 			do_warning("unknown op '%s'", arg->op.op);
 			ret = 0;
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 14d9e8ffaff744f734e7e4fa29a5997dd2d57e76..04f23b404bbc5a7bd4b5bcdfb7e833d79ba1234e 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -147,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms)
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
 	example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
@@ -219,6 +223,29 @@ $ perf stat -- make -j
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format output
+Commas in the output are not put into "". To make it easy to parse
+it is recommended to use a different character like -x \;
+
+The fields are in this order:
+
+	- optional usec time stamp in fractions of second (with -I xxx)
+	- optional CPU, core, or socket identifier
+	- optional number of logical CPUs aggregated
+	- counter value
+	- unit of the counter value or empty
+	- event name
+	- run time of counter
+	- percentage of measurement time the counter was running
+	- optional variance if multiple values are collected with -r
+	- optional metric value
+	- optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 9f9cea3478fdda25aede1cfbf865b107c88a262c..56e05f126ad8793d25bb1a70ce3dac5f100d3b3a 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 09ba923debe86810f8380f7df54504dee4232ec8..269af21437353b2fb886383ede1e3f9a2f586f25 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index b2885776b60207f73b39f2fcbb176a26c36ff8f4..7fa68663ed7287f63adad2342db987941ba0c1b4 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#ifdef HAVE_JITDUMP
 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
 			       union perf_event *event __maybe_unused,
 			       struct ordered_events *oe __maybe_unused)
@@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
 	return err;
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#ifdef HAVE_JITDUMP
 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_sample *sample,
@@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
 	return err;
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#ifdef HAVE_JITDUMP
 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_sample *sample,
@@ -778,7 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show build ids, etc)"),
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -795,7 +797,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf inject [<options>]",
 		NULL
 	};
-#ifndef HAVE_LIBELF_SUPPORT
+#ifndef HAVE_JITDUMP
 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
 #endif
 	argc = parse_options(argc, argv, options, inject_usage, 0);
@@ -833,7 +835,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		inject.tool.ordered_events = true;
 		inject.tool.ordering_requires_timestamps = true;
 	}
-#ifdef HAVE_LIBELF_SUPPORT
+#ifdef HAVE_JITDUMP
 	if (inject.jit_mode) {
 		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
 		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index baa82078c148a132115eed28749d0ae3a24f924f..1f19f2f999c841b9da140e10bcaf5e6e0f41ee6b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -122,6 +122,7 @@ static bool			sync_run			= false;
 static unsigned int		initial_delay			= 0;
 static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
+static bool			metric_only			= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -827,6 +828,99 @@ static void print_metric_csv(void *ctx,
 	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
 }
 
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	char buf[1024];
+	unsigned mlen = METRIC_ONLY_LEN;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (color)
+		n = color_fprintf(out, color, fmt, val);
+	else
+		n = fprintf(out, fmt, val);
+	if (n > METRIC_ONLY_LEN)
+		n = METRIC_ONLY_LEN;
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+	fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (csv_output)
+		fprintf(os->fh, "%s%s", unit, csv_sep);
+	else
+		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -921,9 +1015,16 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	print_metric_t pm = print_metric_std;
 	void (*nl)(void *);
 
-	nl = new_line_std;
+	if (metric_only) {
+		nl = new_line_metric;
+		if (csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
 
-	if (csv_output) {
+	if (csv_output && !metric_only) {
 		static int aggr_fields[] = {
 			[AGGR_GLOBAL] = 0,
 			[AGGR_THREAD] = 1,
@@ -940,6 +1041,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 			os.nfields++;
 	}
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (metric_only) {
+			pm(&os, NULL, "", "", 0);
+			return;
+		}
 		aggr_printout(counter, id, nr);
 
 		fprintf(stat_config.output, "%*s%s",
@@ -968,7 +1073,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 		return;
 	}
 
-	if (nsec_counter(counter))
+	if (metric_only)
+		/* nothing */;
+	else if (nsec_counter(counter))
 		nsec_printout(id, nr, counter, uval);
 	else
 		abs_printout(id, nr, counter, uval);
@@ -977,7 +1084,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	out.new_line = nl;
 	out.ctx = &os;
 
-	if (csv_output) {
+	if (csv_output && !metric_only) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
@@ -985,7 +1092,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	perf_stat__print_shadow_stats(counter, uval,
 				first_shadow_cpu(counter, id),
 				&out);
-	if (!csv_output) {
+	if (!csv_output && !metric_only) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
@@ -1021,14 +1128,23 @@ static void print_aggr(char *prefix)
 	int cpu, s, s2, id, nr;
 	double uval;
 	u64 ena, run, val;
+	bool first;
 
 	if (!(aggr_map || aggr_get_id))
 		return;
 
 	aggr_update_shadow();
 
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
 	for (s = 0; s < aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
 		id = aggr_map->map[s];
+		first = true;
 		evlist__for_each(evsel_list, counter) {
 			val = ena = run = 0;
 			nr = 0;
@@ -1041,13 +1157,20 @@ static void print_aggr(char *prefix)
 				run += perf_counts(counter->counts, cpu, 0)->run;
 				nr++;
 			}
-			if (prefix)
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(counter, id, nr);
+			}
+			if (prefix && !metric_only)
 				fprintf(output, "%s", prefix);
 
 			uval = val * counter->scale;
 			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
-			fputc('\n', output);
+			if (!metric_only)
+				fputc('\n', output);
 		}
+		if (metric_only)
+			fputc('\n', output);
 	}
 }
 
@@ -1092,12 +1215,13 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	avg_enabled = avg_stats(&ps->res_stats[1]);
 	avg_running = avg_stats(&ps->res_stats[2]);
 
-	if (prefix)
+	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
 	uval = avg * counter->scale;
 	printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
-	fprintf(output, "\n");
+	if (!metric_only)
+		fprintf(output, "\n");
 }
 
 /*
@@ -1126,6 +1250,73 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 	}
 }
 
+static void print_no_aggr_metric(char *prefix)
+{
+	int cpu;
+	int nrcpus = 0;
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	double uval;
+
+	nrcpus = evsel_list->cpus->nr;
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, stat_config.output);
+		evlist__for_each(evsel_list, counter) {
+			if (first) {
+				aggr_printout(counter, cpu, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, cpu, 0)->val;
+			ena = perf_counts(counter->counts, cpu, 0)->ena;
+			run = perf_counts(counter->counts, cpu, 0)->run;
+
+			uval = val * counter->scale;
+			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+		}
+		fputc('\n', stat_config.output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static void print_metric_headers(char *prefix)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = stat_config.output
+	};
+
+	if (prefix)
+		fprintf(stat_config.output, "%s", prefix);
+
+	if (!csv_output)
+		fprintf(stat_config.output, "%*s",
+			aggr_header_lens[stat_config.aggr_mode], "");
+
+	/* Print metrics headers only */
+	evlist__for_each(evsel_list, counter) {
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(counter, 0,
+					      0,
+					      &out);
+	}
+	fputc('\n', stat_config.output);
+}
+
 static void print_interval(char *prefix, struct timespec *ts)
 {
 	FILE *output = stat_config.output;
@@ -1133,7 +1324,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 
 	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-	if (num_print_interval == 0 && !csv_output) {
+	if (num_print_interval == 0 && !csv_output && !metric_only) {
 		switch (stat_config.aggr_mode) {
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1220,6 +1411,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	else
 		print_header(argc, argv);
 
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0)
+			print_metric_headers(prefix);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(stat_config.output, "%s", prefix);
+	}
+
 	switch (stat_config.aggr_mode) {
 	case AGGR_CORE:
 	case AGGR_SOCKET:
@@ -1232,10 +1434,16 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
+		if (metric_only)
+			fputc('\n', stat_config.output);
 		break;
 	case AGGR_NONE:
-		evlist__for_each(evsel_list, counter)
-			print_counter(counter, prefix);
+		if (metric_only)
+			print_no_aggr_metric(prefix);
+		else {
+			evlist__for_each(evsel_list, counter)
+				print_counter(counter, prefix);
+		}
 		break;
 	case AGGR_UNSET:
 	default:
@@ -1356,6 +1564,8 @@ static const struct option stat_options[] = {
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
+	OPT_BOOLEAN(0, "metric-only", &metric_only,
+			"Only print computed metrics. No raw values"),
 	OPT_END()
 };
 
@@ -1997,6 +2207,16 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		goto out;
 	}
 
+	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+		goto out;
+	}
+
+	if (metric_only && run_count > 1) {
+		fprintf(stderr, "--metric-only is not supported with -r\n");
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index f7aeaf303f5a0c072dc91ccd595725fb36986567..eca6a912e8c22b0df342a7d955c6bad2db5ddf47 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -328,6 +328,13 @@ ifndef NO_LIBELF
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e0e217ec856b182afbd0e30eaab00df36e594225..4b98165559462025ded4db4a41fe8b5e66a9f151 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1928,8 +1928,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 
 static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
 						 struct hist_entry *he,
-						 FILE *fp, int level,
-						 int nr_sort_keys)
+						 FILE *fp, int level)
 {
 	char s[8192];
 	int printed = 0;
@@ -1939,23 +1938,20 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
 		.size = sizeof(s),
 	};
 	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
 	bool first = true;
 	int ret;
-	int hierarchy_indent = nr_sort_keys * HIERARCHY_INDENT;
+	int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
 
 	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
 
 	folded_sign = hist_entry__folded(he);
 	printed += fprintf(fp, "%c", folded_sign);
 
-	hists__for_each_format(he->hists, fmt) {
-		if (perf_hpp__should_skip(fmt, he->hists))
-			continue;
-
-		if (perf_hpp__is_sort_entry(fmt) ||
-		    perf_hpp__is_dynamic_entry(fmt))
-			break;
-
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&he->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
 		if (!first) {
 			ret = scnprintf(hpp.buf, hpp.size, "  ");
 			advance_hpp(&hpp, ret);
@@ -1992,7 +1988,6 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
 						   browser->min_pcnt);
 	int printed = 0;
-	int nr_sort = browser->hists->nr_sort_keys;
 
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -2000,8 +1995,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 		if (symbol_conf.report_hierarchy) {
 			printed += hist_browser__fprintf_hierarchy_entry(browser,
 									 h, fp,
-									 h->depth,
-									 nr_sort);
+									 h->depth);
 		} else {
 			printed += hist_browser__fprintf_entry(browser, h, fp);
 		}
@@ -2142,11 +2136,18 @@ static int hists__browser_title(struct hists *hists,
 	if (hists->uid_filter_str)
 		printed += snprintf(bf + printed, size - printed,
 				    ", UID: %s", hists->uid_filter_str);
-	if (thread)
-		printed += scnprintf(bf + printed, size - printed,
+	if (thread) {
+		if (sort__has_thread) {
+			printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", DSO: %s", dso->short_name);
@@ -2321,15 +2322,24 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 {
 	struct thread *thread = act->thread;
 
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+		return 0;
+
 	if (browser->hists->thread_filter) {
 		pstack__remove(browser->pstack, &browser->hists->thread_filter);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		thread__zput(browser->hists->thread_filter);
 		ui_helpline__pop();
 	} else {
-		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-				   thread->comm_set ? thread__comm_str(thread) : "",
-				   thread->tid);
+		if (sort__has_thread) {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "",
+					   thread->tid);
+		} else {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "");
+		}
+
 		browser->hists->thread_filter = thread__get(thread);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -2344,13 +2354,22 @@ static int
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, struct thread *thread)
 {
-	if (!sort__has_thread || thread == NULL)
+	int ret;
+
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
 		return 0;
 
-	if (asprintf(optstr, "Zoom %s %s(%d) thread",
-		     browser->hists->thread_filter ? "out of" : "into",
-		     thread->comm_set ? thread__comm_str(thread) : "",
-		     thread->tid) < 0)
+	if (sort__has_thread) {
+		ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "",
+			       thread->tid);
+	} else {
+		ret = asprintf(optstr, "Zoom %s %s thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "");
+	}
+	if (ret < 0)
 		return 0;
 
 	act->thread = thread;
@@ -2363,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
 	struct map *map = act->ms.map;
 
+	if (!sort__has_dso || map == NULL)
+		return 0;
+
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
 		perf_hpp__set_elide(HISTC_DSO, false);
@@ -2514,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused,
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+	if (!sort__has_socket || act->socket < 0)
+		return 0;
+
 	if (browser->hists->socket_filter > -1) {
 		pstack__remove(browser->pstack, &browser->hists->socket_filter);
 		browser->hists->socket_filter = -1;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f03c4f70438fb90daf720164edf65d5a680f2d30..3baeaa6e71b5a51e113b8b485df97b7c8ae003a2 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -515,9 +515,6 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list,
 void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 					struct perf_hpp_fmt *format)
 {
-	if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format))
-		list->nr_sort_keys++;
-
 	list_add_tail(&format->sort_list, &list->sorts);
 }
 
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index df2b690970ac39f4f78102bddd4a062db0ce5f2c..eea25e2424e99172715c681a05ffc68b3c58e78a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -107,9 +107,12 @@ libperf-y += scripting-engines/
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
 libperf-y += demangle-java.o
+
+ifdef CONFIG_JITDUMP
 libperf-$(CONFIG_LIBELF) += jitdump.o
 libperf-$(CONFIG_LIBELF) += genelf.o
 libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 # avoid compiler warnings in 32-bit mode
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index efad78f811adcc70697b35d20baa08fe4ee247ec..501ea6e565f13a4a4817947957c79f15d805d130 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,10 +93,8 @@ struct perf_evsel {
 	const char		*unit;
 	struct event_format	*tp_format;
 	off_t			id_offset;
-	union {
-		void		*priv;
-		u64		db_id;
-	};
+	void			*priv;
+	u64			db_id;
 	struct cgroup_sel	*cgrp;
 	void			*handler;
 	struct cpu_map		*cpus;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 29da9e0d8db90591e8c3c82798a5f06713dcd5d8..290b3cbf68772de883bff4fee5c2294234dbefd3 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1087,10 +1087,103 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
  */
 
 static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+					    check_thread_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+					    perf_hpp__is_dso_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+					    perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
 
 static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 						 struct rb_root *root,
 						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
 						 struct perf_hpp_list *hpp_list)
 {
 	struct rb_node **p = &root->rb_node;
@@ -1125,11 +1218,13 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 	if (new == NULL)
 		return NULL;
 
-	hists__apply_filters(hists, new);
 	hists->nr_entries++;
 
 	/* save related format list for output */
 	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
 
 	/* some fields are now passed to 'new' */
 	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
@@ -1170,14 +1265,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
 			continue;
 
 		/* insert copy of 'he' for each fmt into the hierarchy */
-		new_he = hierarchy_insert_entry(hists, root, he, &node->hpp);
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
 		if (new_he == NULL) {
 			ret = -1;
 			break;
 		}
 
 		root = &new_he->hroot_in;
-		new_he->parent_he = parent;
 		new_he->depth = depth++;
 		parent = new_he;
 	}
@@ -1359,6 +1453,31 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 	hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	node = rb_first(&hists->entries);
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * recalculate total period using top-level entries only
+	 * since lower level entries only see non-filtered entries
+	 * but upper level entries have sum of both entries.
+	 */
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node);
+		node = rb_next(node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
 static void hierarchy_insert_output_entry(struct rb_root *root,
 					  struct hist_entry *he)
 {
@@ -1424,11 +1543,6 @@ static void hists__hierarchy_output_resort(struct hists *hists,
 			continue;
 		}
 
-		/* only update stat for leaf entries to avoid duplication */
-		hists__inc_stats(hists, he);
-		if (!he->filtered)
-			hists__calc_col_len(hists, he);
-
 		if (!use_callchain)
 			continue;
 
@@ -1508,11 +1622,13 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
 	hists__reset_col_len(hists);
 
 	if (symbol_conf.report_hierarchy) {
-		return hists__hierarchy_output_resort(hists, prog,
-						      &hists->entries_collapsed,
-						      &hists->entries,
-						      min_callchain_hits,
-						      use_callchain);
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
 	}
 
 	if (sort__need_collapse)
@@ -1833,6 +1949,8 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a
 		}
 	}
 
+	hierarchy_recalc_total_periods(hists);
+
 	/*
 	 * resort output after applying a new filter since filter in a lower
 	 * hierarchy can change periods in a upper hierarchy.
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2cb017f28f9ec98a5cae6c7dc41dbfe308fa7aa9..ead18c82294faf881f1d4bc89be653e27ff5c708 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -79,7 +79,6 @@ struct hists {
 	int			socket_filter;
 	struct perf_hpp_list	*hpp_list;
 	struct list_head	hpp_formats;
-	int			nr_sort_keys;
 	int			nr_hpp_node;
 };
 
@@ -241,7 +240,6 @@ struct perf_hpp_fmt {
 struct perf_hpp_list {
 	struct list_head fields;
 	struct list_head sorts;
-	int nr_sort_keys;
 };
 
 extern struct perf_hpp_list perf_hpp_list;
@@ -318,6 +316,10 @@ bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *his
 bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
 
 struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d8cd038baed267335891ac87ed4477fc886e5c6e..adef23b1352e836fec9f0f9a5290c578bb25cc43 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	char scale[128];
 	int fd, ret = -1;
 	char path[PATH_MAX];
-	const char *lc;
+	char *lc;
 
 	snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -146,7 +146,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 
-	free((char *) lc);
+	free(lc);
 
 	ret = 0;
 error:
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 041f236379e012a97e6bf3726dd1876387c8d51d..93fa136b0025c02e1e4383be523fcf37c092f36e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -27,6 +27,7 @@ int		sort__has_sym = 0;
 int		sort__has_dso = 0;
 int		sort__has_socket = 0;
 int		sort__has_thread = 0;
+int		sort__has_comm = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
 /*
@@ -1488,38 +1489,26 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
 	return format->header == __sort__hpp_header;
 }
 
-bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
-
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_trace;
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
 }
 
-bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
 
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_srcline;
-}
-
-bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
-
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_srcfile;
-}
 
 static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
 {
@@ -1602,31 +1591,47 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
 {
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
+	int ret = -1;
+	int r;
 
-	fmt = he->fmt;
-	if (fmt == NULL || !perf_hpp__is_sort_entry(fmt))
-		return -1;
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
 
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	if (hse->se->se_filter == NULL)
-		return -1;
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_filter == NULL)
+			continue;
+
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r >= 0) {
+			if (ret < 0)
+				ret = 0;
+			ret |= r;
+		}
+	}
 
-	return hse->se->se_filter(he, type, arg);
+	return ret;
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, int level)
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
 {
 	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__register_sort_field(&hse->hpp);
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
 	return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list,
-					    struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
 {
 	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
@@ -2147,12 +2152,14 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
 	return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd, int level)
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_sort(sd, level) < 0)
+	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
 		return -1;
 
 	if (sd->entry->se_collapse)
@@ -2163,7 +2170,9 @@ static int __sort_dimension__add(struct sort_dimension *sd, int level)
 	return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd, int level)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
 {
 	struct perf_hpp_fmt *fmt;
 
@@ -2175,7 +2184,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, int level)
 		return -1;
 
 	hd->taken = 1;
-	perf_hpp__register_sort_field(fmt);
+	perf_hpp_list__register_sort_field(list, fmt);
 	return 0;
 }
 
@@ -2185,7 +2194,7 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_output(list, sd) < 0)
+	if (__sort_dimension__add_hpp_output(sd, list) < 0)
 		return -1;
 
 	sd->taken = 1;
@@ -2215,7 +2224,8 @@ int hpp_dimension__add_output(unsigned col)
 	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			       struct perf_evlist *evlist __maybe_unused,
 			       int level)
 {
 	unsigned int i;
@@ -2253,9 +2263,11 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 			sort__has_socket = 1;
 		} else if (sd->entry == &sort_thread) {
 			sort__has_thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			sort__has_comm = 1;
 		}
 
-		return __sort_dimension__add(sd, level);
+		return __sort_dimension__add(sd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2264,7 +2276,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add(hd, level);
+		return __hpp_dimension__add(hd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2279,7 +2291,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd, level);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2295,7 +2307,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (sd->entry == &sort_mem_daddr_sym)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd, level);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2305,7 +2317,8 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 	return -ESRCH;
 }
 
-static int setup_sort_list(char *str, struct perf_evlist *evlist)
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct perf_evlist *evlist)
 {
 	char *tmp, *tok;
 	int ret = 0;
@@ -2332,7 +2345,7 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist)
 		}
 
 		if (*tok) {
-			ret = sort_dimension__add(tok, evlist, level);
+			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
 				error("Invalid --sort key: `%s'", tok);
 				break;
@@ -2480,7 +2493,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 		}
 	}
 
-	ret = setup_sort_list(str, evlist);
+	ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
 	free(str);
 	return ret;
@@ -2693,29 +2706,6 @@ static int __setup_output_field(void)
 	return ret;
 }
 
-static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel;
-
-	evlist__for_each(evlist, evsel) {
-		struct perf_hpp_fmt *fmt;
-		struct hists *hists = evsel__hists(evsel);
-
-		hists->nr_sort_keys = perf_hpp_list.nr_sort_keys;
-
-		/*
-		 * If dynamic entries were used, it might add multiple
-		 * entries to each evsel for a single field name.  Set
-		 * actual number of sort keys for each hists.
-		 */
-		perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
-			if (perf_hpp__is_dynamic_entry(fmt) &&
-			    !perf_hpp__defined_dynamic_entry(fmt, hists))
-				hists->nr_sort_keys--;
-		}
-	}
-}
-
 int setup_sorting(struct perf_evlist *evlist)
 {
 	int err;
@@ -2725,14 +2715,11 @@ int setup_sorting(struct perf_evlist *evlist)
 		return err;
 
 	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist, -1);
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
 		if (err < 0)
 			return err;
 	}
 
-	if (evlist != NULL)
-		evlist__set_hists_nr_sort_keys(evlist);
-
 	reset_dimensions();
 
 	/*
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index ea1f722cffea73d4ad47a63178d570ce7195d874..3f4e359981192ac50b56e770aa472749666e1f98 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -37,6 +37,7 @@ extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
 extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
@@ -129,7 +130,6 @@ struct hist_entry {
 	void			*raw_data;
 	u32			raw_size;
 	void			*trace_output;
-	struct perf_hpp_fmt	*fmt;
 	struct perf_hpp_list	*hpp_list;
 	struct hist_entry	*parent_he;
 	union {