Commit e7c15cd8 authored by Steven Rostedt (Red Hat)'s avatar Steven Rostedt (Red Hat) Committed by Steven Rostedt

tracing: Added hardware latency tracer

The hardware latency tracer has been in the PREEMPT_RT patch for some time.
It is used to detect possible SMIs or any other hardware interruptions that
the kernel is unaware of. Note, NMIs may also be detected, but that may be
good to note as well.

The logic is pretty simple. It simply creates a thread that spins on a
single CPU for a specified amount of time (width) within a periodic window
(window). These numbers may be adjusted by their corresponding names in

   /sys/kernel/tracing/hwlat_detector

The defaults are window = 1000000 us (1 second)
                 width  =  500000 us (1/2 second)

The loop consists of:

	t1 = trace_clock_local();
	t2 = trace_clock_local();

Where trace_clock_local() is a variant of sched_clock().

The difference of t2 - t1 is recorded as the "inner" timestamp and also the
timestamp t1 - prev_t2 is recorded as the "outer" timestamp. If either of
these differences is greater than the time denoted in
/sys/kernel/tracing/tracing_thresh then it records the event.

When this tracer is started, and tracing_thresh is zero, it changes to the
default threshold of 10 us.

The hwlat tracer in the PREEMPT_RT patch was originally written by
Jon Masters. I have modified it quite a bit and turned it into a tracer.
Based-on-code-by: default avatarJon Masters <>
Signed-off-by: default avatarSteven Rostedt <>
parent 8861dd30
......@@ -221,6 +221,41 @@ config SCHED_TRACER
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
bool "Tracer to detect hardware latencies (like SMIs)"
This tracer, when enabled, will create one or more kernel threads,
depending on what the cpumask file is set to, with each thread
spinning in a loop looking for interruptions caused by
something other than the kernel. For example, if a
System Management Interrupt (SMI) takes a noticeable amount of
time, this tracer will detect it. This is useful for testing
if a system is reliable for Real Time tasks.
Some files are created in the tracing directory when this
is enabled:
hwlat_detector/width - time in usecs for how long to spin for
hwlat_detector/window - time in usecs between the start of each iteration
A kernel thread is created that will spin with interrupts disabled
for "width" microseconds in every "window" cycle. It will not spin
for "window - width" microseconds, where the system can
continue to operate.
The output will appear in the trace and trace_pipe files.
When the tracer is not running, it has no effect on the system,
but when it is running, it can cause the system to be
periodically non-responsive. Do not run this tracer on a
production system.
To enable this tracer, echo in "hwlat" into the current_tracer
file. Every time a latency is greater than tracing_thresh, it will
be recorded into the ring buffer.
bool "Trace process context switches and events"
depends on !GENERIC_TRACER
......@@ -41,6 +41,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
......@@ -1047,7 +1047,7 @@ void disable_trace_on_warning(void)
* Shows real state of the ring buffer if it is enabled or not.
static int tracer_tracing_is_on(struct trace_array *tr)
int tracer_tracing_is_on(struct trace_array *tr)
if (tr->trace_buffer.buffer)
return ring_buffer_record_is_on(tr->trace_buffer.buffer);
......@@ -38,6 +38,7 @@ enum trace_type {
......@@ -326,6 +327,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
......@@ -571,6 +573,7 @@ void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
int tracer_tracing_is_on(struct trace_array *tr);
struct dentry *trace_create_file(const char *name,
umode_t mode,
struct dentry *parent,
......@@ -322,3 +322,26 @@ FTRACE_ENTRY(branch, trace_branch,
FTRACE_ENTRY(hwlat, hwlat_entry,
__field( u64, duration )
__field( u64, outer_duration )
__field_struct( struct timespec, timestamp )
__field_desc( long, timestamp, tv_sec )
__field_desc( long, timestamp, tv_nsec )
__field( unsigned int, seqnum )
This diff is collapsed.
......@@ -1098,6 +1098,57 @@ static struct trace_event trace_user_stack_event = {
.funcs = &trace_user_stack_funcs,
static enum print_line_t
trace_hwlat_print(struct trace_iterator *iter, int flags,
struct trace_event *event)
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
struct hwlat_entry *field;
trace_assign_type(field, entry);
trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld\n",
return trace_handle_return(s);
static enum print_line_t
trace_hwlat_raw(struct trace_iterator *iter, int flags,
struct trace_event *event)
struct hwlat_entry *field;
struct trace_seq *s = &iter->seq;
trace_assign_type(field, iter->ent);
trace_seq_printf(s, "%llu %lld %ld %09ld %u\n",
return trace_handle_return(s);
static struct trace_event_functions trace_hwlat_funcs = {
.trace = trace_hwlat_print,
.raw = trace_hwlat_raw,
static struct trace_event trace_hwlat_event = {
.type = TRACE_HWLAT,
.funcs = &trace_hwlat_funcs,
static enum print_line_t
trace_bputs_print(struct trace_iterator *iter, int flags,
......@@ -1233,6 +1284,7 @@ static struct trace_event *events[] __initdata = {
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment