diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7ce296618717ee7df8d65fdd7919030f2425c800..e00e2eaf89da836d10eea864bc92279a32ecedf1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -831,7 +831,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
 		return -1;
 	}
 
-	atoms->thread = thread;
+	atoms->thread = thread__get(thread);
 	INIT_LIST_HEAD(&atoms->work_list);
 	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
 	return 0;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d95a8f4d988c28003647ad2f8fbd7cc60e73606b..211614fba2178e7866c136f9e5a55ab44af5e27b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1741,7 +1741,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	} else
 		ttrace->entry_pending = true;
 
-	trace->current = thread;
+	if (trace->current != thread) {
+		thread__put(trace->current);
+		trace->current = thread__get(thread);
+	}
 
 	return 0;
 }
@@ -2274,6 +2277,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 out_disable:
+	thread__zput(trace->current);
+
 	perf_evlist__disable(evlist);
 
 	if (!err) {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 788506eef5671da5e64016063569b79d4e060d97..ad312d91caed1252ef5b0d66afb50c9916ad47c5 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1467,7 +1467,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		perf_hpp__set_user_width(symbol_conf.col_width_list_str);
 
 	while (1) {
-		const struct thread *thread = NULL;
+		struct thread *thread = NULL;
 		const struct dso *dso = NULL;
 		int choice = 0,
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
@@ -1754,13 +1754,13 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				pstack__remove(fstack, &browser->hists->thread_filter);
 zoom_out_thread:
 				ui_helpline__pop();
-				browser->hists->thread_filter = NULL;
+				thread__zput(browser->hists->thread_filter);
 				perf_hpp__set_elide(HISTC_THREAD, false);
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
-				browser->hists->thread_filter = thread;
+				browser->hists->thread_filter = thread__get(thread);
 				perf_hpp__set_elide(HISTC_THREAD, false);
 				pstack__push(fstack, &browser->hists->thread_filter);
 			}
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index ffdc338df9256a8a805b0cdd5e8422264f512552..a19674666b4e47d73602ac6bbf25207d1f0c5741 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -61,8 +61,9 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
 
 	if (thread) {
 		rb_erase(&thread->rb_node, &machine->threads);
-		machine->last_match = NULL;
-		thread__delete(thread);
+		if (machine->last_match == thread)
+			thread__zput(machine->last_match);
+		thread__put(thread);
 	}
 
 	return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 70b48a65064cbc85fd30d6777eb1a9ca25ca0098..95f5ab707b74fbd684910beb47888560a086e0fc 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -355,6 +355,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 			callchain_init(he->callchain);
 
 		INIT_LIST_HEAD(&he->pairs.node);
+		thread__get(he->thread);
 	}
 
 	return he;
@@ -941,6 +942,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 
 void hist_entry__delete(struct hist_entry *he)
 {
+	thread__zput(he->thread);
 	zfree(&he->branch_info);
 	zfree(&he->mem_info);
 	zfree(&he->stat_acc);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2b690d02890707f2b916220c1402df3b8f774e98..e988c9fcd1bc058b75afcf1b2df96418b1cf8c43 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -60,7 +60,7 @@ struct hists {
 	struct rb_root		entries_collapsed;
 	u64			nr_entries;
 	u64			nr_non_filtered_entries;
-	const struct thread	*thread_filter;
+	struct thread		*thread_filter;
 	const struct dso	*dso_filter;
 	const char		*uid_filter_str;
 	const char		*symbol_filter_str;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9e0f60a7e7b35d08ebe3fd5c88fe1157af6acbc3..24f8c978cfd4e802b483c26a01ad47036b85ea83 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -14,6 +14,8 @@
 #include "unwind.h"
 #include "linux/hash.h"
 
+static void machine__remove_thread(struct machine *machine, struct thread *th);
+
 static void dsos__init(struct dsos *dsos)
 {
 	INIT_LIST_HEAD(&dsos->head);
@@ -89,16 +91,6 @@ static void dsos__delete(struct dsos *dsos)
 	}
 }
 
-void machine__delete_dead_threads(struct machine *machine)
-{
-	struct thread *n, *t;
-
-	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
-		list_del(&t->node);
-		thread__delete(t);
-	}
-}
-
 void machine__delete_threads(struct machine *machine)
 {
 	struct rb_node *nd = rb_first(&machine->threads);
@@ -106,9 +98,8 @@ void machine__delete_threads(struct machine *machine)
 	while (nd) {
 		struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-		rb_erase(&t->rb_node, &machine->threads);
 		nd = rb_next(nd);
-		thread__delete(t);
+		machine__remove_thread(machine, t);
 	}
 }
 
@@ -361,9 +352,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 	 * the full rbtree:
 	 */
 	th = machine->last_match;
-	if (th && th->tid == tid) {
-		machine__update_thread_pid(machine, th, pid);
-		return th;
+	if (th != NULL) {
+		if (th->tid == tid) {
+			machine__update_thread_pid(machine, th, pid);
+			return th;
+		}
+
+		thread__zput(machine->last_match);
 	}
 
 	while (*p != NULL) {
@@ -371,7 +366,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			machine->last_match = th;
+			machine->last_match = thread__get(th);
 			machine__update_thread_pid(machine, th, pid);
 			return th;
 		}
@@ -403,8 +398,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 			thread__delete(th);
 			return NULL;
 		}
-
-		machine->last_match = th;
+		/*
+		 * It is now in the rbtree, get a ref
+		 */
+		thread__get(th);
+		machine->last_match = thread__get(th);
 	}
 
 	return th;
@@ -1238,13 +1236,17 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
 static void machine__remove_thread(struct machine *machine, struct thread *th)
 {
-	machine->last_match = NULL;
+	if (machine->last_match == th)
+		thread__zput(machine->last_match);
+
 	rb_erase(&th->rb_node, &machine->threads);
 	/*
-	 * We may have references to this thread, for instance in some hist_entry
-	 * instances, so just move them to a separate list.
+	 * Move it first to the dead_threads list, then drop the reference,
+	 * if this is the last reference, then the thread__delete destructor
+	 * will be called and we will remove it from the dead_threads list.
 	 */
 	list_add_tail(&th->node, &machine->dead_threads);
+	thread__put(th);
 }
 
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e8b7779a0a3f85c05e6d01f086a0b56ea8ec1ff2..e2faf3b47e7bf5be277758a5ff0a425336d1a19b 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -118,7 +118,6 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec);
 struct machine *machine__new_host(void);
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
-void machine__delete_dead_threads(struct machine *machine);
 void machine__delete_threads(struct machine *machine);
 void machine__delete(struct machine *machine);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e4f166981ff050fd92731dad3957eead957c9cc8..ed4e5cf2bd9d6469911b1d87d1a11611afefadfc 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -138,11 +138,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
 	return NULL;
 }
 
-static void perf_session__delete_dead_threads(struct perf_session *session)
-{
-	machine__delete_dead_threads(&session->machines.host);
-}
-
 static void perf_session__delete_threads(struct perf_session *session)
 {
 	machine__delete_threads(&session->machines.host);
@@ -167,7 +162,6 @@ static void perf_session_env__delete(struct perf_session_env *env)
 void perf_session__delete(struct perf_session *session)
 {
 	perf_session__destroy_kernel_maps(session);
-	perf_session__delete_dead_threads(session);
 	perf_session__delete_threads(session);
 	perf_session_env__delete(&session->header.env);
 	machines__exit(&session->machines);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 9ebc8b1f9be51f82801e305804e4017eaa274db7..a5dbba95107ff3919e4a57495c7bcb1fafb77966 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -82,6 +82,20 @@ void thread__delete(struct thread *thread)
 	free(thread);
 }
 
+struct thread *thread__get(struct thread *thread)
+{
+	++thread->refcnt;
+	return thread;
+}
+
+void thread__put(struct thread *thread)
+{
+	if (thread && --thread->refcnt == 0) {
+		list_del_init(&thread->node);
+		thread__delete(thread);
+	}
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 160fd066a7d1efe7a45a9bcfb8012bf0bccfc777..783b6688d2f71c53e3ee2e662bbb8cb6d357628a 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -20,6 +20,7 @@ struct thread {
 	pid_t			tid;
 	pid_t			ppid;
 	int			cpu;
+	int			refcnt;
 	char			shortname[3];
 	bool			comm_set;
 	bool			dead; /* if set thread has exited */
@@ -37,6 +38,18 @@ struct comm;
 struct thread *thread__new(pid_t pid, pid_t tid);
 int thread__init_map_groups(struct thread *thread, struct machine *machine);
 void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+	thread__put(*thread);
+	*thread = NULL;
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
 static inline void thread__exited(struct thread *thread)
 {
 	thread->dead = true;