diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 961f5734b97d60e7196226e1ec8b5b9deb4f5d65..6b838869554b144c3a5e23e74f3d13f715ee90df 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3787,8 +3787,6 @@
 
 	nomce		[X86-32] Disable Machine Check Exception
 
-	nomem_profiling	Disable memory allocation profiling.
-
 	nomfgpt		[X86-32] Disable Multi-Function General Purpose
 			Timer usage (for AMD Geode machines).
 
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 988f6a4c8084fbd3a7952bb7ab820c59218cc0b3..b22d9189d6dd2364b71f786b01a10fecc2e00386 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
 - legacy_va_layout
 - lowmem_reserve_ratio
 - max_map_count
+- mem_profiling         (only if CONFIG_MEM_ALLOC_PROFILING=y)
 - memory_failure_early_kill
 - memory_failure_recovery
 - min_free_kbytes
@@ -425,6 +426,27 @@ e.g., up to one or two maps per allocation.
 The default value is 65530.
 
 
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
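+
+For example, to check the current setting and enable profiling at runtime
+(assuming the kernel was built with CONFIG_MEM_ALLOC_PROFILING=y)::
+
+    cat /proc/sys/vm/mem_profiling
+    echo 1 > /proc/sys/vm/mem_profiling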
+
+
 memory_failure_early_kill:
 ==========================
 
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 898c99eae8e44630b459a2adca8b84ebb1423d8f..552bcf0e64f0509d37cd65ca2353d2f497b9ade0 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -677,6 +677,7 @@ files are there, and which are missing.
  ============ ===============================================================
  File         Content
  ============ ===============================================================
+ allocinfo    Memory allocation profiling information
  apm          Advanced power management info
  buddyinfo    Kernel memory allocator information (see text)	(2.5)
  bus          Directory containing bus specific information
@@ -936,6 +937,39 @@ also be allocatable although a lot of filesystem metadata may have to be
 reclaimed to achieve this.
 
 
+allocinfo
+~~~~~~~~~
+
+Provides information about memory allocations at all locations in the code
+base. Each allocation in the code is identified by its source file, line
+number, module and the function calling the allocation. The number of bytes
+allocated at each location is reported.
+
+Example output.
+
+::
+
+    > cat /proc/allocinfo
+
+      153MiB     mm/slub.c:1826 module:slub func:alloc_slab_page
+     6.08MiB     mm/slab_common.c:950 module:slab_common func:_kmalloc_order
+     5.09MiB     mm/memcontrol.c:2814 module:memcontrol func:alloc_slab_obj_exts
+     4.54MiB     mm/page_alloc.c:5777 module:page_alloc func:alloc_pages_exact
+     1.32MiB     include/asm-generic/pgalloc.h:63 module:pgtable func:__pte_alloc_one
+     1.16MiB     fs/xfs/xfs_log_priv.h:700 module:xfs func:xlog_kvmalloc
+     1.00MiB     mm/swap_cgroup.c:48 module:swap_cgroup func:swap_cgroup_prepare
+      734KiB     fs/xfs/kmem.c:20 module:xfs func:kmem_alloc
+      640KiB     kernel/rcu/tree.c:3184 module:tree func:fill_page_cache_func
+      640KiB     drivers/char/virtio_console.c:452 module:virtio_console func:alloc_buf
+      ...
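+
+The output is not sorted. One way to list the call sites that account for the
+most memory is to post-process it with standard userspace tools, for
+example::
+
+    sort -h /proc/allocinfo | tail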
+
+
 meminfo
 ~~~~~~~
 
diff --git a/MAINTAINERS b/MAINTAINERS
index e9f21f92dbee86655dd7930055cc9868dc1413fa..cca13f71c52c5908ec61fc63ba59cf1d0773b155 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11516,12 +11516,6 @@ M:	John Hawley <warthog9@eaglescrag.net>
 S:	Maintained
 F:	tools/testing/ktest
 
-LAZY PERCPU COUNTERS
-M:	Kent Overstreet <kent.overstreet@linux.dev>
-S:	Maintained
-F:	include/linux/lazy-percpu-counter.h
-F:	lib/lazy-percpu-counter.c
-
 L3MDEV
 M:	David Ahern <dsahern@kernel.org>
 L:	netdev@vger.kernel.org
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 26245aaf12b8bddb8f6082ce5b2b3c093f2e00cc..d1950f7b3813b39acc40f73047484d73efe633ce 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -261,7 +261,7 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
 	if (end <= start)
 		return;
 
-	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
+	string_get_size(size, 1, STRING_SIZE_BASE2, buf, sizeof(buf));
 
 	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
 		exec ? " (exec)" : "");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a7697027ce43b75ae5eac72a0cec2808b3cec3ca..15759b5400e200198e6cbe40852c4e73c2566432 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -575,9 +575,9 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
 	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
 
 	string_get_size(nblocks, queue_logical_block_size(q),
-			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
+			STRING_SIZE_BASE2, cap_str_2, sizeof(cap_str_2));
 	string_get_size(nblocks, queue_logical_block_size(q),
-			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+			0, cap_str_10, sizeof(cap_str_10));
 
 	dev_notice(&vdev->dev,
 		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c
index 8d1630b8edac1e7cd4cd76cbd6373ce221809831..d287ec57610cb7a886087402296b8a8bd336fa09 100644
--- a/drivers/gpu/drm/gud/gud_drv.c
+++ b/drivers/gpu/drm/gud/gud_drv.c
@@ -329,7 +329,7 @@ static int gud_stats_debugfs(struct seq_file *m, void *data)
 	struct gud_device *gdrm = to_gud_device(node->minor->dev);
 	char buf[10];
 
-	string_get_size(gdrm->bulk_len, 1, STRING_UNITS_2, buf, sizeof(buf));
+	string_get_size(gdrm->bulk_len, 1, STRING_SIZE_BASE2, buf, sizeof(buf));
 	seq_printf(m, "Max buffer size: %s\n", buf);
 	seq_printf(m, "Number of errors:  %u\n", gdrm->stats_num_errors);
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index db6d8a0999100bdf0a18e4a32abb5a92ffd45b87..cba61a9838dd8d487f68f359d7f69c8b60170aa9 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2506,7 +2506,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 
 	blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
 
-	string_get_size((u64)size, 512, STRING_UNITS_2,
+	string_get_size((u64)size, 512, STRING_SIZE_BASE2,
 			cap_str, sizeof(cap_str));
 	pr_info("%s: %s %s %s %s\n",
 		md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
@@ -2702,7 +2702,7 @@ static int mmc_blk_alloc_rpmb_part(struct mmc_card *card,
 
 	list_add(&rpmb->node, &md->rpmbs);
 
-	string_get_size((u64)size, 512, STRING_UNITS_2,
+	string_get_size((u64)size, 512, STRING_SIZE_BASE2,
 			cap_str, sizeof(cap_str));
 
 	pr_info("%s: %s %s %s, chardev (%d:%d)\n",
diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c
index df76cb5de3f93452487ae73e265a411dc295cb93..db631a9d7077eb5c9a50a34252f80f6bb3288189 100644
--- a/drivers/mtd/spi-nor/debugfs.c
+++ b/drivers/mtd/spi-nor/debugfs.c
@@ -82,7 +82,7 @@ static int spi_nor_params_show(struct seq_file *s, void *data)
 
 	seq_printf(s, "name\t\t%s\n", info->name);
 	seq_printf(s, "id\t\t%*ph\n", info->id_len, info->id);
-	string_get_size(params->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+	string_get_size(params->size, 1, STRING_SIZE_BASE2, buf, sizeof(buf));
 	seq_printf(s, "size\t\t%s\n", buf);
 	seq_printf(s, "write size\t%u\n", params->writesize);
 	seq_printf(s, "page size\t%u\n", params->page_size);
@@ -127,14 +127,14 @@ static int spi_nor_params_show(struct seq_file *s, void *data)
 		struct spi_nor_erase_type *et = &erase_map->erase_type[i];
 
 		if (et->size) {
-			string_get_size(et->size, 1, STRING_UNITS_2, buf,
+			string_get_size(et->size, 1, STRING_SIZE_BASE2, buf,
 					sizeof(buf));
 			seq_printf(s, " %02x (%s) [%d]\n", et->opcode, buf, i);
 		}
 	}
 
 	if (!(nor->flags & SNOR_F_NO_OP_CHIP_ERASE)) {
-		string_get_size(params->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+		string_get_size(params->size, 1, STRING_SIZE_BASE2, buf, sizeof(buf));
 		seq_printf(s, " %02x (%s)\n", SPINOR_OP_CHIP_ERASE, buf);
 	}
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 14e0d989c3ba5f7efe5fb4c9de71ac3b2117fbbb..7d5fbebd36fc38bff6f80646d19fa79f38df6546 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3457,8 +3457,8 @@ static void mem_region_show(struct seq_file *seq, const char *name,
 {
 	char buf[40];
 
-	string_get_size((u64)to - from + 1, 1, STRING_UNITS_2, buf,
-			sizeof(buf));
+	string_get_size((u64)to - from + 1, 1, STRING_SIZE_BASE2,
+			buf, sizeof(buf));
 	seq_printf(seq, "%-15s %#x-%#x [%s]\n", name, from, to, buf);
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e934779bf05c8fb57fda6472852523e9e26ee685..3b4a324a7113e5b72890e846953c3647a2ff43ed 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2526,10 +2526,10 @@ sd_print_capacity(struct scsi_disk *sdkp,
 	if (!sdkp->first_scan && old_capacity == sdkp->capacity)
 		return;
 
-	string_get_size(sdkp->capacity, sector_size,
-			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
-	string_get_size(sdkp->capacity, sector_size,
-			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+	string_get_size(sdkp->capacity, sector_size, STRING_SIZE_BASE2,
+			cap_str_2, sizeof(cap_str_2));
+	string_get_size(sdkp->capacity, sector_size, 0,
+			cap_str_10, sizeof(cap_str_10));
 
 	sd_printk(KERN_NOTICE, sdkp,
 		  "%llu %d-byte logical blocks: (%s/%s)\n",
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index c557a030acfea42d7a48677cfecc227db3539dfa..9aa57a4e2478fc0f3c14b1a7065bb93e597e3880 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -8,7 +8,7 @@
 #include "xfs_trace.h"
 
 void *
-kmem_alloc(size_t size, xfs_km_flags_t flags)
+kmem_alloc_noprof(size_t size, xfs_km_flags_t flags)
 {
 	int	retries = 0;
 	gfp_t	lflags = kmem_flags_convert(flags);
@@ -17,7 +17,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 	trace_kmem_alloc(size, flags, _RET_IP_);
 
 	do {
-		ptr = kmalloc(size, lflags);
+		ptr = kmalloc_noprof(size, lflags);
 		if (ptr || (flags & KM_MAYFAIL))
 			return ptr;
 		if (!(++retries % 100))
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index b987dc2c685170ad1ce08768553c7a8cee02d7a6..c4cf1dc2a7af1c81c6e5ba4684c2a4a35cc3f23c 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -6,6 +6,7 @@
 #ifndef __XFS_SUPPORT_KMEM_H__
 #define __XFS_SUPPORT_KMEM_H__
 
+#include <linux/alloc_tag.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -56,18 +57,15 @@ kmem_flags_convert(xfs_km_flags_t flags)
 	return lflags;
 }
 
-extern void *kmem_alloc(size_t, xfs_km_flags_t);
 static inline void  kmem_free(const void *ptr)
 {
 	kvfree(ptr);
 }
 
+extern void *kmem_alloc_noprof(size_t, xfs_km_flags_t);
+#define kmem_alloc(...)			alloc_hooks(kmem_alloc_noprof(__VA_ARGS__))
 
-static inline void *
-kmem_zalloc(size_t size, xfs_km_flags_t flags)
-{
-	return kmem_alloc(size, flags | KM_ZERO);
-}
+#define kmem_zalloc(_size, _flags)	kmem_alloc((_size), (_flags) | KM_ZERO)
 
 /*
  * Zone interfaces
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 2c3f4f3a8c930fbf1cd846479da295864f3eae9a..102caf62c2a9cab89896611e8e7ed1d104214dc4 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -8,47 +8,73 @@
 #include <linux/bug.h>
 #include <linux/codetag.h>
 #include <linux/container_of.h>
-#include <linux/lazy-percpu-counter.h>
+#include <linux/preempt.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
 #include <linux/static_key.h>
 
+struct alloc_tag_counters {
+	u64 bytes;
+	u64 calls;
+};
+
 /*
  * An instance of this structure is created in a special ELF section at every
  * allocation callsite. At runtime, the special section is treated as
  * an array of these. Embedded codetag utilizes codetag framework.
  */
 struct alloc_tag {
-	struct codetag_with_ctx		ctc;
-	struct lazy_percpu_counter	bytes_allocated;
+	struct codetag			ct;
+	struct alloc_tag_counters __percpu	*counters;
 } __aligned(8);
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
 
 void alloc_tags_show_mem_report(struct seq_buf *s);
 
-static inline struct alloc_tag *ctc_to_alloc_tag(struct codetag_with_ctx *ctc)
-{
-	return container_of(ctc, struct alloc_tag, ctc);
-}
-
 static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
 {
-	return container_of(ct_to_ctc(ct), struct alloc_tag, ctc);
+	return container_of(ct, struct alloc_tag, ct);
 }
 
-struct codetag_ctx *alloc_tag_create_ctx(struct alloc_tag *tag, size_t size);
-void alloc_tag_free_ctx(struct codetag_ctx *ctx, struct alloc_tag **ptag);
-bool alloc_tag_enable_ctx(struct alloc_tag *tag, bool enable);
+#ifdef ARCH_NEEDS_WEAK_PER_CPU
+/*
+ * When percpu variables are required to be defined as weak, static percpu
+ * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
+ */
+#error "Memory allocation profiling is incompatible with ARCH_NEEDS_WEAK_PER_CPU"
+#endif
 
-#define DEFINE_ALLOC_TAG(_alloc_tag, _old)				\
-	static struct alloc_tag _alloc_tag __used __aligned(8)		\
-	__section("alloc_tags") = { .ctc.ct = CODE_TAG_INIT };		\
+#define DEFINE_ALLOC_TAG(_alloc_tag, _old)					\
+	static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr);	\
+	static struct alloc_tag _alloc_tag __used __aligned(8)			\
+	__section("alloc_tags") = {						\
+		.ct = CODE_TAG_INIT,						\
+		.counters = &_alloc_tag_cntr };					\
 	struct alloc_tag * __maybe_unused _old = alloc_tag_save(&_alloc_tag)
 
-extern struct static_key_true mem_alloc_profiling_key;
+DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+			mem_alloc_profiling_key);
 
 static inline bool mem_alloc_profiling_enabled(void)
 {
-	return static_branch_likely(&mem_alloc_profiling_key);
+	return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+				   &mem_alloc_profiling_key);
+}
+
+static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
+{
+	struct alloc_tag_counters v = { 0, 0 };
+	struct alloc_tag_counters *counter;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		counter = per_cpu_ptr(tag->counters, cpu);
+		v.bytes += counter->bytes;
+		v.calls += counter->calls;
+	}
+
+	return v;
 }
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
@@ -73,15 +99,11 @@ static inline void set_codetag_empty(union codetag_ref *ref) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
 
-static inline void __alloc_tag_sub(union codetag_ref *ref, size_t bytes,
-				   bool may_allocate)
+static inline void __alloc_tag_sub(union codetag_ref *ref, size_t bytes)
 {
 	struct alloc_tag *tag;
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
-	/* The switch should be checked before this */
-	BUG_ON(!mem_alloc_profiling_enabled());
-
 	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
 #endif
 	if (!ref || !ref->ct)
@@ -92,34 +114,27 @@ static inline void __alloc_tag_sub(union codetag_ref *ref, size_t bytes,
 		return;
 	}
 
-	if (is_codetag_ctx_ref(ref))
-		alloc_tag_free_ctx(ref->ctx, &tag);
-	else
-		tag = ct_to_alloc_tag(ref->ct);
+	tag = ct_to_alloc_tag(ref->ct);
+
+	this_cpu_sub(tag->counters->bytes, bytes);
+	this_cpu_dec(tag->counters->calls);
 
-	if (may_allocate)
-		lazy_percpu_counter_add(&tag->bytes_allocated, -bytes);
-	else
-		lazy_percpu_counter_add_noupgrade(&tag->bytes_allocated, -bytes);
 	ref->ct = NULL;
 }
 
 static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
 {
-	__alloc_tag_sub(ref, bytes, true);
+	__alloc_tag_sub(ref, bytes);
 }
 
 static inline void alloc_tag_sub_noalloc(union codetag_ref *ref, size_t bytes)
 {
-	__alloc_tag_sub(ref, bytes, false);
+	__alloc_tag_sub(ref, bytes);
 }
 
 static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
 {
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
-	/* The switch should be checked before this */
-	BUG_ON(!mem_alloc_profiling_enabled());
-
 	WARN_ONCE(ref && ref->ct,
 		  "alloc_tag was not cleared (got tag for %s:%u)\n",\
 		  ref->ct->filename, ref->ct->lineno);
@@ -129,27 +144,25 @@ static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
 	if (!ref || !tag)
 		return;
 
-	if (codetag_ctx_enabled(&tag->ctc))
-		ref->ctx = alloc_tag_create_ctx(tag, bytes);
-	else
-		ref->ct = &tag->ctc.ct;
-	lazy_percpu_counter_add(&tag->bytes_allocated, bytes);
+	ref->ct = &tag->ct;
+	this_cpu_add(tag->counters->bytes, bytes);
+	this_cpu_inc(tag->counters->calls);
 }
 
 #else
 
 #define DEFINE_ALLOC_TAG(_alloc_tag, _old)
-static inline void set_codetag_empty(union codetag_ref *ref) {}
 static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
 static inline void alloc_tag_sub_noalloc(union codetag_ref *ref, size_t bytes) {}
 static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
 				 size_t bytes) {}
+static inline void set_codetag_empty(union codetag_ref *ref) {}
 
 #endif
 
-#define alloc_hooks(_do_alloc, _res_type, _err)			\
+#define alloc_hooks(_do_alloc)						\
 ({									\
-	_res_type _res;							\
+	typeof(_do_alloc) _res;						\
 	DEFINE_ALLOC_TAG(_alloc_tag, _old);				\
 									\
 	_res = _do_alloc;						\
@@ -157,5 +170,19 @@ static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
 	_res;								\
 })
 
+/*
+ * workaround for a sparse bug: it complains about res_type_to_err() when
+ * typeof(_do_alloc) is a __percpu pointer, but gcc won't let us add a separate
+ * __percpu case to res_type_to_err():
+ */
+#define alloc_hooks_pcpu(_do_alloc)					\
+({									\
+	typeof(_do_alloc) _res;						\
+	DEFINE_ALLOC_TAG(_alloc_tag, _old);				\
+									\
+	_res = _do_alloc;						\
+	alloc_tag_restore(&_alloc_tag, _old);				\
+	_res;								\
+})
 
 #endif /* _LINUX_ALLOC_TAG_H */
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index b6a2f0287a83b9fb2b1de52cffb6a417d0dab007..d98e4c8e86f0dee7e9e366b5fa0b54d3fac67f6e 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -5,12 +5,8 @@
 #ifndef _LINUX_CODETAG_H
 #define _LINUX_CODETAG_H
 
-#include <linux/container_of.h>
-#include <linux/spinlock.h>
 #include <linux/types.h>
 
-struct kref;
-struct codetag_ctx;
 struct codetag_iterator;
 struct codetag_type;
 struct seq_buf;
@@ -22,38 +18,15 @@ struct module;
  * an array of these.
  */
 struct codetag {
-	unsigned int flags; /* has to be the first member shared with codetag_ctx */
+	unsigned int flags; /* used in later patches */
 	unsigned int lineno;
 	const char *modname;
 	const char *function;
 	const char *filename;
 } __aligned(8);
 
-/* codetag_with_ctx flags */
-#define CTC_FLAG_CTX_PTR	(1 << 0)
-#define CTC_FLAG_CTX_READY	(1 << 1)
-#define CTC_FLAG_CTX_ENABLED	(1 << 2)
-
-/*
- * Code tag with context capture support. Contains a list to store context for
- * each tag hit, a lock protecting the list and a flag to indicate whether
- * context capture is enabled for the tag.
- */
-struct codetag_with_ctx {
-	struct codetag ct;
-	struct list_head ctx_head;
-	spinlock_t ctx_lock;
-} __aligned(8);
-
-/*
- * Tag reference can point to codetag directly or indirectly via codetag_ctx.
- * Direct codetag pointer is used when context capture is disabled or not
- * supported. When context capture for the tag is used, the reference points
- * to the codetag_ctx through which the codetag can be reached.
- */
 union codetag_ref {
 	struct codetag *ct;
-	struct codetag_ctx *ctx;
 };
 
 struct codetag_range {
@@ -73,7 +46,6 @@ struct codetag_type_desc {
 			    struct codetag_module *cmod);
 	bool (*module_unload)(struct codetag_type *cttype,
 			      struct codetag_module *cmod);
-	void (*free_ctx)(struct kref *ref);
 };
 
 struct codetag_iterator {
@@ -81,7 +53,6 @@ struct codetag_iterator {
 	struct codetag_module *cmod;
 	unsigned long mod_id;
 	struct codetag *ct;
-	struct codetag_ctx *ctx;
 };
 
 #define CODE_TAG_INIT {					\
@@ -92,29 +63,9 @@ struct codetag_iterator {
 	.flags		= 0,				\
 }
 
-static inline bool is_codetag_ctx_ref(union codetag_ref *ref)
-{
-	return !!(ref->ct->flags & CTC_FLAG_CTX_PTR);
-}
-
-static inline
-struct codetag_with_ctx *ct_to_ctc(struct codetag *ct)
-{
-	return container_of(ct, struct codetag_with_ctx, ct);
-}
-
 void codetag_lock_module_list(struct codetag_type *cttype, bool lock);
-void codetag_init_iter(struct codetag_iterator *iter,
-		       struct codetag_type *cttype);
+struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype);
 struct codetag *codetag_next_ct(struct codetag_iterator *iter);
-struct codetag_ctx *codetag_next_ctx(struct codetag_iterator *iter);
-
-bool codetag_enable_ctx(struct codetag_with_ctx *ctc, bool enable);
-static inline bool codetag_ctx_enabled(struct codetag_with_ctx *ctc)
-{
-	return !!(ctc->ct.flags & CTC_FLAG_CTX_ENABLED);
-}
-bool codetag_has_ctx(struct codetag_with_ctx *ctc);
 
 void codetag_to_text(struct seq_buf *out, struct codetag *ct);
 
@@ -129,31 +80,4 @@ static inline void codetag_load_module(struct module *mod) {}
 static inline bool codetag_unload_module(struct module *mod) { return true; }
 #endif
 
-/* Codetag query parsing */
-
-struct codetag_query {
-	const char	*filename;
-	const char	*module;
-	const char	*function;
-	const char	*class;
-	unsigned int	first_line, last_line;
-	unsigned int	first_index, last_index;
-	unsigned int	cur_index;
-
-	bool		match_line:1;
-	bool		match_index:1;
-
-	unsigned int	set_enabled:1;
-	unsigned int	enabled:2;
-
-	unsigned int	set_frequency:1;
-	unsigned int	frequency;
-};
-
-char *codetag_query_parse(struct codetag_query *q, char *buf);
-bool codetag_matches_query(struct codetag_query *q,
-			   const struct codetag *ct,
-			   const struct codetag_module *mod,
-			   const char *class);
-
 #endif /* _LINUX_CODETAG_H */
diff --git a/include/linux/codetag_ctx.h b/include/linux/codetag_ctx.h
deleted file mode 100644
index e741484f0e08569015efd5240f336e4d33ad94a5..0000000000000000000000000000000000000000
--- a/include/linux/codetag_ctx.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * code tag context
- */
-#ifndef _LINUX_CODETAG_CTX_H
-#define _LINUX_CODETAG_CTX_H
-
-#include <linux/codetag.h>
-#include <linux/kref.h>
-
-/* Code tag hit context. */
-struct codetag_ctx {
-	unsigned int flags; /* has to be the first member shared with codetag */
-	struct codetag_with_ctx *ctc;
-	struct list_head node;
-	struct kref refcount;
-} __aligned(8);
-
-static inline struct codetag_ctx *kref_to_ctx(struct kref *refcount)
-{
-	return container_of(refcount, struct codetag_ctx, refcount);
-}
-
-static inline void add_ctx(struct codetag_ctx *ctx,
-			   struct codetag_with_ctx *ctc)
-{
-	kref_init(&ctx->refcount);
-	spin_lock(&ctc->ctx_lock);
-	ctx->flags = CTC_FLAG_CTX_PTR;
-	ctx->ctc = ctc;
-	list_add_tail(&ctx->node, &ctc->ctx_head);
-	spin_unlock(&ctc->ctx_lock);
-}
-
-static inline void rem_ctx(struct codetag_ctx *ctx,
-			   void (*free_ctx)(struct kref *refcount))
-{
-	struct codetag_with_ctx *ctc = ctx->ctc;
-
-	spin_lock(&ctc->ctx_lock);
-	/* ctx might have been removed while we were using it */
-	if (!list_empty(&ctx->node))
-		list_del_init(&ctx->node);
-	spin_unlock(&ctc->ctx_lock);
-	kref_put(&ctx->refcount, free_ctx);
-}
-
-#endif /* _LINUX_CODETAG_CTX_H */
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 5001a11258e4d8b03a5d6850cd834d3505574886..9c1f8375cab6d1d35990f2e2befbfbf0f9143510 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -573,8 +573,8 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
 	return __real_memchr_inv(p, c, size);
 }
 
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
-__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp)
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup_noprof);
+__FORTIFY_INLINE void *kmemdup_noprof(const void * const POS0 p, size_t size, gfp_t gfp)
 {
 	size_t p_size = __struct_size(p);
 
@@ -608,6 +608,7 @@ char *strcpy(char * const POS p, const char * const POS q)
 	__underlying_memcpy(p, q, size);
 	return p;
 }
+#define kmemdup(...)	alloc_hooks(kmemdup_noprof(__VA_ARGS__))
 
 /* Don't use these outside the FORITFY_SOURCE implementation */
 #undef __underlying_memchr
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5c4281d830f1996279892bf8b6189bd7ab97887c..e68008c34d12816f9cc52232674d516e32c35670 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -176,56 +176,42 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-struct page *_alloc_pages2(gfp_t gfp, unsigned int order, int preferred_nid,
+struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask);
-#define __alloc_pages(_gfp, _order, _preferred_nid, _nodemask) \
-		alloc_hooks(_alloc_pages2(_gfp, _order, _preferred_nid, \
-					    _nodemask), struct page *, NULL)
+#define __alloc_pages(...)			alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))
 
-struct folio *_folio_alloc2(gfp_t gfp, unsigned int order, int preferred_nid,
+struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask);
-#define __folio_alloc(_gfp, _order, _preferred_nid, _nodemask) \
-		alloc_hooks(_folio_alloc2(_gfp, _order, _preferred_nid, \
-					    _nodemask), struct folio *, NULL)
+#define __folio_alloc(...)			alloc_hooks(__folio_alloc_noprof(__VA_ARGS__))
 
-unsigned long _alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
 				nodemask_t *nodemask, int nr_pages,
 				struct list_head *page_list,
 				struct page **page_array);
-#define __alloc_pages_bulk(_gfp, _preferred_nid, _nodemask, _nr_pages, \
-			   _page_list, _page_array) \
-		alloc_hooks(_alloc_pages_bulk(_gfp, _preferred_nid, \
-						_nodemask, _nr_pages, \
-						_page_list, _page_array), \
-						unsigned long, 0)
-
-unsigned long _alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+#define __alloc_pages_bulk(...)			alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__))
+
+unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp,
 				unsigned long nr_pages,
 				struct page **page_array);
-#define  alloc_pages_bulk_array_mempolicy(_gfp, _nr_pages, _page_array) \
-		alloc_hooks(_alloc_pages_bulk_array_mempolicy(_gfp, \
-					_nr_pages, _page_array), \
-					unsigned long, 0)
+#define  alloc_pages_bulk_array_mempolicy(...)	alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__))
 
 /* Bulk allocate order-0 pages */
-#define alloc_pages_bulk_list(_gfp, _nr_pages, _list)				\
+#define alloc_pages_bulk_list(_gfp, _nr_pages, _list)			\
 	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL)
 
-#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array)			\
+#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array)		\
 	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array)
 
 static inline unsigned long
-_alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
 {
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 
-	return _alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+	return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array);
 }
 
-#define alloc_pages_bulk_array_node(_gfp, _nid, _nr_pages, _page_array) \
-	alloc_hooks(_alloc_pages_bulk_array_node(_gfp, _nid, _nr_pages, _page_array), \
-		    unsigned long, 0)
+#define alloc_pages_bulk_array_node(...)	alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__))
 
 static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
 {
@@ -246,17 +232,15 @@ static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
  * online. For more general interface, see alloc_pages_node().
  */
 static inline struct page *
-_alloc_pages_node2(int nid, gfp_t gfp_mask, unsigned int order)
+__alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order)
 {
 	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
 	warn_if_node_offline(nid, gfp_mask);
 
-	return _alloc_pages2(gfp_mask, order, nid, NULL);
+	return __alloc_pages_noprof(gfp_mask, order, nid, NULL);
 }
 
-#define  __alloc_pages_node(_nid, _gfp_mask, _order) \
-		alloc_hooks(_alloc_pages_node2(_nid, _gfp_mask, _order), \
-					struct page *, NULL)
+#define  __alloc_pages_node(...)		alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__))
 
 static inline
 struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
@@ -264,7 +248,7 @@ struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
 	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
 	warn_if_node_offline(nid, gfp);
 
-	return _folio_alloc2(gfp, order, nid, NULL);
+	return __folio_alloc_noprof(gfp, order, nid, NULL);
 }
 
 /*
@@ -272,75 +256,69 @@ struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
  * prefer the current CPU's closest node. Otherwise node must be valid and
  * online.
  */
-static inline struct page *_alloc_pages_node(int nid, gfp_t gfp_mask,
-						unsigned int order)
+static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask,
+						   unsigned int order)
 {
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 
-	return _alloc_pages_node2(nid, gfp_mask, order);
+	return __alloc_pages_node_noprof(nid, gfp_mask, order);
 }
 
-#define  alloc_pages_node(_nid, _gfp_mask, _order) \
-		alloc_hooks(_alloc_pages_node(_nid, _gfp_mask, _order), \
-					struct page *, NULL)
+#define  alloc_pages_node(...)			alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__))
 
 #ifdef CONFIG_NUMA
-struct page *_alloc_pages(gfp_t gfp, unsigned int order);
-struct folio *_folio_alloc(gfp_t gfp, unsigned int order);
-struct folio *_vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
+struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, bool hugepage);
 #else
-static inline struct page *_alloc_pages(gfp_t gfp_mask, unsigned int order)
+static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
 {
-	return _alloc_pages_node(numa_node_id(), gfp_mask, order);
+	return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order);
 }
-static inline struct folio *_folio_alloc(gfp_t gfp, unsigned int order)
+static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
 {
 	return __folio_alloc_node(gfp, order, numa_node_id());
 }
-#define _vma_alloc_folio(gfp, order, vma, addr, hugepage)		\
-	_folio_alloc(gfp, order)
+#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage)		\
+	folio_alloc_noprof(gfp, order)
 #endif
 
-#define alloc_pages(_gfp, _order) \
-		alloc_hooks(_alloc_pages(_gfp, _order), struct page *, NULL)
-#define folio_alloc(_gfp, _order) \
-		alloc_hooks(_folio_alloc(_gfp, _order), struct folio *, NULL)
-#define vma_alloc_folio(_gfp, _order, _vma, _addr, _hugepage)		\
-		alloc_hooks(_vma_alloc_folio(_gfp, _order, _vma, _addr, \
-				_hugepage), struct folio *, NULL)
+#define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
+#define folio_alloc(...)			alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
+#define vma_alloc_folio(...)			alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
 
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-static inline struct page *alloc_page_vma(gfp_t gfp,
+
+static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false);
+	struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false);
 
 	return &folio->page;
 }
+#define alloc_page_vma(...)			alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
+
+extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
+#define __get_free_pages(...)			alloc_hooks(get_free_pages_noprof(__VA_ARGS__))
 
-extern unsigned long _get_free_pages(gfp_t gfp_mask, unsigned int order);
-#define __get_free_pages(_gfp_mask, _order) \
-		alloc_hooks(_get_free_pages(_gfp_mask, _order), unsigned long, 0)
-extern unsigned long _get_zeroed_page(gfp_t gfp_mask);
-#define get_zeroed_page(_gfp_mask) \
-		alloc_hooks(_get_zeroed_page(_gfp_mask), unsigned long, 0)
+extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask);
+#define get_zeroed_page(...)			alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__))
+
+void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1);
+#define alloc_pages_exact(...)			alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__))
 
-void *_alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
-#define alloc_pages_exact(_size, _gfp_mask) \
-		alloc_hooks(_alloc_pages_exact(_size, _gfp_mask), void *, NULL)
 void free_pages_exact(void *virt, size_t size);
 
-__meminit void *_alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
-#define alloc_pages_exact_nid(_nid, _size, _gfp_mask) \
-		alloc_hooks(_alloc_pages_exact_nid(_nid, _size, _gfp_mask), void *, NULL)
+__meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
+#define alloc_pages_exact_nid(...)		alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__))
 
-#define __get_free_page(gfp_mask) \
-		__get_free_pages((gfp_mask), 0)
+#define __get_free_page(gfp_mask)					\
+	__get_free_pages((gfp_mask), 0)
 
-#define __get_dma_pages(gfp_mask, order) \
-		__get_free_pages((gfp_mask) | GFP_DMA, (order))
+#define __get_dma_pages(gfp_mask, order)				\
+	__get_free_pages((gfp_mask) | GFP_DMA, (order))
 
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
@@ -397,16 +375,14 @@ static inline bool pm_suspended_storage(void)
 
 #ifdef CONFIG_CONTIG_ALLOC
 /* The below functions must be run on a range from a single zone. */
-extern int _alloc_contig_range(unsigned long start, unsigned long end,
+extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 			      unsigned migratetype, gfp_t gfp_mask);
-#define alloc_contig_range(_start, _end, _migratetype, _gfp_mask) \
-		alloc_hooks(_alloc_contig_range(_start, _end, _migratetype, \
-						 _gfp_mask), int, -ENOMEM)
-extern struct page *_alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
-					int nid, nodemask_t *nodemask);
-#define alloc_contig_pages(_nr_pages, _gfp_mask, _nid, _nodemask) \
-		alloc_hooks(_alloc_contig_pages(_nr_pages, _gfp_mask, _nid, \
-						  _nodemask), struct page *, NULL)
+#define alloc_contig_range(...)			alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
+
+extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+					      int nid, nodemask_t *nodemask);
+#define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
+
 #endif
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index a4499e48711b613ccd7f0c8bca1946a2d3983326..5f40e02f7c144b7795c15030bf6507e2e2ebc241 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -21,50 +21,89 @@ typedef unsigned int __bitwise gfp_t;
  * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
  */
 
+enum {
+	___GFP_DMA_BIT,
+	___GFP_HIGHMEM_BIT,
+	___GFP_DMA32_BIT,
+	___GFP_MOVABLE_BIT,
+	___GFP_RECLAIMABLE_BIT,
+	___GFP_HIGH_BIT,
+	___GFP_IO_BIT,
+	___GFP_FS_BIT,
+	___GFP_ZERO_BIT,
+	___GFP_ATOMIC_BIT,
+	___GFP_DIRECT_RECLAIM_BIT,
+	___GFP_KSWAPD_RECLAIM_BIT,
+	___GFP_WRITE_BIT,
+	___GFP_NOWARN_BIT,
+	___GFP_RETRY_MAYFAIL_BIT,
+	___GFP_NOFAIL_BIT,
+	___GFP_NORETRY_BIT,
+	___GFP_MEMALLOC_BIT,
+	___GFP_COMP_BIT,
+	___GFP_NOMEMALLOC_BIT,
+	___GFP_HARDWALL_BIT,
+	___GFP_THISNODE_BIT,
+	___GFP_ACCOUNT_BIT,
+	___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+	___GFP_SKIP_ZERO_BIT,
+	___GFP_SKIP_KASAN_UNPOISON_BIT,
+	___GFP_SKIP_KASAN_POISON_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+	___GFP_NOLOCKDEP_BIT,
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+	___GFP_NO_OBJ_EXT_BIT,
+#endif
+	___GFP_LAST_BIT
+};
+
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA		0x01u
-#define ___GFP_HIGHMEM		0x02u
-#define ___GFP_DMA32		0x04u
-#define ___GFP_MOVABLE		0x08u
-#define ___GFP_RECLAIMABLE	0x10u
-#define ___GFP_HIGH		0x20u
-#define ___GFP_IO		0x40u
-#define ___GFP_FS		0x80u
-#define ___GFP_ZERO		0x100u
-#define ___GFP_ATOMIC		0x200u
-#define ___GFP_DIRECT_RECLAIM	0x400u
-#define ___GFP_KSWAPD_RECLAIM	0x800u
-#define ___GFP_WRITE		0x1000u
-#define ___GFP_NOWARN		0x2000u
-#define ___GFP_RETRY_MAYFAIL	0x4000u
-#define ___GFP_NOFAIL		0x8000u
-#define ___GFP_NORETRY		0x10000u
-#define ___GFP_MEMALLOC		0x20000u
-#define ___GFP_COMP		0x40000u
-#define ___GFP_NOMEMALLOC	0x80000u
-#define ___GFP_HARDWALL		0x100000u
-#define ___GFP_THISNODE		0x200000u
-#define ___GFP_ACCOUNT		0x400000u
-#define ___GFP_ZEROTAGS		0x800000u
+#define ___GFP_DMA		BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM		BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32		BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE		BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE	BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH		BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO		BIT(___GFP_IO_BIT)
+#define ___GFP_FS		BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO		BIT(___GFP_ZERO_BIT)
+#define ___GFP_ATOMIC		BIT(___GFP_ATOMIC_BIT)
+#define ___GFP_DIRECT_RECLAIM	BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM	BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE		BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN		BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL	BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL		BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY		BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC		BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP		BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC	BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL		BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE		BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT		BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS		BIT(___GFP_ZEROTAGS_BIT)
 #ifdef CONFIG_KASAN_HW_TAGS
-#define ___GFP_SKIP_ZERO		0x1000000u
-#define ___GFP_SKIP_KASAN_UNPOISON	0x2000000u
-#define ___GFP_SKIP_KASAN_POISON	0x4000000u
+#define ___GFP_SKIP_ZERO	BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN_UNPOISON	BIT(___GFP_SKIP_KASAN_UNPOISON_BIT)
+#define ___GFP_SKIP_KASAN_POISON	BIT(___GFP_SKIP_KASAN_POISON_BIT)
 #else
 #define ___GFP_SKIP_ZERO		0
 #define ___GFP_SKIP_KASAN_UNPOISON	0
 #define ___GFP_SKIP_KASAN_POISON	0
 #endif
-#ifdef CONFIG_SLAB_OBJ_EXT
-#define ___GFP_NO_OBJ_EXT       0x4000000u
-#else
-#define ___GFP_NO_OBJ_EXT       0
-#endif
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x8000000u
+#define ___GFP_NOLOCKDEP	BIT(___GFP_NOLOCKDEP_BIT)
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define ___GFP_NO_OBJ_EXT       BIT(___GFP_NO_OBJ_EXT_BIT)
+#else
+#define ___GFP_NO_OBJ_EXT       0
+#endif
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -264,7 +303,7 @@ typedef unsigned int __bitwise gfp_t;
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h
deleted file mode 100644
index 45ca9e2ce58b83bf0dae0e0dd48e26ba21e87af2..0000000000000000000000000000000000000000
--- a/include/linux/lazy-percpu-counter.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Lazy percpu counters:
- * (C) 2022 Kent Overstreet
- *
- * Lazy percpu counters start out in atomic mode, then switch to percpu mode if
- * the update rate crosses some threshold.
- *
- * This means we don't have to decide between low memory overhead atomic
- * counters and higher performance percpu counters - we can have our cake and
- * eat it, too!
- *
- * Internally we use an atomic64_t, where the low bit indicates whether we're in
- * percpu mode, and the high 8 bits are a secondary counter that's incremented
- * when the counter is modified - meaning 55 bits of precision are available for
- * the counter itself.
- */
-
-#ifndef _LINUX_LAZY_PERCPU_COUNTER_H
-#define _LINUX_LAZY_PERCPU_COUNTER_H
-
-#include <linux/atomic.h>
-#include <asm/percpu.h>
-
-struct lazy_percpu_counter {
-	atomic64_t			v;
-	unsigned long			last_wrap;
-};
-
-void lazy_percpu_counter_exit(struct lazy_percpu_counter *c);
-void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i);
-void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i);
-s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c);
-
-/*
- * We use the high bits of the atomic counter for a secondary counter, which is
- * incremented every time the counter is touched. When the secondary counter
- * wraps, we check the time the counter last wrapped, and if it was recent
- * enough that means the update frequency has crossed our threshold and we
- * switch to percpu mode:
- */
-#define COUNTER_MOD_BITS		8
-#define COUNTER_MOD_MASK		~(~0ULL >> COUNTER_MOD_BITS)
-#define COUNTER_MOD_BITS_START		(64 - COUNTER_MOD_BITS)
-
-/*
- * We use the low bit of the counter to indicate whether we're in atomic mode
- * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual
- * percpu counters:
- */
-#define COUNTER_IS_PCPU_BIT		1
-
-static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v)
-{
-	if (!(v & COUNTER_IS_PCPU_BIT))
-		return NULL;
-
-	v ^= COUNTER_IS_PCPU_BIT;
-	return (u64 __percpu *)(unsigned long)v;
-}
-
-/**
- * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter
- *
- * @c: counter to modify
- * @i: value to add
- */
-static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i)
-{
-	u64 v = atomic64_read(&c->v);
-	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
-
-	if (likely(pcpu_v))
-		this_cpu_add(*pcpu_v, i);
-	else
-		lazy_percpu_counter_add_slowpath(c, i);
-}
-
-/**
- * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter,
- * without upgrading to percpu mode
- *
- * @c: counter to modify
- * @i: value to add
- */
-static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i)
-{
-	u64 v = atomic64_read(&c->v);
-	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
-
-	if (likely(pcpu_v))
-		this_cpu_add(*pcpu_v, i);
-	else
-		lazy_percpu_counter_add_slowpath_noupgrade(c, i);
-}
-
-static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i)
-{
-	lazy_percpu_counter_add(c, -i);
-}
-
-#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 3f37d1644f49b8b9df09880e313feed49ab23add..0dcb63d69d6fbab10044e55f22b6eccece378d7e 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -37,19 +37,19 @@ int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
 		      mempool_free_t *free_fn, void *pool_data,
 		      gfp_t gfp_mask, int node_id);
 
-int _mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
 		 mempool_free_t *free_fn, void *pool_data);
-#define mempool_init(...)			\
-	alloc_hooks(_mempool_init(__VA_ARGS__), int, -ENOMEM)
+#define mempool_init(...)						\
+	alloc_hooks(mempool_init_noprof(__VA_ARGS__))
 
 extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
 			mempool_free_t *free_fn, void *pool_data);
 
-extern mempool_t *_mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+extern mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn,
 			mempool_free_t *free_fn, void *pool_data,
 			gfp_t gfp_mask, int nid);
-#define mempool_create_node(...)			\
-	alloc_hooks(_mempool_create_node(__VA_ARGS__), mempool_t *, NULL)
+#define mempool_create_node(...)					\
+	alloc_hooks(mempool_create_node_noprof(__VA_ARGS__))
 
 #define mempool_create(_min_nr, _alloc_fn, _free_fn, _pool_data)	\
 	mempool_create_node(_min_nr, _alloc_fn, _free_fn, _pool_data,	\
@@ -58,9 +58,9 @@ extern mempool_t *_mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
 extern int mempool_resize(mempool_t *pool, int new_min_nr);
 extern void mempool_destroy(mempool_t *pool);
 
-extern void *_mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
-#define mempool_alloc(_pool, _gfp)			\
-	alloc_hooks(_mempool_alloc((_pool), (_gfp)), void *, NULL)
+extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc;
+#define mempool_alloc(...)						\
+	alloc_hooks(mempool_alloc_noprof(__VA_ARGS__))
 
 extern void mempool_free(void *element, mempool_t *pool);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5931c51e601d6bb8ca47b2bd9989f8a3f19e7c9c..a9f95fab4304248f21363219ad22768ef16755c7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -467,16 +467,16 @@ static inline void *detach_page_private(struct page *page)
 }
 
 #ifdef CONFIG_NUMA
-struct folio *_filemap_alloc_folio(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
 #else
-static inline struct folio *_filemap_alloc_folio(gfp_t gfp, unsigned int order)
+static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 {
-	return _folio_alloc(gfp, order);
+	return folio_alloc_noprof(gfp, order);
 }
 #endif
 
-#define filemap_alloc_folio(_gfp, _order) \
-	alloc_hooks(_filemap_alloc_folio(_gfp, _order), struct folio *, NULL)
+#define filemap_alloc_folio(...)				\
+	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
 
 static inline struct page *__page_cache_alloc(gfp_t gfp)
 {
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 501dc8cca0031b9435bc1b2de85336b2399bf9c3..22f0a7fb16dd7278fa028ac08804013b325a7f98 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -126,15 +126,15 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 extern void __init setup_per_cpu_areas(void);
 #endif
 
-extern void __percpu *__pcpu_alloc(size_t size, size_t align, bool reserved,
+extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
 				   gfp_t gfp) __alloc_size(1);
 
-#define __alloc_percpu_gfp(_size, _align, _gfp)	alloc_hooks(		\
-		__pcpu_alloc(_size, _align, false, _gfp), void __percpu *, NULL)
-#define __alloc_percpu(_size, _align)		alloc_hooks(		\
-		__pcpu_alloc(_size, _align, false, GFP_KERNEL), void __percpu *, NULL)
-#define __alloc_reserved_percpu(_size, _align)	alloc_hooks(		\
-		__pcpu_alloc(_size, _align, true, GFP_KERNEL), void __percpu *, NULL)
+#define __alloc_percpu_gfp(_size, _align, _gfp)				\
+	alloc_hooks_pcpu(pcpu_alloc_noprof(_size, _align, false, _gfp))
+#define __alloc_percpu(_size, _align)					\
+	alloc_hooks_pcpu(pcpu_alloc_noprof(_size, _align, false, GFP_KERNEL))
+#define __alloc_reserved_percpu(_size, _align)				\
+	alloc_hooks_pcpu(pcpu_alloc_noprof(_size, _align, true, GFP_KERNEL))
 
 #define alloc_percpu_gfp(type, gfp)					\
 	(typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type),	\
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index e4661bbd40c64e5db8a84008579be9f27c8641cf..ae9b0f35926424e992c513e9a67eefa7a2762cfb 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -6,7 +6,6 @@
 #define _LINUX_PGALLOC_TAG_H
 
 #include <linux/alloc_tag.h>
-#include <linux/codetag_ctx.h>
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
 
@@ -39,11 +38,21 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page)
 
 static inline void put_page_tag_ref(union codetag_ref *ref)
 {
-	if (ref)
-		page_ext_put(page_ext_from_codetag_ref(ref));
+	page_ext_put(page_ext_from_codetag_ref(ref));
 }
 
-static inline void pgalloc_tag_dec(struct page *page, unsigned int order)
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+				   unsigned int order)
+{
+	union codetag_ref *ref = get_page_tag_ref(page);
+
+	if (ref) {
+		alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE << order);
+		put_page_tag_ref(ref);
+	}
+}
+
+static inline void pgalloc_tag_sub(struct page *page, unsigned int order)
 {
 	union codetag_ref *ref = get_page_tag_ref(page);
 
@@ -71,8 +80,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
 	if (!ref->ct)
 		goto out;
 
-	tag = is_codetag_ctx_ref(ref) ? ctc_to_alloc_tag(ref->ctx->ctc)
-				      : ct_to_alloc_tag(ref->ct);
+	tag = ct_to_alloc_tag(ref->ct);
 	page_ext = page_ext_next(page_ext);
 	for (i = 1; i < nr; i++) {
 		/* New reference with 0 bytes accounted */
@@ -87,7 +95,9 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
 
 static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
 static inline void put_page_tag_ref(union codetag_ref *ref) {}
-#define pgalloc_tag_dec(__page, __size)		do {} while (0)
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+				   unsigned int order) {}
+static inline void pgalloc_tag_sub(struct page *page, unsigned int order) {}
 static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 57467cbf4c5b1e0e8e68e440f1feadc45b1e806e..aac2984c2ef0ef16f7cde4d9a1ac9a5d65a19cc8 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -9,6 +9,7 @@
 #ifndef _LINUX_RHASHTABLE_TYPES_H
 #define _LINUX_RHASHTABLE_TYPES_H
 
+#include <linux/alloc_tag.h>
 #include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/mutex.h>
@@ -88,6 +89,9 @@ struct rhashtable {
 	struct mutex                    mutex;
 	spinlock_t			lock;
 	atomic_t			nelems;
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+	struct alloc_tag		*alloc_tag;
+#endif
 };
 
 /**
@@ -127,9 +131,12 @@ struct rhashtable_iter {
 	bool end_of_table;
 };
 
-int rhashtable_init(struct rhashtable *ht,
+int rhashtable_init_noprof(struct rhashtable *ht,
 		    const struct rhashtable_params *params);
-int rhltable_init(struct rhltable *hlt,
+#define rhashtable_init(...)	alloc_hooks(rhashtable_init_noprof(__VA_ARGS__))
+
+int rhltable_init_noprof(struct rhltable *hlt,
 		  const struct rhashtable_params *params);
+#define rhltable_init(...)	alloc_hooks(rhltable_init_noprof(__VA_ARGS__))
 
 #endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6606e3a12ca26d70f7cbff28b0fe19da1a42775a..c8a6f66dba838ac6ce4c7d99ce28b217aa0d949b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -946,9 +946,9 @@ struct task_struct {
 	/* Stalled due to lack of memory */
 	unsigned			in_memstall:1;
 #endif
-#ifdef CONFIG_STACKDEPOT
-	/* Used by stack_depot_capture_stack to detect recursion. */
-	unsigned			in_capture_stack:1;
+#ifdef CONFIG_PAGE_OWNER
+	/* Used by page_owner=on to detect recursion in page tracking. */
+	unsigned			in_page_owner:1;
 #endif
 #ifdef CONFIG_EVENTFD
 	/* Recursion prevention for eventfd_signal() */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 97fde83b2ac55c5fccff8942e9d05b46217e64a7..9155cf2e96b914ba7ce8e2007e8f300b609a3e78 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -199,9 +199,8 @@ int kmem_cache_shrink(struct kmem_cache *s);
 /*
  * Common kmalloc functions provided by all allocators
  */
-void * __must_check _krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
-#define krealloc(_p, _size, _flags)					\
-	alloc_hooks(_krealloc(_p, _size, _flags), void*, NULL)
+void * __must_check krealloc_noprof(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
+#define krealloc(...)				alloc_hooks(krealloc_noprof(__VA_ARGS__))
 
 void kfree(const void *objp);
 void kfree_sensitive(const void *objp);
@@ -452,7 +451,10 @@ static_assert(PAGE_SHIFT <= 20);
 
 #include <linux/alloc_tag.h>
 
-void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
+void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
+#define __kmalloc(...)				alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
+
+
 /**
  * kmem_cache_alloc - Allocate an object
  * @cachep: The cache to allocate from.
@@ -463,14 +465,12 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_siz
  *
  * Return: pointer to the new object or %NULL in case of error
  */
-void *_kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
-#define kmem_cache_alloc(_s, _flags)				\
-	alloc_hooks(_kmem_cache_alloc(_s, _flags), void*, NULL)
+void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc(...)			alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__))
 
-void *_kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
 			    gfp_t gfpflags) __assume_slab_alignment __malloc;
-#define kmem_cache_alloc_lru(_s, _lru, _flags)			\
-	alloc_hooks(_kmem_cache_alloc_lru(_s, _lru, _flags), void*, NULL)
+#define kmem_cache_alloc_lru(...)		alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))
 
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
@@ -482,9 +482,8 @@ void kmem_cache_free(struct kmem_cache *s, void *objp);
  * Note that interrupts must be enabled when calling these functions.
  */
 void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
-int _kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
-#define kmem_cache_alloc_bulk(_s, _flags, _size, _p)		\
-	alloc_hooks(_kmem_cache_alloc_bulk(_s, _flags, _size, _p), int, 0)
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
+#define kmem_cache_alloc_bulk(...)		alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))
 
 /*
  * Caller must not use kfree_bulk() on memory not originally allocated
@@ -495,34 +494,31 @@ static __always_inline void kfree_bulk(size_t size, void **p)
 	kmem_cache_free_bulk(NULL, size, p);
 }
 
-void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
+void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
 							 __alloc_size(1);
-void *_kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
+#define __kmalloc_node(...)			alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
+
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
 									  __malloc;
-#define kmem_cache_alloc_node(_s, _flags, _node)		\
-	alloc_hooks(_kmem_cache_alloc_node(_s, _flags, _node), void*, NULL)
+#define kmem_cache_alloc_node(...)		alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
 
-void *_kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
+void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
 		    __assume_kmalloc_alignment __alloc_size(3);
 
-void *_kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
-			 int node, size_t size) __assume_kmalloc_alignment
+void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
+		int node, size_t size) __assume_kmalloc_alignment
 						__alloc_size(4);
-#define kmalloc_trace(_s, _flags, _size)		\
-	alloc_hooks(_kmalloc_trace(_s, _flags, _size), void*, NULL)
+#define kmalloc_trace(...)			alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
 
-#define kmalloc_node_trace(_s, _gfpflags, _node, _size)	\
-	alloc_hooks(_kmalloc_node_trace(_s, _gfpflags, _node, _size), void*, NULL)
+#define kmalloc_node_trace(...)			alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
 
-void *_kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
+void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
 					      __alloc_size(1);
-#define kmalloc_large(_size, _flags)			\
-	alloc_hooks(_kmalloc_large(_size, _flags), void*, NULL)
+#define kmalloc_large(...)			alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
 
-void *_kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment
+void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
 							     __alloc_size(1);
-#define kmalloc_large_node(_size, _flags, _node)	\
-	alloc_hooks(_kmalloc_large_node(_size, _flags, _node), void*, NULL)
+#define kmalloc_large_node(...)			alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
 
 /**
  * kmalloc - allocate memory
@@ -578,62 +574,61 @@ void *_kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alig
  *	Try really hard to succeed the allocation but fail
  *	eventually.
  */
-static __always_inline __alloc_size(1) void *_kmalloc(size_t size, gfp_t flags)
+static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 #ifndef CONFIG_SLOB
 		unsigned int index;
 #endif
 		if (size > KMALLOC_MAX_CACHE_SIZE)
-			return _kmalloc_large(size, flags);
+			return kmalloc_large_noprof(size, flags);
 #ifndef CONFIG_SLOB
 		index = kmalloc_index(size);
 
 		if (!index)
 			return ZERO_SIZE_PTR;
 
-		return _kmalloc_trace(
+		return kmalloc_trace_noprof(
 				kmalloc_caches[kmalloc_type(flags)][index],
 				flags, size);
 #endif
 	}
-	return __kmalloc(size, flags);
+	return __kmalloc_noprof(size, flags);
 }
 
 #ifndef CONFIG_SLOB
-#define kmalloc(_size, _flags)  alloc_hooks(_kmalloc(_size, _flags), void*, NULL)
+#define kmalloc(...)				alloc_hooks(kmalloc_noprof(__VA_ARGS__))
 
-static __always_inline __alloc_size(1) void *_kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size)) {
 		unsigned int index;
 
 		if (size > KMALLOC_MAX_CACHE_SIZE)
-			return _kmalloc_large_node(size, flags, node);
+			return kmalloc_large_node_noprof(size, flags, node);
 
 		index = kmalloc_index(size);
 
 		if (!index)
 			return ZERO_SIZE_PTR;
 
-		return _kmalloc_node_trace(
+		return kmalloc_node_trace_noprof(
 				kmalloc_caches[kmalloc_type(flags)][index],
 				flags, node, size);
 	}
-	return __kmalloc_node(size, flags, node);
+	return __kmalloc_node_noprof(size, flags, node);
 }
 #else
-static __always_inline __alloc_size(1) void *_kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
-		return kmalloc_large_node(size, flags, node);
+		return kmalloc_large_node_noprof(size, flags, node);
 
-	return __kmalloc_node(size, flags, node);
+	return __kmalloc_node_noprof(size, flags, node);
 }
 #endif
 
-#define kmalloc_node(_size, _flags, _node)		\
-	alloc_hooks(_kmalloc_node(_size, _flags, _node), void*, NULL)
+#define kmalloc_node(...)			alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
 
 /**
  * kmalloc_array - allocate memory for an array.
@@ -641,18 +636,17 @@ static __always_inline __alloc_size(1) void *_kmalloc_node(size_t size, gfp_t fl
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline __alloc_size(1, 2) void *_kmalloc_array(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
 {
 	size_t bytes;
 
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 	if (__builtin_constant_p(n) && __builtin_constant_p(size))
-		return _kmalloc(bytes, flags);
-	return _kmalloc(bytes, flags);
+		return kmalloc_noprof(bytes, flags);
+	return kmalloc_noprof(bytes, flags);
 }
-#define kmalloc_array(_n, _size, _flags)		\
-	alloc_hooks(_kmalloc_array(_n, _size, _flags), void*, NULL)
+#define kmalloc_array(...)			alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
 
 /**
  * krealloc_array - reallocate memory for an array.
@@ -661,7 +655,7 @@ static inline __alloc_size(1, 2) void *_kmalloc_array(size_t n, size_t size, gfp
  * @new_size: new size of a single member of the array
  * @flags: the type of memory to allocate (see kmalloc)
  */
-static inline __realloc_size(2, 3) void * __must_check _krealloc_array(void *p,
+static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
 								       size_t new_n,
 								       size_t new_size,
 								       gfp_t flags)
@@ -671,10 +665,9 @@ static inline __realloc_size(2, 3) void * __must_check _krealloc_array(void *p,
 	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
 		return NULL;
 
-	return _krealloc(p, bytes, flags);
+	return krealloc_noprof(p, bytes, flags);
 }
-#define krealloc_array(_p, _n, _size, _flags)		\
-	alloc_hooks(_krealloc_array(_p, _n, _size, _flags), void*, NULL)
+#define krealloc_array(...)			alloc_hooks(krealloc_array_noprof(__VA_ARGS__))
 
 /**
  * kcalloc - allocate memory for an array. The memory is set to zero.
@@ -682,14 +675,11 @@ static inline __realloc_size(2, 3) void * __must_check _krealloc_array(void *p,
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-#define kcalloc(_n, _size, _flags)			\
-	kmalloc_array(_n, _size, (_flags) | __GFP_ZERO)
+#define kcalloc(_n, _size, _flags)		kmalloc_array(_n, _size, (_flags) | __GFP_ZERO)
 
-void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
+void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
 				  unsigned long caller) __alloc_size(1);
-#define kmalloc_node_track_caller(size, flags, node) \
-	alloc_hooks(__kmalloc_node_track_caller(size, flags, node, \
-				    _RET_IP_), void*, NULL)
+#define kmalloc_node_track_caller(...)		alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
 
 /*
  * kmalloc_track_caller is a special version of kmalloc that records the
@@ -699,10 +689,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-#define kmalloc_track_caller(size, flags)		\
-	kmalloc_node_track_caller(size, flags, NUMA_NO_NODE)
+#define kmalloc_track_caller(...)		kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE)
 
-static inline __alloc_size(1, 2) void *_kmalloc_array_node(size_t n, size_t size, gfp_t flags,
+static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
 							  int node)
 {
 	size_t bytes;
@@ -710,32 +699,32 @@ static inline __alloc_size(1, 2) void *_kmalloc_array_node(size_t n, size_t size
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 	if (__builtin_constant_p(n) && __builtin_constant_p(size))
-		return _kmalloc_node(bytes, flags, node);
-	return __kmalloc_node(bytes, flags, node);
+		return kmalloc_node_noprof(bytes, flags, node);
+	return __kmalloc_node_noprof(bytes, flags, node);
 }
-#define kmalloc_array_node(_n, _size, _flags, _node)	\
-	alloc_hooks(_kmalloc_array_node(_n, _size, _flags, _node), void*, NULL)
+#define kmalloc_array_node(...)			alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
 
-#define kcalloc_node(_n, _size, _flags, _node)		\
-	kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)
+#define kcalloc_node(_n, _size, _flags, _node)	kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)
 
 /*
  * Shortcuts
  */
-#define kmem_cache_zalloc(_k, _flags)			\
-	kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)
+#define kmem_cache_zalloc(_k, _flags)		kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)
 
 /**
  * kzalloc - allocate memory. The memory is set to zero.
  * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-#define kzalloc(_size, _flags)			kmalloc(_size, (_flags)|__GFP_ZERO)
+static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
+{
+	return kmalloc_noprof(size, flags | __GFP_ZERO);
+}
+#define kzalloc(...)				alloc_hooks(kzalloc_noprof(__VA_ARGS__))
 #define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
 
-extern void *_kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
-#define kvmalloc_node(_size, _flags, _node)              \
-	alloc_hooks(_kvmalloc_node(_size, _flags, _node), void*, NULL)
+extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node(...)			alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
 
 #define kvmalloc(_size, _flags)			kvmalloc_node(_size, _flags, NUMA_NO_NODE)
 #define kvzalloc(_size, _flags)			kvmalloc(_size, _flags|__GFP_ZERO)
@@ -751,11 +740,9 @@ extern void *_kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
 
 #define kvcalloc(_n, _size, _flags)		kvmalloc_array(_n, _size, _flags|__GFP_ZERO)
 
-extern void *_kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
 		      __realloc_size(3);
-
-#define kvrealloc(_p, _oldsize, _newsize, _flags)					\
-	alloc_hooks(_kvrealloc(_p, _oldsize, _newsize, _flags), void*, NULL)
+#define kvrealloc(...)				alloc_hooks(kvrealloc_noprof(__VA_ARGS__))
 
 extern void kvfree(const void *addr);
 extern void kvfree_sensitive(const void *addr, size_t len);
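
The hunks above (and the vmalloc, filemap and page-allocator hunks further down) all follow the same convention: the real implementation keeps its behaviour under a _noprof name, and the old name becomes a thin variadic macro that routes the call through alloc_hooks(), which charges the allocation to a tag generated for that call site. A minimal sketch of the convention applied to a hypothetical allocator (my_alloc/my_init are assumptions, not part of this series; kernel context with <linux/slab.h> included):

	/* Hypothetical allocator wrapped in the _noprof + alloc_hooks() pattern. */
	void *my_alloc_noprof(size_t size, gfp_t flags);
	#define my_alloc(...)	alloc_hooks(my_alloc_noprof(__VA_ARGS__))

	/*
	 * Callers keep using the old name; the macro expands at each call site,
	 * so the allocation below is accounted to this file and line.
	 */
	static int my_init(void)
	{
		void *buf = my_alloc(64, GFP_KERNEL);

		return buf ? 0 : -ENOMEM;
	}
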
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index c09090ddc2c295dc08bf6c08afa274ca39fc3dd9..9ca7798d7a318c92522da45488992c2ff81a0d04 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -72,20 +72,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
 
 void stack_depot_print(depot_stack_handle_t stack);
 
-/**
- * stack_depot_capture_init - Initialize stack depot capture mechanism
- *
- * Return: Stack depot initialization status
- */
-bool stack_depot_capture_init(void);
-
-/**
- * stack_depot_capture_stack - Capture current stack trace into stack depot
- *
- * @flags:	Allocation GFP flags
- *
- * Return: Handle of the stack trace stored in depot, 0 on failure
- */
-depot_stack_handle_t stack_depot_capture_stack(gfp_t flags);
-
 #endif
diff --git a/include/linux/string.h b/include/linux/string.h
index d34f8b637bb0a686bd0ce8b2a94221fe56b8a3e6..ce29498c4e4e8ae2a10774c134006b3edddc0314 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -96,7 +96,6 @@ extern char * strpbrk(const char *,const char *);
 #ifndef __HAVE_ARCH_STRSEP
 extern char * strsep(char **,const char *);
 #endif
-extern char *strsep_no_empty(char **, const char *);
 #ifndef __HAVE_ARCH_STRSPN
 extern __kernel_size_t strspn(const char *,const char *);
 #endif
@@ -177,7 +176,9 @@ extern void kfree_const(const void *x);
 extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
 extern const char *kstrdup_const(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
+#define kmemdup(...)	alloc_hooks(kmemdup_noprof(__VA_ARGS__))
+
 extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
 
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 8530c7328269257eb19ffdcd94e35ce7f3e50e07..069ae170eff07b8f26252297f8fa0e3ee7418664 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -11,15 +11,14 @@ struct device;
 struct file;
 struct task_struct;
 
-/* Descriptions of the types of units to
- * print in */
-enum string_size_units {
-	STRING_UNITS_10,	/* use powers of 10^3 (standard SI) */
-	STRING_UNITS_2,		/* use binary powers of 2^10 */
+enum string_size_flags {
+	STRING_SIZE_BASE2	= (1 << 0),
+	STRING_SIZE_NOSPACE	= (1 << 1),
+	STRING_SIZE_NOBYTES	= (1 << 2),
 };
 
-void string_get_size(u64 size, u64 blk_size, enum string_size_units units,
-		     char *buf, int len);
+int string_get_size(u64 size, u64 blk_size, enum string_size_flags flags,
+		    char *buf, int len);
 
 int parse_int_array_user(const char __user *from, size_t count, int **array);
 
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 096d48aa3437330b7274d06fff48348195147da4..6b9a632d2c28335cad66aea277c84b8fbd14e359 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_VMALLOC_H
 #define _LINUX_VMALLOC_H
 
+#include <linux/alloc_tag.h>
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/list.h>
@@ -139,26 +141,54 @@ static inline void vmalloc_init(void)
 static inline unsigned long vmalloc_nr_pages(void) { return 0; }
 #endif
 
-extern void *vmalloc(unsigned long size) __alloc_size(1);
-extern void *vzalloc(unsigned long size) __alloc_size(1);
-extern void *vmalloc_user(unsigned long size) __alloc_size(1);
-extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1);
-extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1);
-extern void *vmalloc_32(unsigned long size) __alloc_size(1);
-extern void *vmalloc_32_user(unsigned long size) __alloc_size(1);
-extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
-extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
+extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc(...)		alloc_hooks(vmalloc_noprof(__VA_ARGS__))
+
+extern void *vzalloc_noprof(unsigned long size) __alloc_size(1);
+#define vzalloc(...)		alloc_hooks(vzalloc_noprof(__VA_ARGS__))
+
+extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_user(...)	alloc_hooks(vmalloc_user_noprof(__VA_ARGS__))
+
+extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
+#define vmalloc_node(...)	alloc_hooks(vmalloc_node_noprof(__VA_ARGS__))
+
+extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
+#define vzalloc_node(...)	alloc_hooks(vzalloc_node_noprof(__VA_ARGS__))
+
+extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_32(...)		alloc_hooks(vmalloc_32_noprof(__VA_ARGS__))
+
+extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_32_user(...)	alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define __vmalloc(...)		alloc_hooks(__vmalloc_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller) __alloc_size(1);
-void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+#define __vmalloc_node_range(...)	alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__))
+
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
 		int node, const void *caller) __alloc_size(1);
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define __vmalloc_node(...)	alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
+
+void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define vmalloc_huge(...)	alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
+#define __vmalloc_array(...)	alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
+
+extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2);
+#define vmalloc_array(...)	alloc_hooks(vmalloc_array_noprof(__VA_ARGS__))
+
+extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
+#define __vcalloc(...)		alloc_hooks(__vcalloc_noprof(__VA_ARGS__))
 
-extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
-extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2);
-extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
-extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2);
+extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2);
+#define vcalloc(...)		alloc_hooks(vcalloc_noprof(__VA_ARGS__))
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
diff --git a/lib/Kconfig b/lib/Kconfig
index adf727c6f0f2b8e8e694304ba48923dd71c576aa..9bbf8a4b2108e6b9e263fd420aaaf3c1a20453ff 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -499,9 +499,6 @@ config ASSOCIATIVE_ARRAY
 
 	  for more information.
 
-config LAZY_PERCPU_COUNTER
-	bool
-
 config HAS_IOMEM
 	bool
 	depends on !NO_IOMEM
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index bce89a9726c7e11077887a06ed2c39b2d0138ccf..26400e1e3439ed3153aaa234d4c6de55b07b3c3e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -988,21 +988,26 @@ config CODE_TAGGING
 config MEM_ALLOC_PROFILING
 	bool "Enable memory allocation profiling"
 	default n
-	depends on DEBUG_FS
+	depends on PROC_FS
+	depends on !DEBUG_FORCE_WEAK_PER_CPU
 	select CODE_TAGGING
-	select LAZY_PERCPU_COUNTER
 	select PAGE_EXTENSION
 	select SLAB_OBJ_EXT
-	select STACKDEPOT
 	help
 	  Track allocation source code and record total allocation size
 	  initiated at that code location. The mechanism can be used to track
-	  memory leaks with a low performance impact.
+	  memory leaks with a low performance and memory impact.
+
+config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+	bool "Enable memory allocation profiling by default"
+	default y
+	depends on MEM_ALLOC_PROFILING
 
 config MEM_ALLOC_PROFILING_DEBUG
 	bool "Memory allocation profiler debugging"
 	default n
 	depends on MEM_ALLOC_PROFILING
+	select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
 	help
 	  Adds warnings with helpful error messages for memory allocation
 	  profiling.
diff --git a/lib/Makefile b/lib/Makefile
index 917f0a403478d3c52f943490b4d5b118584f9aa5..195c1c9fb5b1517c7b21126a90c08b314a323dbf 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -160,8 +160,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-obj-$(CONFIG_LAZY_PERCPU_COUNTER) += lazy-percpu-counter.o
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o
 obj-$(CONFIG_PACKING)	+= packing.o
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index e2ebab8999a9835dceea29678e80fda96b26c144..2f7a2e3ddf55cd4ca8e76b7034240ba688ab606e 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -1,140 +1,101 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/alloc_tag.h>
-#include <linux/codetag_ctx.h>
-#include <linux/debugfs.h>
 #include <linux/fs.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
 #include <linux/page_ext.h>
-#include <linux/sched/clock.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_buf.h>
-#include <linux/stackdepot.h>
-#include <linux/uaccess.h>
-
-#define STACK_BUF_SIZE 1024
+#include <linux/seq_file.h>
 
 static struct codetag_type *alloc_tag_cttype;
 
-DEFINE_STATIC_KEY_TRUE(mem_alloc_profiling_key);
-
-/*
- * Won't need to be exported once page allocation accounting is moved to the
- * correct place:
- */
-EXPORT_SYMBOL(mem_alloc_profiling_key);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+			mem_alloc_profiling_key);
 
-static int __init mem_alloc_profiling_disable(char *s)
+static void *allocinfo_start(struct seq_file *m, loff_t *pos)
 {
-	static_branch_disable(&mem_alloc_profiling_key);
-	return 1;
-}
-__setup("nomem_profiling", mem_alloc_profiling_disable);
-
-struct alloc_call_ctx {
-	struct codetag_ctx ctx;
-	size_t size;
-	pid_t pid;
-	pid_t tgid;
-	char comm[TASK_COMM_LEN];
-	u64 ts_nsec;
-	depot_stack_handle_t stack_handle;
-} __aligned(8);
-
-struct alloc_tag_file_iterator {
-	struct codetag_iterator ct_iter;
-	struct seq_buf		buf;
-	char			rawbuf[4096];
-};
+	struct codetag_iterator *iter;
+	struct codetag *ct;
+	loff_t node = *pos;
 
-struct user_buf {
-	char __user		*buf;	/* destination user buffer */
-	size_t			size;	/* size of requested read */
-	ssize_t			ret;	/* bytes read so far */
-};
+	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+	m->private = iter;
+	if (!iter)
+		return NULL;
 
-static int flush_ubuf(struct user_buf *dst, struct seq_buf *src)
-{
-	if (src->len) {
-		size_t bytes = min_t(size_t, src->len, dst->size);
-		int err = copy_to_user(dst->buf, src->buffer, bytes);
-
-		if (err)
-			return err;
-
-		dst->ret	+= bytes;
-		dst->buf	+= bytes;
-		dst->size	-= bytes;
-		src->len	-= bytes;
-		memmove(src->buffer, src->buffer + bytes, src->len);
-	}
+	codetag_lock_module_list(alloc_tag_cttype, true);
+	*iter = codetag_get_ct_iter(alloc_tag_cttype);
+	while ((ct = codetag_next_ct(iter)) != NULL && node)
+		node--;
 
-	return 0;
+	return ct ? iter : NULL;
 }
 
-static int allocations_file_open(struct inode *inode, struct file *file)
+static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
 {
-	struct codetag_type *cttype = inode->i_private;
-	struct alloc_tag_file_iterator *iter;
+	struct codetag_iterator *iter = (struct codetag_iterator *)arg;
+	struct codetag *ct = codetag_next_ct(iter);
 
-	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
-	if (!iter)
-		return -ENOMEM;
-
-	codetag_lock_module_list(cttype, true);
-	codetag_init_iter(&iter->ct_iter, cttype);
-	codetag_lock_module_list(cttype, false);
-	seq_buf_init(&iter->buf, iter->rawbuf, sizeof(iter->rawbuf));
-	file->private_data = iter;
+	(*pos)++;
+	if (!ct)
+		return NULL;
 
-	return 0;
+	return iter;
 }
 
-static int allocations_file_release(struct inode *inode, struct file *file)
+static void allocinfo_stop(struct seq_file *m, void *arg)
 {
-	struct alloc_tag_file_iterator *iter = file->private_data;
+	struct codetag_iterator *iter = (struct codetag_iterator *)m->private;
 
-	kfree(iter);
-	return 0;
+	if (iter) {
+		codetag_lock_module_list(alloc_tag_cttype, false);
+		kfree(iter);
+	}
 }
 
 static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
 {
 	struct alloc_tag *tag = ct_to_alloc_tag(ct);
-	char buf[10];
+	struct alloc_tag_counters counter = alloc_tag_read(tag);
+	s64 bytes = counter.bytes;
+	char val[10], *p = val;
 
-	string_get_size(lazy_percpu_counter_read(&tag->bytes_allocated), 1,
-			STRING_UNITS_2, buf, sizeof(buf));
+	if (bytes < 0) {
+		*p++ = '-';
+		bytes = -bytes;
+	}
 
-	seq_buf_printf(out, "%8s ", buf);
+	string_get_size(bytes, 1,
+			STRING_SIZE_BASE2|STRING_SIZE_NOSPACE,
+			p, val + ARRAY_SIZE(val) - p);
+
+	seq_buf_printf(out, "%8s %8llu ", val, counter.calls);
 	codetag_to_text(out, ct);
+	seq_buf_putc(out, ' ');
 	seq_buf_putc(out, '\n');
 }
 
-static ssize_t allocations_file_read(struct file *file, char __user *ubuf,
-				     size_t size, loff_t *ppos)
+static int allocinfo_show(struct seq_file *m, void *arg)
 {
-	struct alloc_tag_file_iterator *iter = file->private_data;
-	struct user_buf	buf = { .buf = ubuf, .size = size };
-	struct codetag *ct;
-	int err = 0;
-
-	codetag_lock_module_list(iter->ct_iter.cttype, true);
-	while (1) {
-		err = flush_ubuf(&buf, &iter->buf);
-		if (err || !buf.size)
-			break;
-
-		ct = codetag_next_ct(&iter->ct_iter);
-		if (!ct)
-			break;
-
-		alloc_tag_to_text(&iter->buf, ct);
-	}
-	codetag_lock_module_list(iter->ct_iter.cttype, false);
+	struct codetag_iterator *iter = (struct codetag_iterator *)arg;
+	char *bufp;
+	size_t n = seq_get_buf(m, &bufp);
+	struct seq_buf buf;
 
-	return err ? : buf.ret;
+	seq_buf_init(&buf, bufp, n);
+	alloc_tag_to_text(&buf, iter->ct);
+	seq_commit(m, seq_buf_used(&buf));
+	return 0;
 }
 
+static const struct seq_operations allocinfo_seq_op = {
+	.start	= allocinfo_start,
+	.next	= allocinfo_next,
+	.stop	= allocinfo_stop,
+	.show	= allocinfo_show,
+};
+
 void alloc_tags_show_mem_report(struct seq_buf *s)
 {
 	struct codetag_iterator iter;
@@ -145,12 +106,12 @@ void alloc_tags_show_mem_report(struct seq_buf *s)
 	} tags[10], n;
 	unsigned int i, nr = 0;
 
-	codetag_init_iter(&iter, alloc_tag_cttype);
-
 	codetag_lock_module_list(alloc_tag_cttype, true);
+	iter = codetag_get_ct_iter(alloc_tag_cttype);
 	while ((ct = codetag_next_ct(&iter))) {
+		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));
 		n.tag	= ct;
-		n.bytes = lazy_percpu_counter_read(&ct_to_alloc_tag(ct)->bytes_allocated);
+		n.bytes = counter.bytes;
 
 		for (i = 0; i < nr; i++)
 			if (n.bytes > tags[i].bytes)
@@ -172,258 +133,29 @@ void alloc_tags_show_mem_report(struct seq_buf *s)
 	codetag_lock_module_list(alloc_tag_cttype, false);
 }
 
-static const struct file_operations allocations_file_ops = {
-	.owner	= THIS_MODULE,
-	.open	= allocations_file_open,
-	.release = allocations_file_release,
-	.read	= allocations_file_read,
-};
-
-static void alloc_tag_ops_free_ctx(struct kref *refcount)
-{
-	kfree(container_of(kref_to_ctx(refcount), struct alloc_call_ctx, ctx));
-}
-
-struct codetag_ctx *alloc_tag_create_ctx(struct alloc_tag *tag, size_t size)
-{
-	struct alloc_call_ctx *ac_ctx;
-
-	/* TODO: use a dedicated kmem_cache */
-	ac_ctx = kmalloc(sizeof(struct alloc_call_ctx), GFP_KERNEL);
-	if (WARN_ON(!ac_ctx))
-		return NULL;
-
-	ac_ctx->size = size;
-	ac_ctx->pid = current->pid;
-	ac_ctx->tgid = current->tgid;
-	strscpy(ac_ctx->comm, current->comm, sizeof(ac_ctx->comm));
-	ac_ctx->ts_nsec = local_clock();
-	ac_ctx->stack_handle =
-			stack_depot_capture_stack(GFP_NOWAIT | __GFP_NOWARN);
-	add_ctx(&ac_ctx->ctx, &tag->ctc);
-
-	return &ac_ctx->ctx;
-}
-EXPORT_SYMBOL_GPL(alloc_tag_create_ctx);
-
-void alloc_tag_free_ctx(struct codetag_ctx *ctx, struct alloc_tag **ptag)
-{
-	*ptag = ctc_to_alloc_tag(ctx->ctc);
-	rem_ctx(ctx, alloc_tag_ops_free_ctx);
-}
-EXPORT_SYMBOL_GPL(alloc_tag_free_ctx);
-
-bool alloc_tag_enable_ctx(struct alloc_tag *tag, bool enable)
-{
-	static bool stack_depot_ready;
-
-	if (enable && !stack_depot_ready) {
-		stack_depot_init();
-		stack_depot_capture_init();
-		stack_depot_ready = true;
-	}
-
-	return codetag_enable_ctx(&tag->ctc, enable);
-}
-
-static void alloc_tag_ctx_to_text(struct seq_buf *out, struct codetag_ctx *ctx)
-{
-	struct alloc_call_ctx *ac_ctx;
-	char *buf;
-
-	ac_ctx = container_of(ctx, struct alloc_call_ctx, ctx);
-	seq_buf_printf(out, "    size: %zu\n", ac_ctx->size);
-	seq_buf_printf(out, "    pid: %d\n", ac_ctx->pid);
-	seq_buf_printf(out, "    tgid: %d\n", ac_ctx->tgid);
-	seq_buf_printf(out, "    comm: %s\n", ac_ctx->comm);
-	seq_buf_printf(out, "    ts: %llu\n", ac_ctx->ts_nsec);
-
-	buf = kmalloc(STACK_BUF_SIZE, GFP_KERNEL);
-	if (buf) {
-		int bytes_read = stack_depot_snprint(ac_ctx->stack_handle, buf,
-						     STACK_BUF_SIZE - 1, 8);
-		buf[bytes_read] = '\0';
-		seq_buf_printf(out, "    call stack:\n%s\n", buf);
-	}
-	kfree(buf);
-}
-
-static ssize_t allocations_ctx_file_read(struct file *file, char __user *ubuf,
-					 size_t size, loff_t *ppos)
-{
-	struct alloc_tag_file_iterator *iter = file->private_data;
-	struct codetag_iterator *ct_iter = &iter->ct_iter;
-	struct user_buf	buf = { .buf = ubuf, .size = size };
-	struct codetag_ctx *ctx;
-	struct codetag *prev_ct;
-	int err = 0;
-
-	codetag_lock_module_list(ct_iter->cttype, true);
-	while (1) {
-		err = flush_ubuf(&buf, &iter->buf);
-		if (err || !buf.size)
-			break;
-
-		prev_ct = ct_iter->ct;
-		ctx = codetag_next_ctx(ct_iter);
-		if (!ctx)
-			break;
-
-		if (prev_ct != &ctx->ctc->ct)
-			alloc_tag_to_text(&iter->buf, &ctx->ctc->ct);
-		alloc_tag_ctx_to_text(&iter->buf, ctx);
-	}
-	codetag_lock_module_list(ct_iter->cttype, false);
-
-	return err ? : buf.ret;
-}
-
-#define CTX_CAPTURE_TOKENS()	\
-	x(disable,	0)	\
-	x(enable,	0)
-
-static const char * const ctx_capture_token_strs[] = {
-#define x(name, nr_args)	#name,
-	CTX_CAPTURE_TOKENS()
-#undef x
-	NULL
-};
-
-enum ctx_capture_token {
-#define x(name, nr_args)	TOK_##name,
-	CTX_CAPTURE_TOKENS()
-#undef x
-};
-
-static int enable_ctx_capture(struct codetag_type *cttype,
-			      struct codetag_query *query, bool enable)
-{
-	struct codetag_iterator ct_iter;
-	struct codetag_with_ctx *ctc;
-	struct codetag *ct;
-	unsigned int nfound = 0;
-
-	codetag_lock_module_list(cttype, true);
-
-	codetag_init_iter(&ct_iter, cttype);
-	while ((ct = codetag_next_ct(&ct_iter))) {
-		if (!codetag_matches_query(query, ct, ct_iter.cmod, NULL))
-			continue;
-
-		ctc = ct_to_ctc(ct);
-		if (codetag_ctx_enabled(ctc) == enable)
-			continue;
-
-		if (!alloc_tag_enable_ctx(ctc_to_alloc_tag(ctc), enable)) {
-			pr_warn("Failed to toggle context capture\n");
-			continue;
-		}
-
-		nfound++;
-	}
-
-	codetag_lock_module_list(cttype, false);
-
-	return nfound ? 0 : -ENOENT;
-}
-
-static int parse_command(struct codetag_type *cttype, char *buf)
+static void __init procfs_init(void)
 {
-	struct codetag_query query = { NULL };
-	char *cmd;
-	int ret;
-	int tok;
-
-	buf = codetag_query_parse(&query, buf);
-	if (IS_ERR(buf))
-		return PTR_ERR(buf);
-
-	cmd = strsep_no_empty(&buf, " \t\r\n");
-	if (!cmd)
-		return -EINVAL;	/* no command */
-
-	tok = match_string(ctx_capture_token_strs,
-			   ARRAY_SIZE(ctx_capture_token_strs), cmd);
-	if (tok < 0)
-		return -EINVAL;	/* unknown command */
-
-	ret = enable_ctx_capture(cttype, &query, tok == TOK_enable);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op);
 }
 
-static ssize_t allocations_ctx_file_write(struct file *file, const char __user *ubuf,
-					  size_t len, loff_t *offp)
+static bool alloc_tag_module_unload(struct codetag_type *cttype,
+				    struct codetag_module *cmod)
 {
-	struct alloc_tag_file_iterator *iter = file->private_data;
-	char tmpbuf[256];
-
-	if (len == 0)
-		return 0;
-	/* we don't check *offp -- multiple writes() are allowed */
-	if (len > sizeof(tmpbuf) - 1)
-		return -E2BIG;
-
-	if (copy_from_user(tmpbuf, ubuf, len))
-		return -EFAULT;
-
-	tmpbuf[len] = '\0';
-	parse_command(iter->ct_iter.cttype, tmpbuf);
-
-	*offp += len;
-	return len;
-}
-
-static const struct file_operations allocations_ctx_file_ops = {
-	.owner	= THIS_MODULE,
-	.open	= allocations_file_open,
-	.release = allocations_file_release,
-	.read	= allocations_ctx_file_read,
-	.write	= allocations_ctx_file_write,
-};
-
-static int __init dbgfs_init(struct codetag_type *cttype)
-{
-	struct dentry *file;
-	struct dentry *ctx_file;
-
-	file = debugfs_create_file("allocations", 0444, NULL, cttype,
-				   &allocations_file_ops);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	ctx_file = debugfs_create_file("allocations.ctx", 0666, NULL, cttype,
-				       &allocations_ctx_file_ops);
-	if (IS_ERR(ctx_file)) {
-		debugfs_remove(file);
-		return PTR_ERR(ctx_file);
-	}
-
-	return 0;
-}
-
-static bool alloc_tag_module_unload(struct codetag_type *cttype, struct codetag_module *cmod)
-{
-	struct codetag_iterator iter;
+	struct codetag_iterator iter = codetag_get_ct_iter(cttype);
+	struct alloc_tag_counters counter;
 	bool module_unused = true;
 	struct alloc_tag *tag;
 	struct codetag *ct;
-	size_t bytes;
 
-	codetag_init_iter(&iter, cttype);
 	for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) {
 		if (iter.cmod != cmod)
 			continue;
 
 		tag = ct_to_alloc_tag(ct);
-		bytes = lazy_percpu_counter_read(&tag->bytes_allocated);
+		counter = alloc_tag_read(tag);
 
-		if (!WARN(bytes, "%s:%u module %s func:%s has %zu allocated at module unload",
-			  ct->filename, ct->lineno, ct->modname, ct->function, bytes))
-			lazy_percpu_counter_exit(&tag->bytes_allocated);
-		else
+		if (WARN(counter.bytes, "%s:%u module %s func:%s has %llu allocated at module unload",
+			  ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes))
 			module_unused = false;
 	}
 
@@ -446,19 +178,35 @@ struct page_ext_operations page_alloc_tagging_ops = {
 };
 EXPORT_SYMBOL(page_alloc_tagging_ops);
 
+static struct ctl_table memory_allocation_profiling_sysctls[] = {
+	{
+		.procname	= "mem_profiling",
+		.data		= &mem_alloc_profiling_key,
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+		.mode		= 0444,
+#else
+		.mode		= 0644,
+#endif
+		.proc_handler	= proc_do_static_key,
+	},
+	{ }
+};
+
 static int __init alloc_tag_init(void)
 {
 	const struct codetag_type_desc desc = {
 		.section	= "alloc_tags",
 		.tag_size	= sizeof(struct alloc_tag),
 		.module_unload	= alloc_tag_module_unload,
-		.free_ctx	= alloc_tag_ops_free_ctx,
 	};
 
 	alloc_tag_cttype = codetag_register_type(&desc);
 	if (IS_ERR_OR_NULL(alloc_tag_cttype))
 		return PTR_ERR(alloc_tag_cttype);
 
-	return dbgfs_init(alloc_tag_cttype);
+	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+	procfs_init();
+
+	return 0;
 }
 module_init(alloc_tag_init);
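
With the debugfs interface removed, profiling data is exposed through the read-only seq_file registered above as /proc/allocinfo, and the feature is toggled at run time via the vm.mem_profiling sysctl instead of the old nomem_profiling boot parameter. A small userspace sketch (an assumption, not part of the patch) that prints the first few entries; entries appear one tag per line in code-tag order, so sort on the first column to find the largest consumers:

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/allocinfo", "r");

		if (!f) {
			perror("/proc/allocinfo");
			return 1;
		}
		/* Dump the first ten per-callsite entries. */
		for (int i = 0; i < 10 && fgets(line, sizeof(line), f); i++)
			fputs(line, stdout);
		fclose(f);
		return 0;
	}
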
diff --git a/lib/codetag.c b/lib/codetag.c
index cbff146b3fe8275835fd4eaa4ef9c7cd0a71abc5..0ad4ea66c76973893c8c411c4871efad2c9eca6b 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/codetag.h>
-#include <linux/codetag_ctx.h>
 #include <linux/idr.h>
 #include <linux/kallsyms.h>
 #include <linux/module.h>
@@ -27,14 +26,16 @@ void codetag_lock_module_list(struct codetag_type *cttype, bool lock)
 		up_read(&cttype->mod_lock);
 }
 
-void codetag_init_iter(struct codetag_iterator *iter,
-		       struct codetag_type *cttype)
+struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype)
 {
-	iter->cttype = cttype;
-	iter->cmod = NULL;
-	iter->mod_id = 0;
-	iter->ct = NULL;
-	iter->ctx = NULL;
+	struct codetag_iterator iter = {
+		.cttype = cttype,
+		.cmod = NULL,
+		.mod_id = 0,
+		.ct = NULL,
+	};
+
+	return iter;
 }
 
 static inline struct codetag *get_first_module_ct(struct codetag_module *cmod)
@@ -91,143 +92,6 @@ struct codetag *codetag_next_ct(struct codetag_iterator *iter)
 	return ct;
 }
 
-static struct codetag_ctx *next_ctx_from_ct(struct codetag_iterator *iter)
-{
-	struct codetag_with_ctx *ctc;
-	struct codetag_ctx *ctx = NULL;
-	struct codetag *ct = iter->ct;
-
-	while (ct) {
-		if (!(ct->flags & CTC_FLAG_CTX_READY))
-			goto next;
-
-		ctc = ct_to_ctc(ct);
-		spin_lock(&ctc->ctx_lock);
-		if (!list_empty(&ctc->ctx_head)) {
-			ctx = list_first_entry(&ctc->ctx_head,
-					       struct codetag_ctx, node);
-			kref_get(&ctx->refcount);
-		}
-		spin_unlock(&ctc->ctx_lock);
-		if (ctx)
-			break;
-next:
-		ct = codetag_next_ct(iter);
-	}
-
-	iter->ctx = ctx;
-	return ctx;
-}
-
-struct codetag_ctx *codetag_next_ctx(struct codetag_iterator *iter)
-{
-	struct codetag_ctx *ctx = iter->ctx;
-	struct codetag_ctx *found = NULL;
-
-	lockdep_assert_held(&iter->cttype->mod_lock);
-
-	/* Move to the first codetag if search just started */
-	if (!iter->ct)
-		codetag_next_ct(iter);
-
-	if (!ctx)
-		return next_ctx_from_ct(iter);
-
-	spin_lock(&ctx->ctc->ctx_lock);
-	/*
-	 * Do not advance if the object was isolated, restart at the same tag.
-	 */
-	if (!list_empty(&ctx->node)) {
-		if (list_is_last(&ctx->node, &ctx->ctc->ctx_head)) {
-			/* Finished with this tag, advance to the next */
-			codetag_next_ct(iter);
-		} else {
-			found = list_next_entry(ctx, node);
-			kref_get(&found->refcount);
-		}
-	}
-	spin_unlock(&ctx->ctc->ctx_lock);
-	kref_put(&ctx->refcount, iter->cttype->desc.free_ctx);
-
-	if (!found)
-		return next_ctx_from_ct(iter);
-
-	iter->ctx = found;
-	return found;
-}
-
-static struct codetag_type *find_cttype(struct codetag *ct)
-{
-	struct codetag_module *cmod;
-	struct codetag_type *cttype;
-	unsigned long mod_id;
-	unsigned long tmp;
-
-	mutex_lock(&codetag_lock);
-	list_for_each_entry(cttype, &codetag_types, link) {
-		down_read(&cttype->mod_lock);
-		idr_for_each_entry_ul(&cttype->mod_idr, cmod, tmp, mod_id) {
-			if (ct >= cmod->range.start && ct < cmod->range.stop) {
-				up_read(&cttype->mod_lock);
-				goto found;
-			}
-		}
-		up_read(&cttype->mod_lock);
-	}
-	cttype = NULL;
-found:
-	mutex_unlock(&codetag_lock);
-
-	return cttype;
-}
-
-bool codetag_enable_ctx(struct codetag_with_ctx *ctc, bool enable)
-{
-	struct codetag_type *cttype = find_cttype(&ctc->ct);
-
-	if (!cttype || !cttype->desc.free_ctx)
-		return false;
-
-	lockdep_assert_held(&cttype->mod_lock);
-	BUG_ON(!rwsem_is_locked(&cttype->mod_lock));
-
-	if (codetag_ctx_enabled(ctc) == enable)
-		return false;
-
-	if (enable) {
-		/* Initialize context capture fields only once */
-		if (!(ctc->ct.flags & CTC_FLAG_CTX_READY)) {
-			spin_lock_init(&ctc->ctx_lock);
-			INIT_LIST_HEAD(&ctc->ctx_head);
-			ctc->ct.flags |= CTC_FLAG_CTX_READY;
-		}
-		ctc->ct.flags |= CTC_FLAG_CTX_ENABLED;
-	} else {
-		/*
-		 * The list of context objects is intentionally left untouched.
-		 * It can be read back and if context capture is re-enablied it
-		 * will append new objects.
-		 */
-		ctc->ct.flags &= ~CTC_FLAG_CTX_ENABLED;
-	}
-
-	return true;
-}
-
-bool codetag_has_ctx(struct codetag_with_ctx *ctc)
-{
-	bool no_ctx;
-
-	if (!(ctc->ct.flags & CTC_FLAG_CTX_READY))
-		return false;
-
-	spin_lock(&ctc->ctx_lock);
-	no_ctx = list_empty(&ctc->ctx_head);
-	spin_unlock(&ctc->ctx_lock);
-
-	return !no_ctx;
-}
-
 void codetag_to_text(struct seq_buf *out, struct codetag *ct)
 {
 	seq_buf_printf(out, "%s:%u module:%s func:%s",
@@ -392,138 +256,3 @@ bool codetag_unload_module(struct module *mod)
 
 	return unload_ok;
 }
-
-/* Codetag query parsing */
-
-#define CODETAG_QUERY_TOKENS()	\
-	x(func)			\
-	x(file)			\
-	x(line)			\
-	x(module)		\
-	x(class)		\
-	x(index)
-
-enum tokens {
-#define x(name)		TOK_##name,
-	CODETAG_QUERY_TOKENS()
-#undef x
-};
-
-static const char * const token_strs[] = {
-#define x(name)		#name,
-	CODETAG_QUERY_TOKENS()
-#undef x
-	NULL
-};
-
-static int parse_range(char *str, unsigned int *first, unsigned int *last)
-{
-	char *first_str = str;
-	char *last_str = strchr(first_str, '-');
-
-	if (last_str)
-		*last_str++ = '\0';
-
-	if (kstrtouint(first_str, 10, first))
-		return -EINVAL;
-
-	if (!last_str)
-		*last = *first;
-	else if (kstrtouint(last_str, 10, last))
-		return -EINVAL;
-
-	return 0;
-}
-
-char *codetag_query_parse(struct codetag_query *q, char *buf)
-{
-	while (1) {
-		char *p = buf;
-		char *str1 = strsep_no_empty(&p, " \t\r\n");
-		char *str2 = strsep_no_empty(&p, " \t\r\n");
-		int ret, token;
-
-		if (!str1 || !str2)
-			break;
-
-		token = match_string(token_strs, ARRAY_SIZE(token_strs), str1);
-		if (token < 0)
-			break;
-
-		switch (token) {
-		case TOK_func:
-			q->function = str2;
-			break;
-		case TOK_file:
-			q->filename = str2;
-			break;
-		case TOK_line:
-			ret = parse_range(str2, &q->first_line, &q->last_line);
-			if (ret)
-				return ERR_PTR(ret);
-			q->match_line = true;
-			break;
-		case TOK_module:
-			q->module = str2;
-			break;
-		case TOK_class:
-			q->class = str2;
-			break;
-		case TOK_index:
-			ret = parse_range(str2, &q->first_index, &q->last_index);
-			if (ret)
-				return ERR_PTR(ret);
-			q->match_index = true;
-			break;
-		}
-
-		buf = p;
-	}
-
-	return buf;
-}
-
-bool codetag_matches_query(struct codetag_query *q,
-			   const struct codetag *ct,
-			   const struct codetag_module *mod,
-			   const char *class)
-{
-	size_t classlen = q->class ? strlen(q->class) : 0;
-
-	if (q->module &&
-	    (!mod->mod ||
-	     strcmp(q->module, ct->modname)))
-		return false;
-
-	if (q->filename &&
-	    strcmp(q->filename, ct->filename) &&
-	    strcmp(q->filename, kbasename(ct->filename)))
-		return false;
-
-	if (q->function &&
-	    strcmp(q->function, ct->function))
-		return false;
-
-	/* match against the line number range */
-	if (q->match_line &&
-	    (ct->lineno < q->first_line ||
-	     ct->lineno > q->last_line))
-		return false;
-
-	/* match against the class */
-	if (classlen &&
-	    (strncmp(q->class, class, classlen) ||
-	     (class[classlen] && class[classlen] != ':')))
-		return false;
-
-	/* match against the fault index */
-	if (q->match_index &&
-	    (q->cur_index < q->first_index ||
-	     q->cur_index > q->last_index)) {
-		q->cur_index++;
-		return false;
-	}
-
-	q->cur_index++;
-	return true;
-}
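
codetag_init_iter() is replaced above by codetag_get_ct_iter(), which returns the iterator by value, and all of the per-tag context machinery is dropped. A short sketch of walking every tag with the new API (walk_tags() is an assumption; the locking and iteration calls mirror alloc_tags_show_mem_report() earlier in this patch):

	static void walk_tags(struct codetag_type *cttype)
	{
		struct codetag_iterator iter;
		struct codetag *ct;

		codetag_lock_module_list(cttype, true);
		iter = codetag_get_ct_iter(cttype);
		while ((ct = codetag_next_ct(&iter)))
			pr_info("%s:%u func:%s\n",
				ct->filename, ct->lineno, ct->function);
		codetag_lock_module_list(cttype, false);
	}
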
diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c
deleted file mode 100644
index 4f4e32c2dc0980f6888b7708b6dbbbef0bf624ec..0000000000000000000000000000000000000000
--- a/lib/lazy-percpu-counter.c
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/atomic.h>
-#include <linux/gfp.h>
-#include <linux/jiffies.h>
-#include <linux/lazy-percpu-counter.h>
-#include <linux/percpu.h>
-
-static inline s64 lazy_percpu_counter_atomic_val(s64 v)
-{
-	/* Ensure output is sign extended properly: */
-	return (v << COUNTER_MOD_BITS) >>
-		(COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT);
-}
-
-static void lazy_percpu_counter_switch_to_pcpu(struct lazy_percpu_counter *c)
-{
-	u64 __percpu *pcpu_v = alloc_percpu_gfp(u64, GFP_ATOMIC|__GFP_NOWARN);
-	u64 old, new, v;
-
-	if (!pcpu_v)
-		return;
-
-	preempt_disable();
-	v = atomic64_read(&c->v);
-	do {
-		if (lazy_percpu_counter_is_pcpu(v)) {
-			free_percpu(pcpu_v);
-			return;
-		}
-
-		old = v;
-		new = (unsigned long)pcpu_v | 1;
-
-		*this_cpu_ptr(pcpu_v) = lazy_percpu_counter_atomic_val(v);
-	} while ((v = atomic64_cmpxchg(&c->v, old, new)) != old);
-	preempt_enable();
-}
-
-/**
- * lazy_percpu_counter_exit: Free resources associated with a
- * lazy_percpu_counter
- *
- * @c: counter to exit
- */
-void lazy_percpu_counter_exit(struct lazy_percpu_counter *c)
-{
-	free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v)));
-}
-EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit);
-
-/**
- * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter
- *
- * @c: counter to read
- */
-s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c)
-{
-	s64 v = atomic64_read(&c->v);
-	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
-
-	if (pcpu_v) {
-		int cpu;
-
-		v = 0;
-		for_each_possible_cpu(cpu)
-			v += *per_cpu_ptr(pcpu_v, cpu);
-	} else {
-		v = lazy_percpu_counter_atomic_val(v);
-	}
-
-	return v;
-}
-EXPORT_SYMBOL_GPL(lazy_percpu_counter_read);
-
-void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i)
-{
-	u64 atomic_i;
-	u64 old, v = atomic64_read(&c->v);
-	u64 __percpu *pcpu_v;
-
-	atomic_i  = i << COUNTER_IS_PCPU_BIT;
-	atomic_i &= ~COUNTER_MOD_MASK;
-	atomic_i |= 1ULL << COUNTER_MOD_BITS_START;
-
-	do {
-		pcpu_v = lazy_percpu_counter_is_pcpu(v);
-		if (pcpu_v) {
-			this_cpu_add(*pcpu_v, i);
-			return;
-		}
-
-		old = v;
-	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
-
-	if (unlikely(!(v & COUNTER_MOD_MASK))) {
-		unsigned long now = jiffies;
-
-		if (c->last_wrap &&
-		    unlikely(time_after(c->last_wrap + HZ, now)))
-			lazy_percpu_counter_switch_to_pcpu(c);
-		else
-			c->last_wrap = now;
-	}
-}
-EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath);
-
-void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i)
-{
-	u64 atomic_i;
-	u64 old, v = atomic64_read(&c->v);
-	u64 __percpu *pcpu_v;
-
-	atomic_i  = i << COUNTER_IS_PCPU_BIT;
-	atomic_i &= ~COUNTER_MOD_MASK;
-
-	do {
-		pcpu_v = lazy_percpu_counter_is_pcpu(v);
-		if (pcpu_v) {
-			this_cpu_add(*pcpu_v, i);
-			return;
-		}
-
-		old = v;
-	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
-}
-EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade);
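
The lazy per-cpu counters deleted above are superseded by per-tag counters read through alloc_tag_read(), which reports both outstanding bytes and the number of calls. A minimal sketch of consuming one tag (report_tag() is an assumption; the field types follow the usage in lib/alloc_tag.c above):

	static void report_tag(struct codetag *ct)
	{
		struct alloc_tag_counters c = alloc_tag_read(ct_to_alloc_tag(ct));

		pr_info("%s:%u func:%s -> %lld bytes over %llu calls\n",
			ct->filename, ct->lineno, ct->function, c.bytes, c.calls);
	}
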
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e12bbfb240b812bc4945c6f0d4910f35667b0e8e..70c2292dbd8ee14f958692b213b2e09c8c91cfb3 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -63,6 +63,27 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #define ASSERT_RHT_MUTEX(HT)
 #endif
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static inline void rhashtable_alloc_tag_init(struct rhashtable *ht)
+{
+	ht->alloc_tag = current->alloc_tag;
+}
+
+static inline struct alloc_tag *rhashtable_alloc_tag_save(struct rhashtable *ht)
+{
+	return alloc_tag_save(ht->alloc_tag);
+}
+
+static inline void rhashtable_alloc_tag_restore(struct rhashtable *ht, struct alloc_tag *old)
+{
+	alloc_tag_restore(ht->alloc_tag, old);
+}
+#else
+#define rhashtable_alloc_tag_init(ht)
+static inline struct alloc_tag *rhashtable_alloc_tag_save(struct rhashtable *ht) { return NULL; }
+#define rhashtable_alloc_tag_restore(ht, old)
+#endif
+
 static inline union nested_table *nested_table_top(
 	const struct bucket_table *tbl)
 {
@@ -130,7 +151,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
 	if (ntbl)
 		return ntbl;
 
-	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+	ntbl = kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO);
 
 	if (ntbl && leaf) {
 		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
@@ -157,7 +178,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
 
 	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
 
-	tbl = kzalloc(size, gfp);
+	tbl = kmalloc_noprof(size, gfp|__GFP_ZERO);
 	if (!tbl)
 		return NULL;
 
@@ -180,8 +201,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	size_t size;
 	int i;
 	static struct lock_class_key __key;
+	struct alloc_tag * __maybe_unused old = rhashtable_alloc_tag_save(ht);
 
-	tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
+	tbl = kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets),
+				   gfp|__GFP_ZERO, NUMA_NO_NODE);
 
 	size = nbuckets;
 
@@ -190,6 +213,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		nbuckets = 0;
 	}
 
+	rhashtable_alloc_tag_restore(ht, old);
+
 	if (tbl == NULL)
 		return NULL;
 
@@ -971,7 +996,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
 }
 
 /**
- * rhashtable_init - initialize a new hash table
+ * rhashtable_init_noprof - initialize a new hash table
  * @ht:		hash table to be initialized
  * @params:	configuration parameters
  *
@@ -1012,7 +1037,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *	.obj_hashfn = my_hash_fn,
  * };
  */
-int rhashtable_init(struct rhashtable *ht,
+int rhashtable_init_noprof(struct rhashtable *ht,
 		    const struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
@@ -1027,6 +1052,8 @@ int rhashtable_init(struct rhashtable *ht,
 	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
 
+	rhashtable_alloc_tag_init(ht);
+
 	if (params->min_size)
 		ht->p.min_size = roundup_pow_of_two(params->min_size);
 
@@ -1072,26 +1099,26 @@ int rhashtable_init(struct rhashtable *ht,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_init);
+EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
 
 /**
- * rhltable_init - initialize a new hash list table
+ * rhltable_init_noprof - initialize a new hash list table
  * @hlt:	hash list table to be initialized
  * @params:	configuration parameters
  *
  * Initializes a new hash list table.
  *
- * See documentation for rhashtable_init.
+ * See documentation for rhashtable_init_noprof.
  */
-int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
 {
 	int err;
 
-	err = rhashtable_init(&hlt->ht, params);
+	err = rhashtable_init_noprof(&hlt->ht, params);
 	hlt->ht.rhlist = true;
 	return err;
 }
-EXPORT_SYMBOL_GPL(rhltable_init);
+EXPORT_SYMBOL_GPL(rhltable_init_noprof);
 
 static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
 				void (*free_fn)(void *ptr, void *arg),
@@ -1218,6 +1245,7 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert(
 	unsigned int index = hash & ((1 << tbl->nest) - 1);
 	unsigned int size = tbl->size >> tbl->nest;
 	union nested_table *ntbl;
+	struct alloc_tag * __maybe_unused old = rhashtable_alloc_tag_save(ht);
 
 	ntbl = nested_table_top(tbl);
 	hash >>= tbl->nest;
@@ -1232,6 +1260,8 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert(
 					  size <= (1 << shift));
 	}
 
+	rhashtable_alloc_tag_restore(ht, old);
+
 	if (!ntbl)
 		return NULL;
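
Because rhashtable allocates bucket tables internally, every table in the system would otherwise be charged to lib/rhashtable.c; the save/restore helpers above re-attribute those allocations to the tag captured when the table was initialized. A sketch of the same idiom for a hypothetical subsystem (my_cache and my_cache_grow are assumptions; it assumes CONFIG_MEM_ALLOC_PROFILING=y for brevity, where a real user would add stub wrappers like the rhashtable ones above):

	struct my_cache {
		struct alloc_tag *alloc_tag;	/* captured from current->alloc_tag at init */
		/* ... */
	};

	static void *my_cache_grow(struct my_cache *c, gfp_t gfp)
	{
		/* Charge the allocation to whoever created the cache, not to this line. */
		struct alloc_tag *old = alloc_tag_save(c->alloc_tag);
		void *p = kmalloc_noprof(PAGE_SIZE, gfp | __GFP_ZERO);

		alloc_tag_restore(c->alloc_tag, old);
		return p;
	}
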
 
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 86456aa91a55d67113bd28e9e1bafa7631e6f38b..0d7585cde2a69ef1fcdda569b939d6edc602ca99 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -7,7 +7,6 @@
 
 #include <linux/mm.h>
 #include <linux/cma.h>
-#include <linux/seq_buf.h>
 
 void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 {
@@ -42,18 +41,4 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 #ifdef CONFIG_MEMORY_FAILURE
 	printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
 #endif
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-	{
-		struct seq_buf s;
-		char *buf = kmalloc(4096, GFP_ATOMIC);
-
-		if (buf) {
-			printk("Memory allocations:\n");
-			seq_buf_init(&s, buf, 4096);
-			alloc_tags_show_mem_report(&s);
-			printk("%s", buf);
-			kfree(buf);
-		}
-	}
-#endif
 }
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 72ed071411f3a3cfc58ccd2ddb0bfa34badd4f14..79e894cf84064ab3b84f8a0a38cc8cb278347e57 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -533,71 +533,3 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 	return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true);
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
-
-static depot_stack_handle_t recursion_handle;
-static depot_stack_handle_t failure_handle;
-
-static __always_inline depot_stack_handle_t create_custom_stack(void)
-{
-	unsigned long entries[4];
-	unsigned int nr_entries;
-
-	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
-	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
-}
-
-static noinline void register_recursion_stack(void)
-{
-	recursion_handle = create_custom_stack();
-}
-
-static noinline void register_failure_stack(void)
-{
-	failure_handle = create_custom_stack();
-}
-
-bool stack_depot_capture_init(void)
-{
-	static DEFINE_MUTEX(stack_depot_capture_init_mutex);
-	static bool utility_stacks_ready;
-
-	mutex_lock(&stack_depot_capture_init_mutex);
-	if (!utility_stacks_ready) {
-		register_recursion_stack();
-		register_failure_stack();
-		utility_stacks_ready = true;
-	}
-	mutex_unlock(&stack_depot_capture_init_mutex);
-
-	return utility_stacks_ready;
-}
-
-/* TODO: teach stack_depot_capture_stack to use off stack temporal storage */
-#define CAPTURE_STACK_DEPTH (16)
-
-depot_stack_handle_t stack_depot_capture_stack(gfp_t flags)
-{
-	unsigned long entries[CAPTURE_STACK_DEPTH];
-	depot_stack_handle_t handle;
-	unsigned int nr_entries;
-
-	/*
-	 * Avoid recursion.
-	 *
-	 * Sometimes page metadata allocation tracking requires more
-	 * memory to be allocated:
-	 * - when new stack trace is saved to stack depot
-	 * - when backtrace itself is calculated (ia64)
-	 */
-	if (current->in_capture_stack)
-		return recursion_handle;
-	current->in_capture_stack = 1;
-
-	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
-	handle = stack_depot_save(entries, nr_entries, flags);
-	if (!handle)
-		handle = failure_handle;
-
-	current->in_capture_stack = 0;
-	return handle;
-}
diff --git a/lib/string.c b/lib/string.c
index 909e73f6a48ccef8a69292fbad055c5b230a96ef..3371d26a0e390cd6c3650be0d59b2e78d0b452b3 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -604,25 +604,6 @@ char *strsep(char **s, const char *ct)
 EXPORT_SYMBOL(strsep);
 #endif
 
-/**
- * strsep_no_empt - Split a string into tokens, but don't return empty tokens
- * @s: The string to be searched
- * @ct: The characters to search for
- *
- * strsep() updates @s to point after the token, ready for the next call.
- */
-char *strsep_no_empty(char **s, const char *ct)
-{
-	char *ret;
-
-	do {
-		ret = strsep(s, ct);
-	} while (ret && !*ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(strsep_no_empty);
-
 #ifndef __HAVE_ARCH_MEMSET
 /**
  * memset - Fill a region of memory with the given value
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 593b29fece32793432270859fb0f974523c60b1d..1d1645da74ff3744ca932f9a5d6fce0ce8930496 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -19,11 +19,17 @@
 #include <linux/string.h>
 #include <linux/string_helpers.h>
 
+enum string_size_units {
+	STRING_UNITS_10,	/* use powers of 10^3 (standard SI) */
+	STRING_UNITS_2,		/* use binary powers of 2^10 */
+};
+
 /**
  * string_get_size - get the size in the specified units
  * @size:	The size to be converted in blocks
  * @blk_size:	Size of the block (use 1 for size in bytes)
- * @units:	units to use (powers of 1000 or 1024)
+ * @flags:	size formatting flags: base-2 (1024) or base-10 (1000) units,
+ *		whether to include the space separator and the "B" suffix
  * @buf:	buffer to format to
  * @len:	length of buffer
  *
@@ -32,14 +38,16 @@
  * at least 9 bytes and will always be zero terminated.
  *
  */
-void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
-		     char *buf, int len)
+int string_get_size(u64 size, u64 blk_size, enum string_size_flags flags,
+		    char *buf, int len)
 {
+	enum string_size_units units = flags & STRING_SIZE_BASE2
+		? STRING_UNITS_2 : STRING_UNITS_10;
 	static const char *const units_10[] = {
-		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+		"", "k", "M", "G", "T", "P", "E", "Z", "Y"
 	};
 	static const char *const units_2[] = {
-		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+		"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"
 	};
 	static const char *const *const units_str[] = {
 		[STRING_UNITS_10] = units_10,
@@ -126,7 +134,10 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
 	else
 		unit = units_str[units][i];
 
-	snprintf(buf, len, "%u%s%s", (u32)size, tmp, unit);
+	return snprintf(buf, len, "%u%s%s%s%s", (u32)size, tmp,
+			(flags & STRING_SIZE_NOSPACE)		? "" : " ",
+			unit,
+			(flags & STRING_SIZE_NOBYTES)		? "" : "B");
 }
 EXPORT_SYMBOL(string_get_size);
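
string_get_size() now takes formatting flags instead of a units enum and returns the snprintf() length. A hedged usage sketch; the strings shown in the comments are what the implementation above is expected to produce, not verified output:

	char buf[16];

	/* Base-10 units with the default space separator and "B" suffix, e.g. "1.50 MB". */
	string_get_size(1500000, 1, 0, buf, sizeof(buf));

	/* Compact base-2 form as used by /proc/allocinfo, e.g. "1.43MiB". */
	string_get_size(1500000, 1, STRING_SIZE_BASE2 | STRING_SIZE_NOSPACE,
			buf, sizeof(buf));
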
 
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 86fadd3ba08c58ed23961a2378bb25f82e793d1a..3f346e0a3cf741432234861b7544227ac256f396 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -507,8 +507,8 @@ static __init void __test_string_get_size(const u64 size, const u64 blk_size,
 	char buf10[string_get_size_maxbuf];
 	char buf2[string_get_size_maxbuf];
 
-	string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
-	string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+	string_get_size(size, blk_size, 0, buf10, sizeof(buf10));
+	string_get_size(size, blk_size, STRING_SIZE_BASE2, buf2, sizeof(buf2));
 
 	test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
 				   size, blk_size);
diff --git a/mm/compaction.c b/mm/compaction.c
index 158ef2b2863c4c59abc1a884fb5f97762bc89553..6a2174b4f09e45ad52efa6cd7ae24025479a1939 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1673,8 +1673,7 @@ static void isolate_freepages(struct compact_control *cc)
  * This is a migrate-callback that "allocates" freepages by taking pages
  * from the isolated freelists in the block we are migrating to.
  */
-static struct page *_compaction_alloc(struct page *migratepage,
-					unsigned long data)
+static struct page *compaction_alloc_noprof(struct page *src, unsigned long data)
 {
 	struct compact_control *cc = (struct compact_control *)data;
 	struct page *freepage;
@@ -1693,11 +1692,9 @@ static struct page *_compaction_alloc(struct page *migratepage,
 	return freepage;
 }
 
-static struct page *compaction_alloc(struct page *migratepage,
-				     unsigned long data)
+static struct page *compaction_alloc(struct page *src, unsigned long data)
 {
-	return alloc_hooks(_compaction_alloc(migratepage, data),
-			   struct page *, NULL);
+	return alloc_hooks(compaction_alloc_noprof(src, data));
 }
 
 /*
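
Unlike the slab and vmalloc wrappers, compaction_alloc() is passed to migrate_pages() by address, so it cannot become a macro; it stays a real function that wraps the _noprof variant in alloc_hooks(), and the allocations are charged to this one wrapper rather than to each caller. The same shape applies to any allocation helper used as a function pointer (my_migrate_alloc* are assumptions):

	/* Hypothetical allocation callback following the same shape. */
	static struct page *my_migrate_alloc_noprof(struct page *src, unsigned long data);

	static struct page *my_migrate_alloc(struct page *src, unsigned long data)
	{
		return alloc_hooks(my_migrate_alloc_noprof(src, data));
	}
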
diff --git a/mm/filemap.c b/mm/filemap.c
index 4039ac592d384f79ec71a385ab61e6bcc41f521e..386b92d1c9ba7efcc150fc365912fc97707370cd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -953,7 +953,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
 EXPORT_SYMBOL_GPL(filemap_add_folio);
 
 #ifdef CONFIG_NUMA
-struct folio *_filemap_alloc_folio(gfp_t gfp, unsigned int order)
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 {
 	int n;
 	struct folio *folio;
@@ -968,9 +968,9 @@ struct folio *_filemap_alloc_folio(gfp_t gfp, unsigned int order)
 
 		return folio;
 	}
-	return _folio_alloc(gfp, order);
+	return folio_alloc_noprof(gfp, order);
 }
-EXPORT_SYMBOL(_filemap_alloc_folio);
+EXPORT_SYMBOL(filemap_alloc_folio_noprof);
 #endif
 
 /*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d3ffa0fd49e57b479f9896f7752fb90fed735eed..89aa8a308b81134c5080659951ec19903210c7fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3232,7 +3232,7 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
 	if (i == h->max_huge_pages_node[nid])
 		return;
 
-	string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+	string_get_size(huge_page_size(h), 1, STRING_SIZE_BASE2, buf, 32);
 	pr_warn("HugeTLB: allocating %u of page size %s failed node%d.  Only allocated %lu hugepages.\n",
 		h->max_huge_pages_node[nid], buf, nid, i);
 	h->max_huge_pages -= (h->max_huge_pages_node[nid] - i);
@@ -3294,7 +3294,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 	if (i < h->max_huge_pages) {
 		char buf[32];
 
-		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+		string_get_size(huge_page_size(h), 1, STRING_SIZE_BASE2, buf, 32);
 		pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
 			h->max_huge_pages, buf, i);
 		h->max_huge_pages = i;
@@ -3340,7 +3340,7 @@ static void __init report_hugepages(void)
 	for_each_hstate(h) {
 		char buf[32];
 
-		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+		string_get_size(huge_page_size(h), 1, STRING_SIZE_BASE2, buf, 32);
 		pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
 			buf, h->free_huge_pages);
 		pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
@@ -4224,7 +4224,7 @@ static int __init hugetlb_init(void)
 				char buf[32];
 
 				string_get_size(huge_page_size(&default_hstate),
-					1, STRING_UNITS_2, buf, 32);
+					1, STRING_SIZE_BASE2, buf, 32);
 				pr_warn("HugeTLB: Ignoring hugepages=%lu associated with %s page size\n",
 					default_hstate.max_huge_pages, buf);
 				pr_warn("HugeTLB: Using hugepages=%lu for number of default huge pages\n",
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a32a5f07e52dbc2d21d58ae854f1b0fa70110c96..c0ae09fa8fcd59667f68c964e7101503a9beddcc 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2117,7 +2117,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 {
 	struct page *page;
 
-	page = __alloc_pages(gfp, order, nid, NULL);
+	page = __alloc_pages_noprof(gfp, order, nid, NULL);
 	/* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
 	if (!static_branch_likely(&vm_numa_stat_key))
 		return page;
@@ -2143,15 +2143,15 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
 	 */
 	preferred_gfp = gfp | __GFP_NOWARN;
 	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
-	page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes);
+	page = __alloc_pages_noprof(preferred_gfp, order, nid, &pol->nodes);
 	if (!page)
-		page = __alloc_pages(gfp, order, nid, NULL);
+		page = __alloc_pages_noprof(gfp, order, nid, NULL);
 
 	return page;
 }
 
 /**
- * _vma_alloc_folio - Allocate a folio for a VMA.
+ * vma_alloc_folio_noprof - Allocate a folio for a VMA.
  * @gfp: GFP flags.
  * @order: Order of the folio.
  * @vma: Pointer to VMA or NULL if not available.
@@ -2165,7 +2165,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
  *
  * Return: The folio on success or NULL if allocation fails.
  */
-struct folio *_vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, bool hugepage)
 {
 	struct mempolicy *pol;
@@ -2236,7 +2236,7 @@ struct folio *_vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
 			 * memory with both reclaim and compact as well.
 			 */
 			if (!folio && (gfp & __GFP_DIRECT_RECLAIM))
-				folio = __folio_alloc(gfp, order, hpage_node,
+				folio = __folio_alloc_noprof(gfp, order, hpage_node,
 						      nmask);
 
 			goto out;
@@ -2245,15 +2245,15 @@ struct folio *_vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
 
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
-	folio = __folio_alloc(gfp, order, preferred_nid, nmask);
+	folio = __folio_alloc_noprof(gfp, order, preferred_nid, nmask);
 	mpol_cond_put(pol);
 out:
 	return folio;
 }
-EXPORT_SYMBOL(_vma_alloc_folio);
+EXPORT_SYMBOL(vma_alloc_folio_noprof);
 
 /**
- * _alloc_pages - Allocate pages.
+ * alloc_pages_noprof - Allocate pages.
  * @gfp: GFP flags.
  * @order: Power of two of number of pages to allocate.
  *
@@ -2266,7 +2266,7 @@ EXPORT_SYMBOL(_vma_alloc_folio);
  * flags are used.
  * Return: The page on success or NULL if allocation fails.
  */
-struct page *_alloc_pages(gfp_t gfp, unsigned int order)
+struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order)
 {
 	struct mempolicy *pol = &default_policy;
 	struct page *page;
@@ -2284,23 +2284,23 @@ struct page *_alloc_pages(gfp_t gfp, unsigned int order)
 		page = alloc_pages_preferred_many(gfp, order,
 				  policy_node(gfp, pol, numa_node_id()), pol);
 	else
-		page = _alloc_pages2(gfp, order,
+		page = __alloc_pages_noprof(gfp, order,
 				policy_node(gfp, pol, numa_node_id()),
 				policy_nodemask(gfp, pol));
 
 	return page;
 }
-EXPORT_SYMBOL(_alloc_pages);
+EXPORT_SYMBOL(alloc_pages_noprof);
 
-struct folio *_folio_alloc(gfp_t gfp, unsigned int order)
+struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
 {
-	struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+	struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
 
 	if (page && order > 1)
 		prep_transhuge_page(page);
 	return (struct folio *)page;
 }
-EXPORT_SYMBOL(_folio_alloc);
+EXPORT_SYMBOL(folio_alloc_noprof);
 
 static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
 		struct mempolicy *pol, unsigned long nr_pages,
@@ -2319,13 +2319,13 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
 
 	for (i = 0; i < nodes; i++) {
 		if (delta) {
-			nr_allocated = _alloc_pages_bulk(gfp,
+			nr_allocated = alloc_pages_bulk_noprof(gfp,
 					interleave_nodes(pol), NULL,
 					nr_pages_per_node + 1, NULL,
 					page_array);
 			delta--;
 		} else {
-			nr_allocated = _alloc_pages_bulk(gfp,
+			nr_allocated = alloc_pages_bulk_noprof(gfp,
 					interleave_nodes(pol), NULL,
 					nr_pages_per_node, NULL, page_array);
 		}
@@ -2347,11 +2347,11 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
 	preferred_gfp = gfp | __GFP_NOWARN;
 	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
 
-	nr_allocated  = _alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+	nr_allocated  = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes,
 					   nr_pages, NULL, page_array);
 
 	if (nr_allocated < nr_pages)
-		nr_allocated += _alloc_pages_bulk(gfp, numa_node_id(), NULL,
+		nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL,
 				nr_pages - nr_allocated, NULL,
 				page_array + nr_allocated);
 	return nr_allocated;
@@ -2363,7 +2363,7 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
  * It can accelerate memory allocation especially interleaving
  * allocate memory.
  */
-unsigned long _alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp,
 		unsigned long nr_pages, struct page **page_array)
 {
 	struct mempolicy *pol = &default_policy;
@@ -2379,7 +2379,7 @@ unsigned long _alloc_pages_bulk_array_mempolicy(gfp_t gfp,
 		return alloc_pages_bulk_array_preferred_many(gfp,
 				numa_node_id(), pol, nr_pages, page_array);
 
-	return _alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
+	return alloc_pages_bulk_noprof(gfp, policy_node(gfp, pol, numa_node_id()),
 				  policy_nodemask(gfp, pol), nr_pages, NULL,
 				  page_array);
 }
diff --git a/mm/mempool.c b/mm/mempool.c
index 6555043686b24f90ccf5542b320be17844c4cd5e..ec45ae1885213dfa661b84ff3955adac7568170d 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -224,14 +224,14 @@ EXPORT_SYMBOL(mempool_init_node);
  *
  * Return: %0 on success, negative error code otherwise.
  */
-int _mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
-		 mempool_free_t *free_fn, void *pool_data)
+int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+			mempool_free_t *free_fn, void *pool_data)
 {
 	return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
 				 pool_data, GFP_KERNEL, NUMA_NO_NODE);
 
 }
-EXPORT_SYMBOL(_mempool_init);
+EXPORT_SYMBOL(mempool_init_noprof);
 
 /**
  * mempool_create_node - create a memory pool
@@ -249,9 +249,9 @@ EXPORT_SYMBOL(_mempool_init);
  *
  * Return: pointer to the created memory pool object or %NULL on error.
  */
-mempool_t *_mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
-			       mempool_free_t *free_fn, void *pool_data,
-			       gfp_t gfp_mask, int node_id)
+mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn,
+				      mempool_free_t *free_fn, void *pool_data,
+				      gfp_t gfp_mask, int node_id)
 {
 	mempool_t *pool;
 
@@ -267,7 +267,7 @@ mempool_t *_mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
 
 	return pool;
 }
-EXPORT_SYMBOL(_mempool_create_node);
+EXPORT_SYMBOL(mempool_create_node_noprof);
 
 /**
  * mempool_resize - resize an existing memory pool
@@ -363,7 +363,7 @@ EXPORT_SYMBOL(mempool_resize);
  *
  * Return: pointer to the allocated element or %NULL on error.
  */
-void *_mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 {
 	void *element;
 	unsigned long flags;
@@ -430,7 +430,7 @@ void *_mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 	finish_wait(&pool->wait, &wait);
 	goto repeat_alloc;
 }
-EXPORT_SYMBOL(_mempool_alloc);
+EXPORT_SYMBOL(mempool_alloc_noprof);
 
 /**
  * mempool_free - return an element to the pool.
@@ -501,7 +501,7 @@ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
 {
 	struct kmem_cache *mem = pool_data;
 	VM_BUG_ON(mem->ctor);
-	return _kmem_cache_alloc(mem, gfp_mask);
+	return kmem_cache_alloc_noprof(mem, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_alloc_slab);
 
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(mempool_free_slab);
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
 {
 	size_t size = (size_t)pool_data;
-	return _kmalloc(size, gfp_mask);
+	return kmalloc_noprof(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kmalloc);
 
@@ -536,7 +536,7 @@ EXPORT_SYMBOL(mempool_kfree);
 void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
 {
 	int order = (int)(long)pool_data;
-	return _alloc_pages(gfp_mask, order);
+	return alloc_pages_noprof(gfp_mask, order);
 }
 EXPORT_SYMBOL(mempool_alloc_pages);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ba41f94e085257f1303678ee60c6ae3989488c5..2017397e78a39adc9647d24d4708242f7d0b0abc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -76,9 +76,9 @@
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
 #include <linux/delayacct.h>
-#include <linux/pgalloc_tag.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
+#include <linux/pgalloc_tag.h>
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
@@ -767,7 +767,6 @@ static inline bool pcp_allowed_order(unsigned int order)
 
 static inline void free_the_page(struct page *page, unsigned int order)
 {
-
 	if (pcp_allowed_order(order))		/* Via pcp? */
 		free_unref_page(page, order);
 	else
@@ -1421,7 +1420,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 			__memcg_kmem_uncharge_page(page, order);
 		reset_page_owner(page, order);
 		page_table_check_free(page, order);
-		pgalloc_tag_dec(page, order);
+		pgalloc_tag_sub(page, order);
 		return false;
 	}
 
@@ -1462,7 +1461,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	reset_page_owner(page, order);
 	page_table_check_free(page, order);
-	pgalloc_tag_dec(page, order);
+	pgalloc_tag_sub(page, order);
 
 	if (!PageHighMem(page)) {
 		debug_check_no_locks_freed(page_address(page),
@@ -2482,9 +2481,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
 			!should_skip_init(gfp_flags);
 	bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-	union codetag_ref *ref;
-#endif
 	int i;
 
 	set_page_private(page, 0);
@@ -2539,14 +2535,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 	set_page_owner(page, order, gfp_flags);
 	page_table_check_alloc(page, order);
-
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-	ref = get_page_tag_ref(page);
-	if (ref) {
-		alloc_tag_add(ref, current->alloc_tag, PAGE_SIZE << order);
-		put_page_tag_ref(ref);
-	}
-#endif
+	pgalloc_tag_add(page, current, order);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -5383,7 +5372,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
  *
  * Returns the number of pages on the list or array.
  */
-unsigned long _alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
 			nodemask_t *nodemask, int nr_pages,
 			struct list_head *page_list,
 			struct page **page_array)
@@ -5520,7 +5509,7 @@ unsigned long _alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	pcp_trylock_finish(UP_flags);
 
 failed:
-	page = _alloc_pages2(gfp, 0, preferred_nid, nodemask);
+	page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
 	if (page) {
 		if (page_list)
 			list_add(&page->lru, page_list);
@@ -5531,13 +5520,13 @@ unsigned long _alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 	goto out;
 }
-EXPORT_SYMBOL_GPL(_alloc_pages_bulk);
+EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
 
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *_alloc_pages2(gfp_t gfp, unsigned int order, int preferred_nid,
-							nodemask_t *nodemask)
+struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
+				      int preferred_nid, nodemask_t *nodemask)
 {
 	struct page *page;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
@@ -5599,41 +5588,41 @@ struct page *_alloc_pages2(gfp_t gfp, unsigned int order, int preferred_nid,
 
 	return page;
 }
-EXPORT_SYMBOL(_alloc_pages2);
+EXPORT_SYMBOL(__alloc_pages_noprof);
 
-struct folio *_folio_alloc2(gfp_t gfp, unsigned int order, int preferred_nid,
+struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask)
 {
-	struct page *page = _alloc_pages2(gfp | __GFP_COMP, order,
+	struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order,
 			preferred_nid, nodemask);
 
 	if (page && order > 1)
 		prep_transhuge_page(page);
 	return (struct folio *)page;
 }
-EXPORT_SYMBOL(_folio_alloc2);
+EXPORT_SYMBOL(__folio_alloc_noprof);
 
 /*
  * Common helper functions. Never use with __GFP_HIGHMEM because the returned
  * address cannot represent highmem pages. Use alloc_pages and then kmap if
  * you need to access high mem.
  */
-unsigned long _get_free_pages(gfp_t gfp_mask, unsigned int order)
+unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *page;
 
-	page = _alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
+	page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
 }
-EXPORT_SYMBOL(_get_free_pages);
+EXPORT_SYMBOL(get_free_pages_noprof);
 
-unsigned long _get_zeroed_page(gfp_t gfp_mask)
+unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
 {
-	return _get_free_pages(gfp_mask | __GFP_ZERO, 0);
+	return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0);
 }
-EXPORT_SYMBOL(_get_zeroed_page);
+EXPORT_SYMBOL(get_zeroed_page_noprof);
 
 /**
  * __free_pages - Free pages allocated with alloc_pages().
@@ -5826,7 +5815,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
 }
 
 /**
- * _alloc_pages_exact - allocate an exact number physically-contiguous pages.
+ * alloc_pages_exact_noprof - allocate an exact number of physically-contiguous pages.
  * @size: the number of bytes to allocate
  * @gfp_mask: GFP flags for the allocation, must not contain __GFP_COMP
  *
@@ -5840,7 +5829,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
  *
  * Return: pointer to the allocated area or %NULL in case of error.
  */
-void *_alloc_pages_exact(size_t size, gfp_t gfp_mask)
+void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask)
 {
 	unsigned int order = get_order(size);
 	unsigned long addr;
@@ -5848,13 +5837,13 @@ void *_alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
 		gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
-	addr = _get_free_pages(gfp_mask, order);
+	addr = get_free_pages_noprof(gfp_mask, order);
 	return make_alloc_exact(addr, order, size);
 }
-EXPORT_SYMBOL(_alloc_pages_exact);
+EXPORT_SYMBOL(alloc_pages_exact_noprof);
 
 /**
- * _alloc_pages_exact_nid - allocate an exact number of physically-contiguous
+ * alloc_pages_exact_nid_noprof - allocate an exact number of physically-contiguous
  *			   pages on a node.
  * @nid: the preferred node ID where memory should be allocated
  * @size: the number of bytes to allocate
@@ -5865,7 +5854,7 @@ EXPORT_SYMBOL(_alloc_pages_exact);
  *
  * Return: pointer to the allocated area or %NULL in case of error.
  */
-void * __meminit _alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask)
 {
 	unsigned int order = get_order(size);
 	struct page *p;
@@ -5873,7 +5862,7 @@ void * __meminit _alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 	if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
 		gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
-	p = _alloc_pages_node(nid, gfp_mask, order);
+	p = alloc_pages_node_noprof(nid, gfp_mask, order);
 	if (!p)
 		return NULL;
 	return make_alloc_exact((unsigned long)page_address(p), order, size);
@@ -9264,7 +9253,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 }
 
 /**
- * _alloc_contig_range() -- tries to allocate given range of pages
+ * alloc_contig_range_noprof() -- tries to allocate given range of pages
  * @start:	start PFN to allocate
  * @end:	one-past-the-last PFN to allocate
  * @migratetype:	migratetype of the underlying pageblocks (either
@@ -9284,7 +9273,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
  * pages which PFN is in [start, end) are allocated for the caller and
  * need to be freed with free_contig_range().
  */
-int _alloc_contig_range(unsigned long start, unsigned long end,
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 		       unsigned migratetype, gfp_t gfp_mask)
 {
 	unsigned long outer_start, outer_end;
@@ -9408,14 +9397,14 @@ int _alloc_contig_range(unsigned long start, unsigned long end,
 	undo_isolate_page_range(start, end, migratetype);
 	return ret;
 }
-EXPORT_SYMBOL(_alloc_contig_range);
+EXPORT_SYMBOL(alloc_contig_range_noprof);
 
 static int __alloc_contig_pages(unsigned long start_pfn,
 				unsigned long nr_pages, gfp_t gfp_mask)
 {
 	unsigned long end_pfn = start_pfn + nr_pages;
 
-	return _alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
+	return alloc_contig_range_noprof(start_pfn, end_pfn, MIGRATE_MOVABLE,
 				   gfp_mask);
 }
 
@@ -9448,7 +9437,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 }
 
 /**
- * _alloc_contig_pages() -- tries to find and allocate contiguous range of pages
+ * alloc_contig_pages_noprof() -- tries to find and allocate contiguous range of pages
  * @nr_pages:	Number of contiguous pages to allocate
  * @gfp_mask:	GFP mask to limit search and used during compaction
  * @nid:	Target node
@@ -9468,7 +9457,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
  *
  * Return: pointer to contiguous pages on success, or NULL if not successful.
  */
-struct page *_alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 				 int nid, nodemask_t *nodemask)
 {
 	unsigned long ret, pfn, flags;
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 62964e5d01e65045b377f9dab9c9bb22c68777ca..94fbf52a14c6c658de3780535179fe1b6adb3635 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -93,7 +93,7 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
 unsigned long page_ext_size = sizeof(struct page_ext);
 
 static unsigned long total_usage;
-struct page_ext *lookup_page_ext(const struct page *page);
+static struct page_ext *lookup_page_ext(const struct page *page);
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
 /*
@@ -203,7 +203,7 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
 	pgdat->node_page_ext = NULL;
 }
 
-struct page_ext *lookup_page_ext(const struct page *page)
+static struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long index;
@@ -223,7 +223,6 @@ struct page_ext *lookup_page_ext(const struct page *page)
 					MAX_ORDER_NR_PAGES);
 	return get_entry(base, index);
 }
-EXPORT_SYMBOL(lookup_page_ext);
 
 static int __init alloc_node_page_ext(int nid)
 {
@@ -283,7 +282,7 @@ static bool page_ext_invalid(struct page_ext *page_ext)
 	return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID);
 }
 
-struct page_ext *lookup_page_ext(const struct page *page)
+static struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
@@ -300,7 +299,6 @@ struct page_ext *lookup_page_ext(const struct page *page)
 		return NULL;
 	return get_entry(page_ext, pfn);
 }
-EXPORT_SYMBOL(lookup_page_ext);
 
 static void *__meminit alloc_page_ext(size_t size, int nid)
 {
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d740f36ae052a3240800f8a7aa52ff7ff2b53398..8f99081c7adcc741be1c6db4e4cc326fda35aed6 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -15,6 +15,12 @@
 
 #include "internal.h"
 
+/*
+ * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
+ * to use off-stack temporary storage
+ */
+#define PAGE_OWNER_STACK_DEPTH (16)
+
 struct page_owner {
 	unsigned short order;
 	short last_migrate_reason;
@@ -31,6 +37,8 @@ struct page_owner {
 static bool page_owner_enabled __initdata;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
+static depot_stack_handle_t dummy_handle;
+static depot_stack_handle_t failure_handle;
 static depot_stack_handle_t early_handle;
 
 static void init_early_allocated_pages(void);
@@ -60,6 +68,16 @@ static __always_inline depot_stack_handle_t create_dummy_stack(void)
 	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
 }
 
+static noinline void register_dummy_stack(void)
+{
+	dummy_handle = create_dummy_stack();
+}
+
+static noinline void register_failure_stack(void)
+{
+	failure_handle = create_dummy_stack();
+}
+
 static noinline void register_early_stack(void)
 {
 	early_handle = create_dummy_stack();
@@ -70,7 +88,8 @@ static __init void init_page_owner(void)
 	if (!page_owner_enabled)
 		return;
 
-	stack_depot_capture_init();
+	register_dummy_stack();
+	register_failure_stack();
 	register_early_stack();
 	static_branch_enable(&page_owner_inited);
 	init_early_allocated_pages();
@@ -87,6 +106,33 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
 	return (void *)page_ext + page_owner_ops.offset;
 }
 
+static noinline depot_stack_handle_t save_stack(gfp_t flags)
+{
+	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
+	depot_stack_handle_t handle;
+	unsigned int nr_entries;
+
+	/*
+	 * Avoid recursion.
+	 *
+	 * Sometimes page metadata allocation tracking requires more
+	 * memory to be allocated:
+	 * - when new stack trace is saved to stack depot
+	 * - when backtrace itself is calculated (ia64)
+	 */
+	if (current->in_page_owner)
+		return dummy_handle;
+	current->in_page_owner = 1;
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
+	handle = stack_depot_save(entries, nr_entries, flags);
+	if (!handle)
+		handle = failure_handle;
+
+	current->in_page_owner = 0;
+	return handle;
+}
+
 void __reset_page_owner(struct page *page, unsigned short order)
 {
 	int i;
@@ -99,7 +145,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
 	if (unlikely(!page_ext))
 		return;
 
-	handle = stack_depot_capture_stack(GFP_NOWAIT | __GFP_NOWARN);
+	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
 	for (i = 0; i < (1 << order); i++) {
 		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 		page_owner = get_page_owner(page_ext);
@@ -141,7 +187,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
 	struct page_ext *page_ext;
 	depot_stack_handle_t handle;
 
-	handle = stack_depot_capture_stack(gfp_mask);
+	handle = save_stack(gfp_mask);
 
 	page_ext = page_ext_get(page);
 	if (unlikely(!page_ext))
diff --git a/mm/percpu.c b/mm/percpu.c
index f257a90e5ae9b7d2f70ff6743a87a78392372d86..ebd88695673b1d8baf87ab2acd3dff56e92f5d25 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1731,7 +1731,7 @@ static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t s
 #endif
 
 /**
- * __pcpu_alloc - the percpu allocator
+ * pcpu_alloc_noprof - the percpu allocator
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  * @reserved: allocate from the reserved chunk if available
@@ -1745,8 +1745,8 @@ static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t s
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void __percpu *__pcpu_alloc(size_t size, size_t align, bool reserved,
-			    gfp_t gfp)
+void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
+				 gfp_t gfp)
 {
 	gfp_t pcpu_gfp;
 	bool is_atomic;
@@ -1944,7 +1944,7 @@ void __percpu *__pcpu_alloc(size_t size, size_t align, bool reserved,
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__pcpu_alloc);
+EXPORT_SYMBOL_GPL(pcpu_alloc_noprof);
 
 /**
  * pcpu_balance_free - manage the amount of free chunks
diff --git a/mm/show_mem.c b/mm/show_mem.c
new file mode 100644
index 0000000000000000000000000000000000000000..90b7ff00f07a1d35c94bdb29cb799c5e6758dd1d
--- /dev/null
+++ b/mm/show_mem.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic show_mem() implementation
+ *
+ * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/cma.h>
+#include <linux/cpuset.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/seq_buf.h>
+#include <linux/swap.h>
+#include <linux/vmstat.h>
+
+#include "internal.h"
+#include "swap.h"
+
+atomic_long_t _totalram_pages __read_mostly;
+EXPORT_SYMBOL(_totalram_pages);
+unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
+
+static inline void show_node(struct zone *zone)
+{
+	if (IS_ENABLED(CONFIG_NUMA))
+		printk("Node %d ", zone_to_nid(zone));
+}
+
+long si_mem_available(void)
+{
+	long available;
+	unsigned long pagecache;
+	unsigned long wmark_low = 0;
+	unsigned long pages[NR_LRU_LISTS];
+	unsigned long reclaimable;
+	struct zone *zone;
+	int lru;
+
+	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
+
+	for_each_zone(zone)
+		wmark_low += low_wmark_pages(zone);
+
+	/*
+	 * Estimate the amount of memory available for userspace allocations,
+	 * without causing swapping or OOM.
+	 */
+	available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
+
+	/*
+	 * Not all the page cache can be freed, otherwise the system will
+	 * start swapping or thrashing. Assume at least half of the page
+	 * cache, or the low watermark worth of cache, needs to stay.
+	 */
+	pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+	pagecache -= min(pagecache / 2, wmark_low);
+	available += pagecache;
+
+	/*
+	 * Part of the reclaimable slab and other kernel memory consists of
+	 * items that are in use, and cannot be freed. Cap this estimate at the
+	 * low watermark.
+	 */
+	reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+		global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+	available += reclaimable - min(reclaimable / 2, wmark_low);
+
+	if (available < 0)
+		available = 0;
+	return available;
+}
+EXPORT_SYMBOL_GPL(si_mem_available);
+
+void si_meminfo(struct sysinfo *val)
+{
+	val->totalram = totalram_pages();
+	val->sharedram = global_node_page_state(NR_SHMEM);
+	val->freeram = global_zone_page_state(NR_FREE_PAGES);
+	val->bufferram = nr_blockdev_pages();
+	val->totalhigh = totalhigh_pages();
+	val->freehigh = nr_free_highpages();
+	val->mem_unit = PAGE_SIZE;
+}
+
+EXPORT_SYMBOL(si_meminfo);
+
+#ifdef CONFIG_NUMA
+void si_meminfo_node(struct sysinfo *val, int nid)
+{
+	int zone_type;		/* needs to be signed */
+	unsigned long managed_pages = 0;
+	unsigned long managed_highpages = 0;
+	unsigned long free_highpages = 0;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+		managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
+	val->totalram = managed_pages;
+	val->sharedram = node_page_state(pgdat, NR_SHMEM);
+	val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
+#ifdef CONFIG_HIGHMEM
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
+		struct zone *zone = &pgdat->node_zones[zone_type];
+
+		if (is_highmem(zone)) {
+			managed_highpages += zone_managed_pages(zone);
+			free_highpages += zone_page_state(zone, NR_FREE_PAGES);
+		}
+	}
+	val->totalhigh = managed_highpages;
+	val->freehigh = free_highpages;
+#else
+	val->totalhigh = managed_highpages;
+	val->freehigh = free_highpages;
+#endif
+	val->mem_unit = PAGE_SIZE;
+}
+#endif
+
+/*
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
+ */
+static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
+{
+	if (!(flags & SHOW_MEM_FILTER_NODES))
+		return false;
+
+	/*
+	 * no node mask - aka implicit memory numa policy. Do not bother with
+	 * the synchronization - read_mems_allowed_begin - because we do not
+	 * have to be precise here.
+	 */
+	if (!nodemask)
+		nodemask = &cpuset_current_mems_allowed;
+
+	return !node_isset(nid, *nodemask);
+}
+
+static void show_migration_types(unsigned char type)
+{
+	static const char types[MIGRATE_TYPES] = {
+		[MIGRATE_UNMOVABLE]	= 'U',
+		[MIGRATE_MOVABLE]	= 'M',
+		[MIGRATE_RECLAIMABLE]	= 'E',
+		[MIGRATE_HIGHATOMIC]	= 'H',
+#ifdef CONFIG_CMA
+		[MIGRATE_CMA]		= 'C',
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+		[MIGRATE_ISOLATE]	= 'I',
+#endif
+	};
+	char tmp[MIGRATE_TYPES + 1];
+	char *p = tmp;
+	int i;
+
+	for (i = 0; i < MIGRATE_TYPES; i++) {
+		if (type & (1 << i))
+			*p++ = types[i];
+	}
+
+	*p = '\0';
+	printk(KERN_CONT "(%s) ", tmp);
+}
+
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+	int zone_idx;
+	for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+		if (zone_managed_pages(pgdat->node_zones + zone_idx))
+			return true;
+	return false;
+}
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ *
+ * Bits in @filter:
+ * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
+ *   cpuset.
+ */
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+	unsigned long free_pcp = 0;
+	int cpu, nid;
+	struct zone *zone;
+	pg_data_t *pgdat;
+
+	for_each_populated_zone(zone) {
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
+		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+			continue;
+
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+	}
+
+	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
+		" unevictable:%lu dirty:%lu writeback:%lu\n"
+		" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" mapped:%lu shmem:%lu pagetables:%lu\n"
+		" sec_pagetables:%lu bounce:%lu\n"
+		" kernel_misc_reclaimable:%lu\n"
+		" free:%lu free_pcp:%lu free_cma:%lu\n",
+		global_node_page_state(NR_ACTIVE_ANON),
+		global_node_page_state(NR_INACTIVE_ANON),
+		global_node_page_state(NR_ISOLATED_ANON),
+		global_node_page_state(NR_ACTIVE_FILE),
+		global_node_page_state(NR_INACTIVE_FILE),
+		global_node_page_state(NR_ISOLATED_FILE),
+		global_node_page_state(NR_UNEVICTABLE),
+		global_node_page_state(NR_FILE_DIRTY),
+		global_node_page_state(NR_WRITEBACK),
+		global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
+		global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
+		global_node_page_state(NR_FILE_MAPPED),
+		global_node_page_state(NR_SHMEM),
+		global_node_page_state(NR_PAGETABLE),
+		global_node_page_state(NR_SECONDARY_PAGETABLE),
+		global_zone_page_state(NR_BOUNCE),
+		global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
+		global_zone_page_state(NR_FREE_PAGES),
+		free_pcp,
+		global_zone_page_state(NR_FREE_CMA_PAGES));
+
+	for_each_online_pgdat(pgdat) {
+		if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
+			continue;
+		if (!node_has_managed_zones(pgdat, max_zone_idx))
+			continue;
+
+		printk("Node %d"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
+			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
+			" mapped:%lukB"
+			" dirty:%lukB"
+			" writeback:%lukB"
+			" shmem:%lukB"
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+			" shmem_thp: %lukB"
+			" shmem_pmdmapped: %lukB"
+			" anon_thp: %lukB"
+#endif
+			" writeback_tmp:%lukB"
+			" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+			" shadow_call_stack:%lukB"
+#endif
+			" pagetables:%lukB"
+			" sec_pagetables:%lukB"
+			" all_unreclaimable? %s"
+			"\n",
+			pgdat->node_id,
+			K(node_page_state(pgdat, NR_ACTIVE_ANON)),
+			K(node_page_state(pgdat, NR_INACTIVE_ANON)),
+			K(node_page_state(pgdat, NR_ACTIVE_FILE)),
+			K(node_page_state(pgdat, NR_INACTIVE_FILE)),
+			K(node_page_state(pgdat, NR_UNEVICTABLE)),
+			K(node_page_state(pgdat, NR_ISOLATED_ANON)),
+			K(node_page_state(pgdat, NR_ISOLATED_FILE)),
+			K(node_page_state(pgdat, NR_FILE_MAPPED)),
+			K(node_page_state(pgdat, NR_FILE_DIRTY)),
+			K(node_page_state(pgdat, NR_WRITEBACK)),
+			K(node_page_state(pgdat, NR_SHMEM)),
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+			K(node_page_state(pgdat, NR_SHMEM_THPS)),
+			K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
+			K(node_page_state(pgdat, NR_ANON_THPS)),
+#endif
+			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+			node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+			node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
+			K(node_page_state(pgdat, NR_PAGETABLE)),
+			K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
+			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+				"yes" : "no");
+	}
+
+	for_each_populated_zone(zone) {
+		int i;
+
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
+		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+			continue;
+
+		free_pcp = 0;
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+
+		show_node(zone);
+		printk(KERN_CONT
+			"%s"
+			" free:%lukB"
+			" boost:%lukB"
+			" min:%lukB"
+			" low:%lukB"
+			" high:%lukB"
+			" reserved_highatomic:%luKB"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
+			" unevictable:%lukB"
+			" writepending:%lukB"
+			" present:%lukB"
+			" managed:%lukB"
+			" mlocked:%lukB"
+			" bounce:%lukB"
+			" free_pcp:%lukB"
+			" local_pcp:%ukB"
+			" free_cma:%lukB"
+			"\n",
+			zone->name,
+			K(zone_page_state(zone, NR_FREE_PAGES)),
+			K(zone->watermark_boost),
+			K(min_wmark_pages(zone)),
+			K(low_wmark_pages(zone)),
+			K(high_wmark_pages(zone)),
+			K(zone->nr_reserved_highatomic),
+			K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
+			K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
+			K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
+			K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
+			K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
+			K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
+			K(zone->present_pages),
+			K(zone_managed_pages(zone)),
+			K(zone_page_state(zone, NR_MLOCK)),
+			K(zone_page_state(zone, NR_BOUNCE)),
+			K(free_pcp),
+			K(this_cpu_read(zone->per_cpu_pageset->count)),
+			K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
+		printk("lowmem_reserve[]:");
+		for (i = 0; i < MAX_NR_ZONES; i++)
+			printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+		printk(KERN_CONT "\n");
+	}
+
+	for_each_populated_zone(zone) {
+		unsigned int order;
+		unsigned long nr[MAX_ORDER + 1], flags, total = 0;
+		unsigned char types[MAX_ORDER + 1];
+
+		if (zone_idx(zone) > max_zone_idx)
+			continue;
+		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+			continue;
+		show_node(zone);
+		printk(KERN_CONT "%s: ", zone->name);
+
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order <= MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[order];
+			int type;
+
+			nr[order] = area->nr_free;
+			total += nr[order] << order;
+
+			types[order] = 0;
+			for (type = 0; type < MIGRATE_TYPES; type++) {
+				if (!free_area_empty(area, type))
+					types[order] |= 1 << type;
+			}
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+		for (order = 0; order <= MAX_ORDER; order++) {
+			printk(KERN_CONT "%lu*%lukB ",
+			       nr[order], K(1UL) << order);
+			if (nr[order])
+				show_migration_types(types[order]);
+		}
+		printk(KERN_CONT "= %lukB\n", K(total));
+	}
+
+	for_each_online_node(nid) {
+		if (show_mem_node_skip(filter, nid, nodemask))
+			continue;
+		hugetlb_show_meminfo_node(nid);
+	}
+
+	printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
+
+	show_swap_cache_info();
+}
+
+void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+	unsigned long total = 0, reserved = 0, highmem = 0;
+	struct zone *zone;
+
+	printk("Mem-Info:\n");
+	__show_free_areas(filter, nodemask, max_zone_idx);
+
+	for_each_populated_zone(zone) {
+
+		total += zone->present_pages;
+		reserved += zone->present_pages - zone_managed_pages(zone);
+
+		if (is_highmem(zone))
+			highmem += zone->present_pages;
+	}
+
+	printk("%lu pages RAM\n", total);
+	printk("%lu pages HighMem/MovableOnly\n", highmem);
+	printk("%lu pages reserved\n", reserved);
+#ifdef CONFIG_CMA
+	printk("%lu pages cma reserved\n", totalcma_pages);
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+	printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
+#endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+	{
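+		/* May be called from OOM context, hence the small GFP_ATOMIC buffer */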
+		struct seq_buf s;
+		char *buf = kmalloc(4096, GFP_ATOMIC);
+
+		if (buf) {
+			printk("Memory allocations:\n");
+			seq_buf_init(&s, buf, 4096);
+			alloc_tags_show_mem_report(&s);
+			printk("%s", buf);
+			kfree(buf);
+		}
+	}
+#endif
+}
diff --git a/mm/slab.c b/mm/slab.c
index 478faf36273ff471246cd30a90090520d7c5a623..7c4450fb21d32f2b6811cc233c97d55fc7cfc681 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3458,18 +3458,18 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
  *
  * Return: pointer to the new object or %NULL in case of error
  */
-void *_kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, gfp_t flags)
 {
 	return __kmem_cache_alloc_lru(cachep, NULL, flags);
 }
-EXPORT_SYMBOL(_kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_noprof);
 
-void *_kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *cachep, struct list_lru *lru,
 			   gfp_t flags)
 {
 	return __kmem_cache_alloc_lru(cachep, lru, flags);
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_lru);
+EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
 
 static __always_inline void
 cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
@@ -3481,8 +3481,8 @@ cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
 		p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
 }
 
-int _kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
-			  void **p)
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
+				 void **p)
 {
 	size_t i;
 	struct obj_cgroup *objcg = NULL;
@@ -3519,7 +3519,7 @@ int _kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_bulk);
+EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
 
 /**
  * kmem_cache_alloc_node - Allocate an object on the specified node
@@ -3534,7 +3534,7 @@ EXPORT_SYMBOL(_kmem_cache_alloc_bulk);
  *
  * Return: pointer to the new object or %NULL in case of error
  */
-void *_kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_);
 
@@ -3542,7 +3542,7 @@ void *_kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
 	return ret;
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_node);
+EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
 
 void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 			     int nodeid, size_t orig_size,
diff --git a/mm/slab.h b/mm/slab.h
index 404541f5256812caedafb2d2e5387d13af5c20e2..4f10a37ea79c2d0d07787d9cdc90263a2c4847e5 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -472,9 +472,34 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
 	}
 }
 
+static inline void mark_failed_objexts_alloc(struct slab *slab)
+{
+	slab->obj_exts = OBJEXTS_ALLOC_FAIL;
+}
+
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+			struct slabobj_ext *vec, unsigned int objects)
+{
+	/*
+	 * If the vector previously failed to allocate then we have live
+	 * objects with no tag reference. Mark all references in this
+	 * vector as empty to avoid warnings later on.
+	 */
+	if (obj_exts & OBJEXTS_ALLOC_FAIL) {
+		unsigned int i;
+
+		for (i = 0; i < objects; i++)
+			set_codetag_empty(&vec[i].ref);
+	}
+}
+
 #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
 
 static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {}
+static inline void mark_failed_objexts_alloc(struct slab *slab) {}
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+			struct slabobj_ext *vec, unsigned int objects) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
 
@@ -572,9 +597,6 @@ static inline void alloc_tagging_slab_free_hook(struct kmem_cache *s, struct sla
 	struct slabobj_ext *obj_exts;
 	int i;
 
-	if (!mem_alloc_profiling_enabled())
-		return;
-
 	obj_exts = slab_obj_exts(slab);
 	if (!obj_exts)
 		return;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f84b0db52dbdecc8b9e0f2d3b24f61d2ab71b0c3..13516acd152d5724a11c9f8a333b0a8b6946c313 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -214,7 +214,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 			gfp_t gfp, bool new_slab)
 {
 	unsigned int objects = objs_per_slab(s, slab);
-	unsigned long obj_exts;
+	unsigned long new_exts;
+	unsigned long old_exts;
 	struct slabobj_ext *vec;
 
 	gfp &= ~OBJCGS_CLEAR_MASK;
@@ -223,43 +224,27 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 	vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp,
 			   slab_nid(slab));
 	if (!vec) {
-#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
-		if (new_slab) {
-			/* Mark vectors which failed to allocate */
-			slab->obj_exts = OBJEXTS_ALLOC_FAIL;
-#ifdef CONFIG_MEMCG
-			slab->obj_exts |= MEMCG_DATA_OBJEXTS;
-#endif
-		}
-#endif
+		/* Mark vectors which failed to allocate */
+		if (new_slab)
+			mark_failed_objexts_alloc(slab);
+
 		return -ENOMEM;
 	}
 
-	obj_exts = (unsigned long)vec;
+	new_exts = (unsigned long)vec;
 #ifdef CONFIG_MEMCG
-	obj_exts |= MEMCG_DATA_OBJEXTS;
+	new_exts |= MEMCG_DATA_OBJEXTS;
 #endif
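+	/*
+	 * obj_exts may hold the OBJEXTS_ALLOC_FAIL marker left by an earlier
+	 * failed attempt; the cmpxchg below must compare against that value.
+	 */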
+	old_exts = slab->obj_exts;
+	handle_failed_objexts_alloc(old_exts, vec, objects);
 	if (new_slab) {
-#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
-		/*
-		 * If vector previously failed to allocate then we have live
-		 * objects with no tag reference. Mark all references in this
-		 * vector as empty to avoid warnings later on.
-		 */
-		if (slab->obj_exts & OBJEXTS_ALLOC_FAIL) {
-			unsigned int i;
-
-			for (i = 0; i < objects; i++)
-				set_codetag_empty(&vec[i].ref);
-		}
-#endif
 		/*
 		 * If the slab is brand new and nobody can yet access its
 		 * obj_exts, no synchronization is required and obj_exts can
 		 * be simply assigned.
 		 */
-		slab->obj_exts = obj_exts;
-	} else if (cmpxchg(&slab->obj_exts, 0, obj_exts)) {
+		slab->obj_exts = new_exts;
+	} else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
 		/*
 		 * If the slab is already in use, somebody can allocate and
 		 * assign slabobj_exts in parallel. In this case the existing
@@ -1030,24 +1015,24 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller
 	return ret;
 }
 
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
 	return __do_kmalloc_node(size, flags, node, _RET_IP_);
 }
-EXPORT_SYMBOL(__kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node_noprof);
 
-void *__kmalloc(size_t size, gfp_t flags)
+void *__kmalloc_noprof(size_t size, gfp_t flags)
 {
 	return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
 }
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_noprof);
 
-void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
-				  int node, unsigned long caller)
+void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
+				       int node, unsigned long caller)
 {
 	return __do_kmalloc_node(size, flags, node, caller);
 }
-EXPORT_SYMBOL(__kmalloc_node_track_caller);
+EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
 
 /**
  * kfree - free previously allocated memory
@@ -1113,7 +1098,7 @@ size_t __ksize(const void *object)
 	return slab_ksize(folio_slab(folio)->slab_cache);
 }
 
-void *_kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
 					    size, _RET_IP_);
@@ -1123,9 +1108,9 @@ void *_kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
-EXPORT_SYMBOL(_kmalloc_trace);
+EXPORT_SYMBOL(kmalloc_trace_noprof);
 
-void *_kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
 			 int node, size_t size)
 {
 	void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
@@ -1135,7 +1120,7 @@ void *_kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
-EXPORT_SYMBOL(_kmalloc_node_trace);
+EXPORT_SYMBOL(kmalloc_node_trace_noprof);
 #endif /* !CONFIG_SLOB */
 
 gfp_t kmalloc_fix_flags(gfp_t flags)
@@ -1166,7 +1151,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
 		flags = kmalloc_fix_flags(flags);
 
 	flags |= __GFP_COMP;
-	page = alloc_pages_node(node, flags, order);
+	page = alloc_pages_node_noprof(node, flags, order);
 	if (page) {
 		ptr = page_address(page);
 		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
@@ -1181,7 +1166,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
 	return ptr;
 }
 
-void *_kmalloc_large(size_t size, gfp_t flags)
+void *kmalloc_large_noprof(size_t size, gfp_t flags)
 {
 	void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
 
@@ -1189,9 +1174,9 @@ void *_kmalloc_large(size_t size, gfp_t flags)
 		      flags, NUMA_NO_NODE);
 	return ret;
 }
-EXPORT_SYMBOL(_kmalloc_large);
+EXPORT_SYMBOL(kmalloc_large_noprof);
 
-void *_kmalloc_large_node(size_t size, gfp_t flags, int node)
+void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
 {
 	void *ret = __kmalloc_large_node(size, flags, node);
 
@@ -1199,7 +1184,7 @@ void *_kmalloc_large_node(size_t size, gfp_t flags, int node)
 		      flags, node);
 	return ret;
 }
-EXPORT_SYMBOL(_kmalloc_large_node);
+EXPORT_SYMBOL(kmalloc_large_node_noprof);
 
 #ifdef CONFIG_SLAB_FREELIST_RANDOM
 /* Randomize a generic freelist */
@@ -1418,7 +1403,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
 		return (void *)p;
 	}
 
-	ret = __kmalloc_node_track_caller(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
 	if (ret && p) {
 		/* Disable KASAN checks as the object's redzone is accessed. */
 		kasan_disable_current();
@@ -1442,7 +1427,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
  *
  * Return: pointer to the allocated memory or %NULL in case of error
  */
-void *_krealloc(const void *p, size_t new_size, gfp_t flags)
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
 {
 	void *ret;
 
@@ -1457,7 +1442,7 @@ void *_krealloc(const void *p, size_t new_size, gfp_t flags)
 
 	return ret;
 }
-EXPORT_SYMBOL(_krealloc);
+EXPORT_SYMBOL(krealloc_noprof);
 
 /**
  * kfree_sensitive - Clear sensitive information in memory before freeing
diff --git a/mm/slub.c b/mm/slub.c
index 0e8090a35fe7b5bd8924529140eedbc6d696380c..1aa41d11ff32105f0cacf1f7bf8d14c216b41b74 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3417,18 +3417,18 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 	return ret;
 }
 
-void *_kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+void *kmem_cache_alloc_noprof(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return __kmem_cache_alloc_lru(s, NULL, gfpflags);
 }
-EXPORT_SYMBOL(_kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_noprof);
 
-void *_kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
 			   gfp_t gfpflags)
 {
 	return __kmem_cache_alloc_lru(s, lru, gfpflags);
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_lru);
+EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
 
 void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
 			      int node, size_t orig_size,
@@ -3438,7 +3438,7 @@ void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
 			       caller, orig_size);
 }
 
-void *_kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
@@ -3446,7 +3446,7 @@ void *_kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 
 	return ret;
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_node);
+EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
 
 /*
  * Slow path handling. This may still be called frequently since objects
@@ -3784,8 +3784,8 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
 /* Note that interrupts must be enabled when calling this function. */
-int _kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
-			   void **p)
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
+				 void **p)
 {
 	struct kmem_cache_cpu *c;
 	int i;
@@ -3861,7 +3861,7 @@ int _kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
-EXPORT_SYMBOL(_kmem_cache_alloc_bulk);
+EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
 
 
 /*
diff --git a/mm/util.c b/mm/util.c
index b4ca88baf43893e9367b9b2be4cfd184b50e1342..41565901885a983e0fa2072d43ffafdd486c1ae4 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -114,7 +114,7 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
 EXPORT_SYMBOL(kstrndup);
 
 /**
- * kmemdup - duplicate region of memory
+ * kmemdup_noprof - duplicate region of memory
  *
  * @src: memory region to duplicate
  * @len: memory region length
@@ -122,16 +122,16 @@ EXPORT_SYMBOL(kstrndup);
  *
  * Return: newly allocated copy of @src or %NULL in case of error
  */
-void *kmemdup(const void *src, size_t len, gfp_t gfp)
+void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp)
 {
 	void *p;
 
-	p = kmalloc_track_caller(len, gfp);
+	p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_);
 	if (p)
 		memcpy(p, src, len);
 	return p;
 }
-EXPORT_SYMBOL(kmemdup);
+EXPORT_SYMBOL(kmemdup_noprof);
 
 /**
  * kmemdup_nul - Create a NUL-terminated string from unterminated data
@@ -541,7 +541,7 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 EXPORT_SYMBOL(vm_mmap);
 
 /**
- * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * kvmalloc_node_noprof - attempt to allocate physically contiguous memory, but upon
  * failure, fall back to non-contiguous (vmalloc) allocation.
  * @size: size of the request.
  * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
@@ -556,7 +556,7 @@ EXPORT_SYMBOL(vm_mmap);
  *
  * Return: pointer to the allocated memory of %NULL in case of failure
  */
-void *_kvmalloc_node(size_t size, gfp_t flags, int node)
+void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
 	gfp_t kmalloc_flags = flags;
 	void *ret;
@@ -578,7 +578,7 @@ void *_kvmalloc_node(size_t size, gfp_t flags, int node)
 		kmalloc_flags &= ~__GFP_NOFAIL;
 	}
 
-	ret = _kmalloc_node(size, kmalloc_flags, node);
+	ret = kmalloc_node_noprof(size, kmalloc_flags, node);
 
 	/*
 	 * It doesn't really make sense to fallback to vmalloc for sub page
@@ -603,11 +603,11 @@ void *_kvmalloc_node(size_t size, gfp_t flags, int node)
 	 * about the resulting pointer, and cannot play
 	 * protection games.
 	 */
-	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
 			flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
 			node, __builtin_return_address(0));
 }
-EXPORT_SYMBOL(_kvmalloc_node);
+EXPORT_SYMBOL(kvmalloc_node_noprof);
 
 /**
  * kvfree() - Free memory.
@@ -646,7 +646,7 @@ void kvfree_sensitive(const void *addr, size_t len)
 }
 EXPORT_SYMBOL(kvfree_sensitive);
 
-void *_kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
 {
 	void *newp;
 
@@ -659,15 +659,15 @@ void *_kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
 	kvfree(p);
 	return newp;
 }
-EXPORT_SYMBOL(_kvrealloc);
+EXPORT_SYMBOL(kvrealloc_noprof);
 
 /**
- * __vmalloc_array - allocate memory for a virtually contiguous array.
+ * __vmalloc_array_noprof - allocate memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
+void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
 {
 	size_t bytes;
 
@@ -675,18 +675,18 @@ void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
 		return NULL;
 	return __vmalloc(bytes, flags);
 }
-EXPORT_SYMBOL(__vmalloc_array);
+EXPORT_SYMBOL(__vmalloc_array_noprof);
 
 /**
- * vmalloc_array - allocate memory for a virtually contiguous array.
+ * vmalloc_array_noprof - allocate memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
  */
-void *vmalloc_array(size_t n, size_t size)
+void *vmalloc_array_noprof(size_t n, size_t size)
 {
 	return __vmalloc_array(n, size, GFP_KERNEL);
 }
-EXPORT_SYMBOL(vmalloc_array);
+EXPORT_SYMBOL(vmalloc_array_noprof);
 
 /**
  * __vcalloc - allocate and zero memory for a virtually contiguous array.
@@ -694,22 +694,22 @@ EXPORT_SYMBOL(vmalloc_array);
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-void *__vcalloc(size_t n, size_t size, gfp_t flags)
+void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
 {
 	return __vmalloc_array(n, size, flags | __GFP_ZERO);
 }
-EXPORT_SYMBOL(__vcalloc);
+EXPORT_SYMBOL(__vcalloc_noprof);
 
 /**
- * vcalloc - allocate and zero memory for a virtually contiguous array.
+ * vcalloc_noprof - allocate and zero memory for a virtually contiguous array.
  * @n: number of elements.
  * @size: element size.
  */
-void *vcalloc(size_t n, size_t size)
+void *vcalloc_noprof(size_t n, size_t size)
 {
 	return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
 }
-EXPORT_SYMBOL(vcalloc);
+EXPORT_SYMBOL(vcalloc_noprof);
 
 /* Neutral page->mapping pointer to address_space or anon_vma or other */
 void *page_rmapping(struct page *page)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d606e53c650e5bce875f7cc8849db3bd727f59ba..b7de03816dcc4345900cb16b0864d9e40d7935bb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2944,12 +2944,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			 * but mempolicy wants to alloc memory by interleaving.
 			 */
 			if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
-				nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
+				nr = alloc_pages_bulk_array_mempolicy_noprof(bulk_gfp,
 							nr_pages_request,
 							pages + nr_allocated);
 
 			else
-				nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
+				nr = alloc_pages_bulk_array_node_noprof(bulk_gfp, nid,
 							nr_pages_request,
 							pages + nr_allocated);
 
@@ -2972,9 +2972,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			break;
 
 		if (nid == NUMA_NO_NODE)
-			page = alloc_pages(gfp, order);
+			page = alloc_pages_noprof(gfp, order);
 		else
-			page = alloc_pages_node(nid, gfp, order);
+			page = alloc_pages_node_noprof(nid, gfp, order);
 		if (unlikely(!page))
 			break;
 		/*
@@ -3023,10 +3023,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+		area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node,
 					area->caller);
 	} else {
-		area->pages = kmalloc_node(array_size, nested_gfp, node);
+		area->pages = kmalloc_node_noprof(array_size, nested_gfp, node);
 	}
 
 	if (!area->pages) {
@@ -3100,7 +3100,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 }
 
 /**
- * __vmalloc_node_range - allocate virtually contiguous memory
+ * __vmalloc_node_range_noprof - allocate virtually contiguous memory
  * @size:		  allocation size
  * @align:		  desired alignment
  * @start:		  vm area range start
@@ -3127,7 +3127,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
  *
  * Return: the address of the area or %NULL on failure
  */
-void *__vmalloc_node_range(unsigned long size, unsigned long align,
+void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller)
@@ -3256,7 +3256,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 }
 
 /**
- * __vmalloc_node - allocate virtually contiguous memory
+ * __vmalloc_node_noprof - allocate virtually contiguous memory
  * @size:	    allocation size
  * @align:	    desired alignment
  * @gfp_mask:	    flags for the page level allocator
@@ -3274,10 +3274,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, int node, const void *caller)
 {
-	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
 				gfp_mask, PAGE_KERNEL, 0, node, caller);
 }
 /*
@@ -3286,15 +3286,15 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
  * than that.
  */
 #ifdef CONFIG_TEST_VMALLOC_MODULE
-EXPORT_SYMBOL_GPL(__vmalloc_node);
+EXPORT_SYMBOL_GPL(__vmalloc_node_noprof);
 #endif
 
-void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
 {
-	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
+	return __vmalloc_node_noprof(size, 1, gfp_mask, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
-EXPORT_SYMBOL(__vmalloc);
+EXPORT_SYMBOL(__vmalloc_noprof);
 
 /**
- * vmalloc - allocate virtually contiguous memory
+ * vmalloc_noprof - allocate virtually contiguous memory
@@ -3308,12 +3308,12 @@ EXPORT_SYMBOL(__vmalloc);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc(unsigned long size)
+void *vmalloc_noprof(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
+	return __vmalloc_node_noprof(size, 1, GFP_KERNEL, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vmalloc_noprof);
 
 /**
- * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
+ * vmalloc_huge_noprof - allocate virtually contiguous memory, allow huge pages
@@ -3327,16 +3327,16 @@ EXPORT_SYMBOL(vmalloc);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask)
 {
-	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
 				    gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
 				    NUMA_NO_NODE, __builtin_return_address(0));
 }
-EXPORT_SYMBOL_GPL(vmalloc_huge);
+EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
 
 /**
- * vzalloc - allocate virtually contiguous memory with zero fill
+ * vzalloc_noprof - allocate virtually contiguous memory with zero fill
  * @size:    allocation size
  *
  * Allocate enough pages to cover @size from the page level
@@ -3348,12 +3348,12 @@ EXPORT_SYMBOL_GPL(vmalloc_huge);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vzalloc(unsigned long size)
+void *vzalloc_noprof(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
+	return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
-EXPORT_SYMBOL(vzalloc);
+EXPORT_SYMBOL(vzalloc_noprof);
 
 /**
- * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
+ * vmalloc_user_noprof - allocate zeroed virtually contiguous memory for userspace
@@ -3364,17 +3364,17 @@ EXPORT_SYMBOL(vzalloc);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_user(unsigned long size)
+void *vmalloc_user_noprof(unsigned long size)
 {
-	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
 				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
 				    VM_USERMAP, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc_user);
+EXPORT_SYMBOL(vmalloc_user_noprof);
 
 /**
- * vmalloc_node - allocate memory on a specific node
+ * vmalloc_node_noprof - allocate memory on a specific node
  * @size:	  allocation size
  * @node:	  numa node
  *
@@ -3386,15 +3386,15 @@ EXPORT_SYMBOL(vmalloc_user);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_node(unsigned long size, int node)
+void *vmalloc_node_noprof(unsigned long size, int node)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, node,
+	return __vmalloc_node_noprof(size, 1, GFP_KERNEL, node,
 			__builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc_node);
+EXPORT_SYMBOL(vmalloc_node_noprof);
 
 /**
- * vzalloc_node - allocate memory on a specific node with zero fill
+ * vzalloc_node_noprof - allocate memory on a specific node with zero fill
  * @size:	allocation size
  * @node:	numa node
  *
@@ -3404,12 +3404,12 @@ EXPORT_SYMBOL(vmalloc_node);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vzalloc_node(unsigned long size, int node)
+void *vzalloc_node_noprof(unsigned long size, int node)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
+	return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, node,
 				__builtin_return_address(0));
 }
-EXPORT_SYMBOL(vzalloc_node);
+EXPORT_SYMBOL(vzalloc_node_noprof);
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
 #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
@@ -3424,7 +3424,7 @@ EXPORT_SYMBOL(vzalloc_node);
 #endif
 
 /**
- * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
+ * vmalloc_32_noprof - allocate virtually contiguous memory (32bit addressable)
  * @size:	allocation size
  *
  * Allocate enough 32bit PA addressable pages to cover @size from the
@@ -3432,15 +3432,15 @@ EXPORT_SYMBOL(vzalloc_node);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_32(unsigned long size)
+void *vmalloc_32_noprof(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
+	return __vmalloc_node_noprof(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
 			__builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmalloc_32_noprof);
 
 /**
- * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
+ * vmalloc_32_user_noprof - allocate zeroed virtually contiguous 32bit memory
  * @size:	     allocation size
  *
  * The resulting memory area is 32bit addressable and zeroed so it can be
@@ -3448,14 +3448,14 @@ EXPORT_SYMBOL(vmalloc_32);
  *
  * Return: pointer to the allocated memory or %NULL on error
  */
-void *vmalloc_32_user(unsigned long size)
+void *vmalloc_32_user_noprof(unsigned long size)
 {
-	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range_noprof(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
 				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
 				    VM_USERMAP, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 }
-EXPORT_SYMBOL(vmalloc_32_user);
+EXPORT_SYMBOL(vmalloc_32_user_noprof);
 
 /*
- * small helper routine , copy contents to buf from addr.
+ * small helper routine, copy contents to buf from addr.