diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index f02f31cea0e6c5dc3991cd12d37f06d71f38c77e..34cb9799e32442a46711e33a365a375d84be721e 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -29,6 +29,8 @@ struct pcpu_chunk {
 	int			end_offset;	/* additional area required to
 						   have the region end page
 						   aligned */
+
+	int			nr_pages;	/* # of pages served by this chunk */
 	int			nr_populated;	/* # of populated pages */
 	unsigned long		populated[];	/* populated bitmap */
 };
diff --git a/mm/percpu.c b/mm/percpu.c
index e08ed61ea70a55edde0cfc432572cb9faed23655..7c9f0d3ad1b5cbd778866ccad5c2102572e49ff8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -181,19 +181,55 @@ static void pcpu_schedule_balance_work(void)
 		schedule_work(&pcpu_balance_work);
 }
 
+/**
+ * pcpu_addr_in_first_chunk - address check for first chunk's dynamic region
+ * @addr: percpu address of interest
+ *
+ * Here, the first chunk refers only to the dynamic region of the first
+ * chunk.  While the true first chunk is composed of the static, reserved,
+ * and dynamic regions, it is the chunk that serves the dynamic region
+ * that is circulated in the chunk slots.
+ *
+ * The reserved chunk has a separate check and the static region addresses
+ * should never be passed into the percpu allocator.
+ *
+ * RETURNS:
+ * True if the address is in the dynamic region of the first chunk.
+ */
 static bool pcpu_addr_in_first_chunk(void *addr)
 {
-	void *first_start = pcpu_first_chunk->base_addr;
+	void *start_addr = pcpu_first_chunk->base_addr +
+			   pcpu_first_chunk->start_offset;
+	void *end_addr = pcpu_first_chunk->base_addr +
+			 pcpu_first_chunk->nr_pages * PAGE_SIZE -
+			 pcpu_first_chunk->end_offset;
 
-	return addr >= first_start && addr < first_start + pcpu_unit_size;
+	return addr >= start_addr && addr < end_addr;
 }
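+
+/*
+ * Example with hypothetical values: base_addr = 0xffffe8000000,
+ * start_offset = 0x4c0, nr_pages = 8 and end_offset = 0 would make the
+ * check above accept [0xffffe80004c0, 0xffffe8008000), assuming 4K pages.
+ */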
 
+/**
+ * pcpu_addr_in_reserved_chunk - address check for reserved region
+ * @addr: percpu address of interest
+ *
+ * The reserved region is part of the first chunk and primarily serves
+ * static percpu variables from kernel modules.
+ *
+ * RETURNS:
+ * True if the address is in the reserved region.
+ */
 static bool pcpu_addr_in_reserved_chunk(void *addr)
 {
-	void *first_start = pcpu_first_chunk->base_addr;
+	void *start_addr, *end_addr;
+
+	if (!pcpu_reserved_chunk)
+		return false;
 
-	return addr >= first_start &&
-		addr < first_start + pcpu_first_chunk->start_offset;
+	start_addr = pcpu_reserved_chunk->base_addr +
+		     pcpu_reserved_chunk->start_offset;
+	end_addr = pcpu_reserved_chunk->base_addr +
+		   pcpu_reserved_chunk->nr_pages * PAGE_SIZE -
+		   pcpu_reserved_chunk->end_offset;
+
+	return addr >= start_addr && addr < end_addr;
 }
 
 static int __pcpu_size_to_slot(int size)
@@ -234,11 +270,16 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
 	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
 }
 
+static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
+{
+	return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
+}
+
 static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 				     unsigned int cpu, int page_idx)
 {
-	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
-		(page_idx << PAGE_SHIFT);
+	return (unsigned long)chunk->base_addr +
+	       pcpu_unit_page_offset(cpu, page_idx);
 }
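+
+/*
+ * Example with hypothetical values: with pcpu_unit_offsets[1] = 0x20000
+ * and 4K pages, pcpu_chunk_addr(chunk, 1, 3) is chunk->base_addr +
+ * 0x20000 + (3 << PAGE_SHIFT) = chunk->base_addr + 0x23000.
+ */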
 
 static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk,
@@ -708,23 +749,34 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
 	pcpu_chunk_relocate(chunk, oslot);
 }
 
-static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr,
-							 int start_offset,
+static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 							 int map_size,
 							 int *map,
 							 int init_map_size)
 {
 	struct pcpu_chunk *chunk;
-	int region_size;
+	unsigned long aligned_addr;
+	int start_offset, region_size;
+
+	/* region calculations */
+	aligned_addr = tmp_addr & PAGE_MASK;
+
+	start_offset = tmp_addr - aligned_addr;
 
 	region_size = PFN_ALIGN(start_offset + map_size);
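+
+	/*
+	 * For example, with a hypothetical tmp_addr = 0xffffe80034c0:
+	 * aligned_addr = 0xffffe8003000 and start_offset = 0x4c0.  A
+	 * map_size of 0x5000 then yields region_size = PFN_ALIGN(0x54c0)
+	 * = 0x6000, leaving end_offset = 0x6000 - 0x4c0 - 0x5000 = 0xb40
+	 * when it is computed below.
+	 */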
 
+	/* allocate chunk */
 	chunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
+
 	INIT_LIST_HEAD(&chunk->list);
 	INIT_LIST_HEAD(&chunk->map_extend_list);
-	chunk->base_addr = base_addr;
+
+	chunk->base_addr = (void *)aligned_addr;
 	chunk->start_offset = start_offset;
 	chunk->end_offset = region_size - chunk->start_offset - map_size;
+
+	chunk->nr_pages = pcpu_unit_pages;
+
 	chunk->map = map;
 	chunk->map_alloc = init_map_size;
 
@@ -734,10 +786,17 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr,
 	chunk->nr_populated = pcpu_unit_pages;
 
 	chunk->contig_hint = chunk->free_size = map_size;
-	chunk->map[0] = 1;
-	chunk->map[1] = chunk->start_offset;
-	chunk->map[2] = (chunk->start_offset + chunk->free_size) | 1;
-	chunk->map_used = 2;
+
+	if (chunk->start_offset) {
+		/* hide the beginning of the bitmap */
+		chunk->map[0] = 1;
+		chunk->map[1] = chunk->start_offset;
+		chunk->map_used = 1;
+	}
+
+	/* set chunk's free region */
+	chunk->map[++chunk->map_used] =
+		(chunk->start_offset + chunk->free_size) | 1;
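+
+	/*
+	 * Continuing the hypothetical example (start_offset = 0x4c0,
+	 * free_size = 0x5000, end_offset = 0xb40): map[0] = 0x1 hides
+	 * [0, 0x4c0), map[1] = 0x4c0 starts the free area, map[2] = 0x54c1
+	 * closes it, and map_used = 2.  The end_offset entry, map[3] =
+	 * 0x6001, is appended below to hide [0x54c0, 0x6000).
+	 */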
 
 	if (chunk->end_offset) {
 		/* hide the end of the bitmap */
@@ -772,6 +831,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->free_size = pcpu_unit_size;
 	chunk->contig_hint = pcpu_unit_size;
 
+	chunk->nr_pages = pcpu_unit_pages;
+
 	return chunk;
 }
 
@@ -859,18 +920,21 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
  * pcpu_chunk_addr_search - determine chunk containing specified address
  * @addr: address for which the chunk needs to be determined.
  *
+ * This is an internal function that handles all but static allocations.
+ * Static percpu address values should never be passed into the allocator.
+ *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-	/* is it in the first chunk? */
-	if (pcpu_addr_in_first_chunk(addr)) {
-		/* is it in the reserved area? */
-		if (pcpu_addr_in_reserved_chunk(addr))
-			return pcpu_reserved_chunk;
+	/* is it in the dynamic region (first chunk)? */
+	if (pcpu_addr_in_first_chunk(addr))
 		return pcpu_first_chunk;
-	}
+
+	/* is it in the reserved region? */
+	if (pcpu_addr_in_reserved_chunk(addr))
+		return pcpu_reserved_chunk;
 
 	/*
 	 * The address is relative to unit0 which might be unused and
@@ -1401,10 +1465,16 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
+	 *
+	 * The address check is against full chunk sizes.  pcpu_base_addr
+	 * points to the beginning of the first chunk including the
+	 * static region.  Assumes good intent as the first chunk may
+	 * not be full (i.e. < pcpu_unit_pages in size).
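+	 *
+	 * For example, with two hypothetical units at pcpu_unit_offsets =
+	 * { 0, 0x8000 } and pcpu_unit_pages = 8 (4K pages), this brackets
+	 * [pcpu_base_addr, pcpu_base_addr + 0x10000).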
 	 */
-	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
-	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
-				     pcpu_unit_pages);
+	first_low = (unsigned long)pcpu_base_addr +
+		    pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
+	first_high = (unsigned long)pcpu_base_addr +
+		     pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
 	if ((unsigned long)addr >= first_low &&
 	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
@@ -1586,12 +1656,13 @@ static void pcpu_dump_alloc_info(const char *lvl,
  * The caller should have mapped the first chunk at @base_addr and
  * copied static data to each unit.
  *
- * If the first chunk ends up with both reserved and dynamic areas, it
- * is served by two chunks - one to serve the core static and reserved
- * areas and the other for the dynamic area.  They share the same vm
- * and page map but uses different area allocation map to stay away
- * from each other.  The latter chunk is circulated in the chunk slots
- * and available for dynamic allocation like any other chunks.
+ * The first chunk will always contain a static and a dynamic region.
+ * However, the static region is not managed by any chunk.  If the first
+ * chunk also contains a reserved region, it is served by two chunks -
+ * one for the reserved region and one for the dynamic region.  They
+ * share the same vm, but each hides offset regions in its own area
+ * allocation map so the two never overlap.
+ * The chunk serving the dynamic region is circulated in the chunk slots
+ * and available for dynamic allocation like any other chunk.
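+ *
+ * Schematically (not to scale), the first chunk looks like:
+ *
+ *	base_addr -> | static | [ reserved ] | dynamic |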
  *
  * RETURNS:
  * 0 on success, -errno on failure.
@@ -1609,7 +1680,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	unsigned int cpu;
 	int *unit_map;
 	int group, unit, i;
-	int map_size, start_offset;
+	int map_size;
+	unsigned long tmp_addr;
 
 #define PCPU_SETUP_BUG_ON(cond)	do {					\
 	if (unlikely(cond)) {						\
@@ -1712,25 +1784,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
 	/*
-	 * Initialize static chunk.  If reserved_size is zero, the
-	 * static chunk covers static area + dynamic allocation area
-	 * in the first chunk.  If reserved_size is not zero, it
-	 * covers static area + reserved area (mostly used for module
-	 * static percpu allocation).
+	 * Initialize first chunk.
+	 * If the reserved_size is non-zero, this initializes the reserved
+	 * chunk.  If the reserved_size is zero, the reserved chunk is NULL
+	 * and the dynamic region is initialized here.  The first chunk,
+	 * pcpu_first_chunk, will always point to the chunk that serves
+	 * the dynamic region.
 	 */
-	start_offset = ai->static_size;
+	tmp_addr = (unsigned long)base_addr + ai->static_size;
 	map_size = ai->reserved_size ?: ai->dyn_size;
-	chunk = pcpu_alloc_first_chunk(base_addr, start_offset, map_size, smap,
+	chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, smap,
 				       ARRAY_SIZE(smap));
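+
+	/*
+	 * For example, with hypothetical sizes static_size = 0x4000 and
+	 * reserved_size = 0x2000, the call above creates the reserved
+	 * chunk at base_addr + 0x4000 and the dynamic chunk below starts
+	 * at base_addr + 0x6000.
+	 */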
 
 	/* init dynamic chunk if necessary */
 	if (ai->reserved_size) {
 		pcpu_reserved_chunk = chunk;
 
-		start_offset = ai->static_size + ai->reserved_size;
+		tmp_addr = (unsigned long)base_addr + ai->static_size +
+			   ai->reserved_size;
 		map_size = ai->dyn_size;
-		chunk = pcpu_alloc_first_chunk(base_addr, start_offset,
-					       map_size, dmap,
+		chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, dmap,
 					       ARRAY_SIZE(dmap));
 	}