/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/pfn.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>

#include <asm-generic/sections.h>
#include <linux/io.h>

#include "internal.h"

static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif

struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.cnt		= 1,	/* empty dummy entry */
	.memory.max		= INIT_MEMBLOCK_REGIONS,

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.cnt		= 1,	/* empty dummy entry */
	.reserved.max		= INIT_MEMBLOCK_REGIONS,

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem.regions	= memblock_physmem_init_regions,
	.physmem.cnt		= 1,	/* empty dummy entry */
	.physmem.max		= INIT_PHYSMEM_REGIONS,
#endif

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

int memblock_debug __initdata_memblock;
#ifdef CONFIG_MOVABLE_NODE
bool movable_node_enabled __initdata_memblock = false;
#endif
static int memblock_can_resize __initdata_memblock;
static int memblock_memory_in_slab __initdata_memblock = 0;
static int memblock_reserved_in_slab __initdata_memblock = 0;

/* inline so we don't get a warning when pr_debug is compiled out */
static __init_memblock const char *
memblock_type_name(struct memblock_type *type)
{
	if (type == &memblock.memory)
		return "memory";
	else if (type == &memblock.reserved)
		return "reserved";
	else
		return "unknown";
}

/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
}
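
/*
 * Illustrative note (not from the original source): with a hypothetical
 * base of ULLONG_MAX - 0x1000 and *size of 0x2000, memblock_cap_size()
 * clips *size to 0x1000 so that base + *size no longer wraps past the
 * end of the physical address space.
 */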

/*
 * Address comparison utilities
 */
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
				       phys_addr_t base2, phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;
		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
			break;
	}

	return (i < type->cnt) ? i : -1;
}

/*
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 *
 * RETURNS:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		cand = round_up(this_start, align);
		if (cand < this_end && this_end - cand >= size)
			return cand;
	}

	return 0;
}

/**
 * __memblock_find_range_top_down - find free area utility, in top-down
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Utility called from memblock_find_in_range_node(), find free area top-down.
 *
 * RETURNS:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}

	return 0;
}

/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
 * When allocation direction is bottom-up, @start should be greater
 * than the end of the kernel image. Otherwise, it will be trimmed. The
 * reason is that we want the bottom-up allocation to sit just above the
 * kernel image, so it is highly likely that the allocated memory and
 * the kernel will reside in the same node.
 *
 * If bottom-up allocation fails, memory is allocated top-down instead.
 *
 * RETURNS:
 * Found address on success, 0 on failure.
 */
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid)
{
	phys_addr_t kernel_end, ret;

	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);
	kernel_end = __pa_symbol(_end);

	/*
	 * try bottom-up allocation only when bottom-up mode
	 * is set and @end is above the kernel image.
	 */
	if (memblock_bottom_up() && end > kernel_end) {
		phys_addr_t bottom_up_start;

		/* make sure we will allocate above the kernel */
		bottom_up_start = max(start, kernel_end);

		/* ok, try bottom-up allocation first */
		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
						      size, align, nid);
		if (ret)
			return ret;

		/*
		 * we always limit bottom-up allocation above the kernel,
		 * but top-down allocation doesn't have the limit, so
		 * retrying top-down allocation may succeed when bottom-up
		 * allocation failed.
		 *
		 * bottom-up allocation is expected to fail very rarely,
		 * so we use WARN_ONCE() here to see the stack trace if
		 * a failure happens.
		 */
		WARN_ONCE(1, "memblock: bottom-up allocation failed, "
			     "memory hotunplug may be affected\n");
	}

	return __memblock_find_range_top_down(start, end, size, align, nid);
}

/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * RETURNS:
 * Found address on success, 0 on failure.
 */
phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
{
	return memblock_find_in_range_node(size, align, start, end,
					    NUMA_NO_NODE);
}
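
/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * needs a page-aligned scratch area below a hypothetical 4 GiB limit might
 * do something like
 *
 *	phys_addr_t buf;
 *
 *	buf = memblock_find_in_range(0, SZ_4G, SZ_64K, PAGE_SIZE);
 *	if (buf)
 *		memblock_reserve(buf, SZ_64K);
 *
 * SZ_4G and SZ_64K stand in for whatever limit and size the caller really
 * needs; the candidate address is only safe to use once it has been
 * reserved, which is exactly the pattern memblock_double_array() follows
 * below.
 */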

static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	type->total_size -= type->regions[r].size;
	memmove(&type->regions[r], &type->regions[r + 1],
		(type->cnt - (r + 1)) * sizeof(type->regions[r]));
	type->cnt--;

	/* Special case for empty arrays */
	if (type->cnt == 0) {
		WARN_ON(type->total_size != 0);
		type->cnt = 1;
		type->regions[0].base = 0;
		type->regions[0].size = 0;
		type->regions[0].flags = 0;
		memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
	}
}

#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK

phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
					phys_addr_t *addr)
{
	if (memblock.reserved.regions == memblock_reserved_init_regions)
		return 0;

	*addr = __pa(memblock.reserved.regions);

	return PAGE_ALIGN(sizeof(struct memblock_region) *
			  memblock.reserved.max);
}

phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
					phys_addr_t *addr)
{
	if (memblock.memory.regions == memblock_memory_init_regions)
		return 0;

	*addr = __pa(memblock.memory.regions);

	return PAGE_ALIGN(sizeof(struct memblock_region) *
			  memblock.memory.max);
}

#endif

/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * RETURNS:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr;
	int use_slab = slab_is_available();
	int *in_slab;

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * We need to allocate the new array aligned to PAGE_SIZE,
	 * so we can free it completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/* Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() is true and we
	 * allocate from the slab, or we allocate from MEMBLOCK directly.
	 * That means this is unsafe to use while bootmem is still active
	 * (unless bootmem itself is implemented on top of MEMBLOCK, which
	 * isn't the case yet).
	 *
	 * This should not be an issue for now, however, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab
	 * is active for memory hotplug operations.
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		new_array = addr ? __va(addr) : NULL;
	}
	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       memblock_type_name(type), type->max, type->max * 2);
		return -1;
	}

	memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]",
			memblock_type_name(type), type->max * 2, (u64)addr,
			(u64)addr + new_size - 1);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(__pa(old_array), old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 *
 * Scan @type and merge neighboring compatible regions.
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type)
{
	int i = 0;

	/* cnt never goes below 1 */
	while (i < type->cnt - 1) {
		struct memblock_region *this = &type->regions[i];
		struct memblock_region *next = &type->regions[i + 1];

		if (this->base + this->size != next->base ||
		    memblock_get_region_node(this) !=
		    memblock_get_region_node(next) ||
		    this->flags != next->flags) {
			BUG_ON(this->base + this->size > next->base);
			i++;
			continue;
		}

		this->size += next->size;
		/* move forward from next + 1, index of which is i + 2 */
		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
		type->cnt--;
	}
}
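
/*
 * Worked example (illustrative, not from the original source): if the
 * array holds [0x1000-0x2000) and [0x2000-0x3000) with identical nid and
 * flags, one pass of the loop above folds them into the single region
 * [0x1000-0x3000) and drops cnt by one.  Regions that merely touch but
 * differ in nid or flags are left as they are.
 */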

/**
 * memblock_insert_region - insert new memblock region
 * @type:	memblock type to insert into
 * @idx:	index for the insertion point
 * @base:	base address of the new region
 * @size:	size of the new region
 * @nid:	node id of the new region
 * @flags:	flags of the new region
 *
 * Insert new memblock region [@base,@base+@size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid, unsigned long flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base,@base+@size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, unsigned long flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int i, nr_new;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		WARN_ON(type->cnt != 1 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		return 0;
	}
repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
			nr_new++;
			if (insert)
				memblock_insert_region(type, i++, base,
						       rbase - base, nid,
						       flags);
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
			memblock_insert_region(type, i, base, end - base,
					       nid, flags);
	}

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
	}
}
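
/*
 * Worked example (illustrative, not from the original source): adding
 * [0x2000-0x6000) to a type that already holds [0x4000-0x8000) inserts
 * only the non-overlapping lower part [0x2000-0x4000); assuming matching
 * nid and flags, the merge pass then leaves the single region
 * [0x2000-0x8000).  The two-round "repeat" scheme above exists so the
 * regions array can be doubled before any insertion actually happens.
 */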

int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				       int nid)
{
	return memblock_add_range(&memblock.memory, base, size, nid, 0);
}

int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	return memblock_add_range(&memblock.memory, base, size,
				   MAX_NUMNODES, 0);
}

/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base,@base+@size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int i;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= base - rbase;
			type->total_size -= base - rbase;
			memblock_insert_region(type, i, rbase, base - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= end - rbase;
			type->total_size -= end - rbase;
			memblock_insert_region(type, i--, rbase, end - rbase,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = i;
			*end_rgn = i + 1;
		}
	}

	return 0;
}
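
/*
 * Worked example (illustrative, not from the original source): isolating
 * [0x2000-0x3000) out of a single region [0x1000-0x4000) splits it into
 * [0x1000-0x2000), [0x2000-0x3000) and [0x3000-0x4000); *start_rgn and
 * *end_rgn then bracket just the middle region, which callers such as
 * memblock_remove_range() or memblock_mark_hotplug() go on to operate on.
 */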

int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int start_rgn, end_rgn;
	int i, ret;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = end_rgn - 1; i >= start_rgn; i--)
		memblock_remove_region(type, i);
	return 0;
}

int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	return memblock_remove_range(&memblock.memory, base, size);
}

int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
		     (unsigned long long)base,
		     (unsigned long long)base + size - 1,
		     (void *)_RET_IP_);

	kmemleak_free_part(__va(base), size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

static int __init_memblock memblock_reserve_region(phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   unsigned long flags)
{
	struct memblock_type *_rgn = &memblock.reserved;

	memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n",
		     (unsigned long long)base,
		     (unsigned long long)base + size - 1,
		     flags, (void *)_RET_IP_);

	return memblock_add_range(_rgn, base, size, nid, flags);
}

int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	return memblock_reserve_region(base, size, MAX_NUMNODES, 0);
}
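
/*
 * Illustrative usage sketch (not part of the original file): early arch or
 * firmware glue typically registers RAM and then carves out what must not
 * be handed to the page allocator, e.g.
 *
 *	memblock_add(ram_base, ram_size);
 *	memblock_reserve(initrd_start_phys, initrd_size);
 *
 * where ram_base/ram_size and initrd_start_phys/initrd_size are
 * hypothetical values taken from the boot protocol or device tree.
 */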

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * This function isolates region [@base, @base + @size), and mark it with flag
 * MEMBLOCK_HOTPLUG.
 *
 * Return 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *type = &memblock.memory;
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_flags(&type->regions[i], MEMBLOCK_HOTPLUG);

	memblock_merge_regions(type);
	return 0;
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * This function isolates region [@base, @base + @size), and clear flag
 * MEMBLOCK_HOTPLUG for the isolated regions.
 *
 * Return 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *type = &memblock.memory;
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_clear_region_flags(&type->regions[i],
					    MEMBLOCK_HOTPLUG);

	memblock_merge_regions(type);
	return 0;
}
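
/*
 * Illustrative usage sketch (not part of the original file): a NUMA/ACPI
 * layer that learns a range is hot-removable could mark it, e.g.
 *
 *	memblock_mark_hotplug(hotplug_base, hotplug_size);
 *
 * with hotplug_base/hotplug_size being hypothetical values read from the
 * firmware tables.  When movable_node is enabled, __next_mem_range_rev()
 * below skips such regions, and memblock_clear_hotplug() undoes the mark.
 */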

/**
 * __next_mem_range - next function for for_each_free_mem_range() etc.
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.	For example, if type_b regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
 */
void __init_memblock __next_mem_range(u64 *idx, int nid,
				      struct memblock_type *type_a,
				      struct memblock_type *type_b,
				      phys_addr_t *out_start,
				      phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (WARN_ONCE(nid == MAX_NUMNODES,
	"Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
		nid = NUMA_NO_NODE;

	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int	    m_nid = memblock_get_region_node(m);

		/* only memory regions are associated with nodes, check it */
		if (nid != NUMA_NO_NODE && nid != m_nid)
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : ULLONG_MAX;

			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
					*out_start =
						max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				/*
				 * The region which ends first is
				 * advanced for the next iteration.
				 */
				if (m_end <= r_end)
					idx_a++;
				else
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}
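
/*
 * Illustrative usage sketch (not part of the original file): this iterator
 * normally runs behind the for_each_free_mem_range() wrapper, the same way
 * __memblock_find_range_bottom_up() above uses it:
 *
 *	phys_addr_t start, end;
 *	u64 i;
 *
 *	for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL)
 *		pr_debug("free: [%#llx-%#llx)\n", (u64)start, (u64)end);
 *
 * Each reported range is memory present in memblock.memory that is not
 * covered by memblock.reserved.
 */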

/**
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Reverse of __next_mem_range().
 */
void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
{
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;

	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
		nid = NUMA_NO_NODE;

	if (*idx == (u64)ULLONG_MAX) {
		idx_a = type_a->cnt - 1;
		idx_b = type_b->cnt;
	}

	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
		int m_nid = memblock_get_region_node(m);

		/* only memory regions are associated with nodes, check it */
		if (nid != NUMA_NO_NODE && nid != m_nid)
			continue;

		/* skip hotpluggable memory regions if needed */
		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
			continue;

		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
				r->base : ULLONG_MAX;
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */

			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
					*out_nid = m_nid;
				if (m_start >= r_start)
					idx_a--;
				else
					idx_b--;
				*idx = (u32)idx_a | (u64)idx_b << 32;
				return;
			}
		}
	}
	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
 * Common iterator interface used to define for_each_mem_pfn_range().
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
				unsigned long *out_start_pfn,
				unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *type = &memblock.memory;
	struct memblock_region *r;

	while (++*idx < type->cnt) {
		r = &type->regions[*idx];

		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
			continue;
		if (nid == MAX_NUMNODES || nid == r->nid)
			break;
	}
	if (*idx >= type->cnt) {
		*idx = -1;
		return;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(r->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(r->base + r->size);