/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  See Documentation/nommu-mmap.txt
 *
 *  Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/printk.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#include "internal.h"

void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
int heap_stack_gap = 0;

atomic_long_t mmap_pages_allocated;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux is hosted
 * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 * balancing memory across competing virtual machines that are hosted.
 * Several metrics drive this policy engine including the guest reported
 * memory commitment.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}

EXPORT_SYMBOL_GPL(vm_memory_committed);

EXPORT_SYMBOL(mem_map);

/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);
const struct vm_operations_struct generic_file_vm_ops = {
};

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	/*
	 * If the object we have should not have ksize performed on it,
	 * return size of 0
	 */
	if (!objp || !virt_addr_valid(objp))
		return 0;

	page = virt_to_head_page(objp);

	/*
	 * If the allocator sets PageSlab, we know the pointer came from
	 * kmalloc().
	 */
	if (PageSlab(page))
		return ksize(objp);

	/*
	 * If it's not a compound page, see if we have a matching VMA
	 * region. This test is intentionally done in reverse order,
	 * so if there's no VMA, we still fall through and hand back
	 * PAGE_SIZE for 0-order pages.
	 */
	if (!PageCompound(page)) {
		struct vm_area_struct *vma;

		vma = find_vma(current->mm, (unsigned long)objp);
		if (vma)
			return vma->vm_end - vma->vm_start;
	}

	/*
	 * The ksize() function is only guaranteed to work for pointers
	 * returned by kmalloc(). So handle arbitrary pointers here.
	 */
	return PAGE_SIZE << compound_order(page);
}
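
/*
 * Editor's sketch, not part of the original file: a minimal illustration of
 * what kobjsize() reports. A kmalloc() pointer reports its slab bucket size
 * rather than the requested length. The helper name is hypothetical.
 */
static inline void __maybe_unused kobjsize_example(void)
{
	void *p = kmalloc(100, GFP_KERNEL);

	if (p) {
		/* typically 128 with power-of-two slab buckets, not 100 */
		pr_debug("kobjsize(p) = %u\n", kobjsize(p));
		kfree(p);
	}
}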

long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		      unsigned long start, unsigned long nr_pages,
		      unsigned int foll_flags, struct page **pages,
		      struct vm_area_struct **vmas, int *nonblocking)
{
	struct vm_area_struct *vma;
	unsigned long vm_flags;
	int i;

	/* calculate required read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (foll_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (foll_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	for (i = 0; i < nr_pages; i++) {
		vma = find_vma(mm, start);
		if (!vma)
			goto finish_or_fault;

		/* protect what we can, including chardevs */
		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			goto finish_or_fault;

		if (pages) {
			pages[i] = virt_to_page(start);
			if (pages[i])
				page_cache_get(pages[i]);
		}
		if (vmas)
			vmas[i] = vma;
		start = (start + PAGE_SIZE) & PAGE_MASK;
	}

	return i;

finish_or_fault:
	return i ? : -EFAULT;
}

/*
 * get a list of pages in an address range belonging to the specified process
 * and indicate the VMA that covers each page
 * - this is potentially dodgy as we may end up incrementing the page count of a
 *   slab page or a secondary page from a compound page
 * - don't permit access to VMAs that don't support it, such as I/O mappings
 */
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		    unsigned long start, unsigned long nr_pages,
		    int write, int force, struct page **pages,
		    struct vm_area_struct **vmas)
{
	int flags = 0;

	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
				NULL);
}
EXPORT_SYMBOL(get_user_pages);
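
/*
 * Editor's sketch, not part of the original file: pinning one page of the
 * current task with the nommu get_user_pages() above, taking mmap_sem for
 * reading around the lookup. The helper name is hypothetical.
 */
static inline int __maybe_unused pin_one_page_example(unsigned long uaddr,
						      struct page **pagep)
{
	long n;

	down_read(&current->mm->mmap_sem);
	n = get_user_pages(current, current->mm, uaddr, 1, 0, 0, pagep, NULL);
	up_read(&current->mm->mmap_sem);

	return n == 1 ? 0 : -EFAULT;
}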

long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
			   unsigned long start, unsigned long nr_pages,
			   int write, int force, struct page **pages,
			   int *locked)
{
	return get_user_pages(tsk, mm, start, nr_pages, write, force,
			      pages, NULL);
}
EXPORT_SYMBOL(get_user_pages_locked);

long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
			       unsigned long start, unsigned long nr_pages,
			       int write, int force, struct page **pages,
			       unsigned int gup_flags)
{
	long ret;
	down_read(&mm->mmap_sem);
	ret = get_user_pages(tsk, mm, start, nr_pages, write, force,
			     pages, NULL);
	up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(__get_user_pages_unlocked);

long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long start, unsigned long nr_pages,
			     int write, int force, struct page **pages)
{
	return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
					 force, pages, 0);
}
EXPORT_SYMBOL(get_user_pages_unlocked);

/**
 * follow_pfn - look up PFN at a user virtual address
 * @vma: memory mapping
 * @address: user virtual address
 * @pfn: location to store found PFN
 *
 * Only IO mappings and raw PFN mappings are allowed.
 *
 * Returns zero and the pfn at @pfn on success, -ve otherwise.
 */
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
	unsigned long *pfn)
{
	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		return -EINVAL;

	*pfn = address >> PAGE_SHIFT;
	return 0;
}
EXPORT_SYMBOL(follow_pfn);
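
/*
 * Editor's sketch, not part of the original file: on nommu a "virtual"
 * address inside a VM_IO/VM_PFNMAP mapping is already physical, so
 * follow_pfn() reduces to a shift. The helper name is hypothetical.
 */
static inline void __maybe_unused follow_pfn_example(struct vm_area_struct *vma,
						     unsigned long addr)
{
	unsigned long pfn;

	if (follow_pfn(vma, addr, &pfn) == 0)
		pr_debug("addr %#lx -> pfn %#lx\n", addr, pfn);
}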

LIST_HEAD(vmap_area_list);

void vfree(const void *addr)
{
	kfree(addr);
}
EXPORT_SYMBOL(vfree);

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	/*
	 *  You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
	 * returns only a logical address.
	 */
	return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
EXPORT_SYMBOL(__vmalloc);

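/*
 * Editor's sketch, not part of the original file: because __vmalloc() above
 * falls back to kmalloc(), "vmalloc" memory on nommu is physically
 * contiguous, and virt_to_page() agrees with vmalloc_to_page() (defined
 * below). The helper name is hypothetical.
 */
static inline bool __maybe_unused vmalloc_is_contiguous_example(void)
{
	void *p = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
	bool contiguous = p && virt_to_page(p) == vmalloc_to_page(p);

	vfree(p);
	return contiguous;
}
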
void *vmalloc_user(unsigned long size)
{
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			PAGE_KERNEL);
	if (ret) {
		struct vm_area_struct *vma;

		down_write(&current->mm->mmap_sem);
		vma = find_vma(current->mm, (unsigned long)ret);
		if (vma)
			vma->vm_flags |= VM_USERMAP;
		up_write(&current->mm->mmap_sem);
	}

	return ret;
}
EXPORT_SYMBOL(vmalloc_user);
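
/*
 * Editorial note, not in the original file: the VM_USERMAP flag set by
 * vmalloc_user() above is what later permits remap_vmalloc_range() to map
 * the buffer into userspace without leaking uninitialised data.
 */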

struct page *vmalloc_to_page(const void *addr)
{
	return virt_to_page(addr);
}
EXPORT_SYMBOL(vmalloc_to_page);

unsigned long vmalloc_to_pfn(const void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

long vread(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) buf + count < count)
		count = -(unsigned long) buf;

	memcpy(buf, addr, count);
	return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	memcpy(addr, buf, count);
	return count;
}

/**
 *	vmalloc  -  allocate virtually contiguous memory
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc);

/**
 *	vzalloc - allocate virtually contiguous memory with zero fill
 *
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *	The memory allocated is set to zero.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			PAGE_KERNEL);
}
EXPORT_SYMBOL(vzalloc);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return vmalloc(size);
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc_node(unsigned long size, int node)
{
	return vzalloc(size);
}
EXPORT_SYMBOL(vzalloc_node);
#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

/**
 *	vmalloc_exec  -  allocate virtually contiguous, executable memory
 *	@size:		allocation size
 *
 *	Kernel-internal function to allocate enough pages to cover @size
 *	from the page level allocator and map them into contiguous and
 *	executable kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */

void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

/**
 * vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 *	@size:		allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
 */
void *vmalloc_32_user(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user(size);
}
EXPORT_SYMBOL(vmalloc_32_user);

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);

void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vm_map_ram);

void vm_unmap_ram(const void *mem, unsigned int count)
{
	BUG();
}
EXPORT_SYMBOL(vm_unmap_ram);

void vm_unmap_aliases(void)
{
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __weak vmalloc_sync_all(void)
{
}

/**
 *	alloc_vm_area - allocate a range of kernel address space
 *	@size:		size of the area
 *
 *	Returns:	NULL on failure, vm_struct on success
 *
 *	This function reserves a range of kernel address space, and
 *	allocates pagetables to map that range.  No actual mappings
 *	are created.  If the kernel address space is not shared
 *	between processes, it syncs the pagetable across all
 *	processes.
 */
struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

void free_vm_area(struct vm_struct *area)
{
	BUG();
}
EXPORT_SYMBOL_GPL(free_vm_area);

int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	flush_icache_range(mm->brk, brk);
	return mm->brk = brk;
}

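/*
 * Editorial note, not in the original file: because the nommu heap is
 * reserved up front (mm->start_brk .. mm->context.end_brk), the brk syscall
 * above never allocates pages; growing the break only flushes the icache
 * over the newly exposed range, and out-of-range requests simply return the
 * current break.
 */
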
/*
 * initialise the VMA and region record slabs
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * validate the region tree
 * - the caller must hold the region lock
 */
#ifdef CONFIG_DEBUG_NOMMU_REGIONS
static noinline void validate_nommu_regions(void)
{
	struct vm_region *region, *last;
	struct rb_node *p, *lastp;

	lastp = rb_first(&nommu_region_tree);
	if (!lastp)
		return;

	last = rb_entry(lastp, struct vm_region, vm_rb);
	BUG_ON(last->vm_end <= last->vm_start);
	BUG_ON(last->vm_top < last->vm_end);

	while ((p = rb_next(lastp))) {
		region = rb_entry(p, struct vm_region, vm_rb);
		last = rb_entry(lastp, struct vm_region, vm_rb);

		BUG_ON(region->vm_end <= region->vm_start);
		BUG_ON(region->vm_top < region->vm_end);
		BUG_ON(region->vm_start < last->vm_top);

		lastp = p;
	}
}
#else
static void validate_nommu_regions(void)
{
}
#endif

/*
 * add a region into the global tree
 */
static void add_nommu_region(struct vm_region *region)
{
	struct vm_region *pregion;
	struct rb_node **p, *parent;

	validate_nommu_regions();

	parent = NULL;
	p = &nommu_region_tree.rb_node;
	while (*p) {
		parent = *p;
		pregion = rb_entry(parent, struct vm_region, vm_rb);
		if (region->vm_start < pregion->vm_start)
			p = &(*p)->rb_left;
		else if (region->vm_start > pregion->vm_start)
			p = &(*p)->rb_right;
		else if (pregion == region)
			return;
		else
			BUG();
	}

	rb_link_node(&region->vm_rb, parent, p);
	rb_insert_color(&region->vm_rb, &nommu_region_tree);

	validate_nommu_regions();
}

/*
 * delete a region from the global tree
 */
static void delete_nommu_region(struct vm_region *region)
{
	BUG_ON(!nommu_region_tree.rb_node);

	validate_nommu_regions();
	rb_erase(&region->vm_rb, &nommu_region_tree);
	validate_nommu_regions();
}

/*
 * free a contiguous series of pages
 */
static void free_page_series(unsigned long from, unsigned long to)
{
	for (; from < to; from += PAGE_SIZE) {
		struct page *page = virt_to_page(from);

		atomic_long_dec(&mmap_pages_allocated);
		put_page(page);
	}
}

/*
 * release a reference to a region
 * - the caller must hold the region semaphore for writing, which this releases
 * - the region may not have been added to the tree yet, in which case vm_top
 *   will equal vm_start
 */
static void __put_nommu_region(struct vm_region *region)
	__releases(nommu_region_sem)
{
	BUG_ON(!nommu_region_tree.rb_node);

	if (--region->vm_usage == 0) {
		if (region->vm_top > region->vm_start)
			delete_nommu_region(region);
		up_write(&nommu_region_sem);

		if (region->vm_file)
			fput(region->vm_file);

		/* IO memory and memory shared directly out of the pagecache
		 * from ramfs/tmpfs mustn't be released here */
		if (region->vm_flags & VM_MAPPED_COPY)
			free_page_series(region->vm_start, region->vm_top);
		kmem_cache_free(vm_region_jar, region);
	} else {
		up_write(&nommu_region_sem);
	}
}

/*
 * release a reference to a region
 */
static void put_nommu_region(struct vm_region *region)
{
	down_write(&nommu_region_sem);
	__put_nommu_region(region);
}

/*
 * update protection on a vma
 */
static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
{
#ifdef CONFIG_MPU
	struct mm_struct *mm = vma->vm_mm;
	long start = vma->vm_start & PAGE_MASK;
	while (start < vma->vm_end) {
		protect_page(mm, start, flags);
		start += PAGE_SIZE;
	}
	update_protections(mm);
#endif
}

/*
 * add a VMA into a process's mm_struct in the appropriate place in the list
 * and tree and add to the address space's page tree also if not an anonymous
 * page
 * - should be called with mm->mmap_sem held writelocked
 */
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *pvma, *prev;
	struct address_space *mapping;
	struct rb_node **p, *parent, *rb_prev;

	BUG_ON(!vma->vm_region);

	mm->map_count++;
	vma->vm_mm = mm;

	protect_vma(vma, vma->vm_flags);

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}

	/* add the VMA to the tree */
	parent = rb_prev = NULL;
	p = &mm->mm_rb.rb_node;
	while (*p) {
		parent = *p;
		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);

		/* sort by: start addr, end addr, VMA struct addr in that order
		 * (the latter is necessary as we may get identical VMAs) */
		if (vma->vm_start < pvma->vm_start)
			p = &(*p)->rb_left;
		else if (vma->vm_start > pvma->vm_start) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else if (vma->vm_end < pvma->vm_end)
			p = &(*p)->rb_left;
		else if (vma->vm_end > pvma->vm_end) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else if (vma < pvma)
			p = &(*p)->rb_left;
		else if (vma > pvma) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else
			BUG();
	}

	rb_link_node(&vma->vm_rb, parent, p);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);

	/* add VMA to the VMA list also */
	prev = NULL;
	if (rb_prev)
		prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);

	__vma_link_list(mm, vma, prev, parent);
}

/*
 * delete a VMA from its owning mm_struct and address space
 */
static void delete_vma_from_mm(struct vm_area_struct *vma)
{
	int i;
	struct address_space *mapping;
	struct mm_struct *mm = vma->vm_mm;
	struct task_struct *curr = current;

	protect_vma(vma, 0);

	mm->map_count--;
	for (i = 0; i < VMACACHE_SIZE; i++) {
		/* if the vma is cached, invalidate the entire cache */
		if (curr->vmacache[i] == vma) {
			vmacache_invalidate(mm);
			break;
		}
	}

	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}

	/* remove from the MM's tree and list */
	rb_erase(&vma->vm_rb, &mm->mm_rb);

	if (vma->vm_prev)
		vma->vm_prev->vm_next = vma->vm_next;
	else
		mm->mmap = vma->vm_next;

	if (vma->vm_next)
		vma->vm_next->vm_prev = vma->vm_prev;
}

/*
 * destroy a VMA record
 */
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	put_nommu_region(vma->vm_region);
	kmem_cache_free(vm_area_cachep, vma);
}

/*
 * look up the first VMA in which addr resides, NULL if none
 * - should be called with mm->mmap_sem at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	/* check the cache first */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	/* trawl the list (there may be multiple mappings in which addr
	 * resides) */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start > addr)
			return NULL;
		if (vma->vm_end > addr) {
			vmacache_update(addr, vma);
			return vma;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(find_vma);
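
/*
 * Editor's sketch, not part of the original file: find_vma() consults the
 * per-task VMA cache and then walks the mmap list, so callers take mmap_sem
 * for reading. The helper name is hypothetical.
 */
static inline bool __maybe_unused addr_is_mapped_example(struct mm_struct *mm,
							 unsigned long addr)
{
	bool mapped;

	down_read(&mm->mmap_sem);
	mapped = find_vma(mm, addr) != NULL;
	up_read(&mm->mmap_sem);

	return mapped;
}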

/*
 * find a VMA
 * - we don't extend stack VMAs under NOMMU conditions
 */
struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	return find_vma(mm, addr);
}

/*
 * expand a stack to a given address
 * - not supported under NOMMU conditions
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return -ENOMEM;
}

/*
 * look up the first VMA that exactly matches addr
 * - should be called with mm->mmap_sem at least held readlocked
 */
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
					     unsigned long addr,
					     unsigned long len)
{
	struct vm_area_struct *vma;
	unsigned long end = addr + len;

	/* check the cache first */
	vma = vmacache_find_exact(mm, addr, end);
	if (vma)
		return vma;

	/* trawl the list (there may be multiple mappings in which addr
	 * resides) */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start < addr)
			continue;
		if (vma->vm_start > addr)
			return NULL;
		if (vma->vm_end == end) {
			vmacache_update(addr, vma);
			return vma;
		}
	}

	return NULL;
}

/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities, rlen;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED)
		return -EINVAL;

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (!len)
		return -EINVAL;

	/* Careful about overflows.. */
	rlen = PAGE_ALIGN(len);
	if (!rlen || rlen > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* files must support mmap */
		if (!file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		if (file->f_op->mmap_capabilities) {
			capabilities = file->f_op->mmap_capabilities(file);
		} else {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file_inode(file)->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = NOMMU_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					NOMMU_MAP_DIRECT |
					NOMMU_MAP_READ |
					NOMMU_MAP_WRITE;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~NOMMU_MAP_DIRECT;
		if (!(file->f_mode & FMODE_CAN_READ))
			capabilities &= ~NOMMU_MAP_COPY;

		/* The file shall have been opened with read permission. */
		if (!(file->f_mode & FMODE_READ))
			return -EACCES;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file_inode(file)) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (locks_verify_locked(file))
				return -EAGAIN;

			if (!(capabilities & NOMMU_MAP_DIRECT))
				return -ENODEV;

			/* we mustn't privatise shared mappings */
			capabilities &= ~NOMMU_MAP_COPY;
		} else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & NOMMU_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~NOMMU_MAP_DIRECT;
		}

		if (capabilities & NOMMU_MAP_DIRECT) {
			if (((prot & PROT_READ)  && !(capabilities & NOMMU_MAP_READ))  ||
			    ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & NOMMU_MAP_EXEC))
			    ) {
				capabilities &= ~NOMMU_MAP_DIRECT;
				if (flags & MAP_SHARED) {
					pr_warn("MAP_SHARED not completely supported on !MMU\n");
					return -EINVAL;
				}
			}
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (path_noexec(&file->f_path)) {
			if (prot & PROT_EXEC)
				return -EPERM;
		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & NOMMU_MAP_EXEC)
					prot |= PROT_EXEC;
			}
		} else if ((prot & PROT_READ) &&
			 (prot & PROT_EXEC) &&
			 !(capabilities & NOMMU_MAP_EXEC)
			 ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~NOMMU_MAP_DIRECT;
		}
	} else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = NOMMU_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_mmap_addr(addr);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
	/* vm_flags |= mm->def_flags; */

	if (!(capabilities & NOMMU_MAP_DIRECT)) {
		/* attempt to share read-only copies of mapped file chunks */
		vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (file && !(prot & PROT_WRITE))
			vm_flags |= VM_MAYSHARE;
	} else {
		/* overlay a shareable mapping on the backing device or inode
		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
		 * romfs/cramfs */
		vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS);
		if (flags & MAP_SHARED)
			vm_flags |= VM_SHARED;
	}

	/* refuse to let anyone share private mappings with this process if
	 * it's being traced - otherwise breakpoints set in it may interfere
	 * with another untraced process
	 */
	if ((flags & MAP_PRIVATE) && current->ptrace)
		vm_flags &= ~VM_MAYSHARE;

	return vm_flags;
}

/*
 * set up a shared mapping on a file (the driver or filesystem provides and
 * pins the storage)
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	int ret;

	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
	if (ret == 0) {
		vma->vm_region->vm_top = vma->vm_region->vm_end;
		return 0;
	}
	if (ret != -ENOSYS)
		return ret;

	/* getting -ENOSYS indicates that direct mmap isn't possible (as
	 * opposed to tried but failed) so we can only give a suitable error as
	 * it's not possible to make a private copy if MAP_SHARED was given */
	return -ENODEV;
}

/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma,
			   struct vm_region *region,
			   unsigned long len,
			   unsigned long capabilities)
{
	unsigned long total, point;
	void *base;
	int ret, order;

	/* invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory
	 * - VM_MAYSHARE will be set if it may attempt to share
	 */
	if (capabilities & NOMMU_MAP_DIRECT) {
		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
		if (ret == 0) {
			/* shouldn't return success if we're not sharing */
			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
			vma->vm_region->vm_top = vma->vm_region->vm_end;
			return 0;
		}
		if (ret != -ENOSYS)
			return ret;

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}

	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	order = get_order(len);
	total = 1 << order;
	point = len >> PAGE_SHIFT;

	/* we don't want to allocate a power-of-2 sized page set */
	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages)
		total = point;

	base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL);
	if (!base)
		goto enomem;

	atomic_long_add(total, &mmap_pages_allocated);
	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
	region->vm_start = (unsigned long) base;
	region->vm_end   = region->vm_start + len;
	region->vm_top   = region->vm_start + (total << PAGE_SHIFT);

	vma->vm_start = region->vm_start;
	vma->vm_end   = region->vm_start + len;

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		mm_segment_t old_fs;
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		ret = __vfs_read(vma->vm_file, base, len, &fpos);
		set_fs(old_fs);

		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

	}

	return 0;

error_free:
	free_page_series(region->vm_start, region->vm_top);
	region->vm_start = vma->vm_start = 0;
	region->vm_end   = vma->vm_end = 0;
	region->vm_top   = 0;
	return ret;

enomem: