Commit 396faf03 authored by Mel Gorman, committed by Linus Torvalds
Browse files

Allow huge page allocations to use GFP_HIGHUSER_MOVABLE



Huge pages are not movable so are not allocated from ZONE_MOVABLE.  However,
as ZONE_MOVABLE will always have pages that can be migrated or reclaimed, it
can be used to satisfy hugepage allocations even when the system has been
running a long time.  This allows an administrator to resize the hugepage pool
at runtime depending on the size of ZONE_MOVABLE.

This patch adds a new sysctl called hugepages_treat_as_movable.  When a
non-zero value is written to it, future allocations for the huge page pool
will use ZONE_MOVABLE.  Despite huge pages being non-movable, we do not
introduce additional external fragmentation of note as huge pages are always
the largest contiguous block we care about.

[akpm@linux-foundation.org: various fixes]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 2a1e274a
...@@ -15,6 +15,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) ...@@ -15,6 +15,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
} }
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
...@@ -29,6 +30,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to); ...@@ -29,6 +30,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
extern unsigned long max_huge_pages; extern unsigned long max_huge_pages;
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity; extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group; extern int sysctl_hugetlb_shm_group;
......
...@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); ...@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
extern struct mempolicy default_policy; extern struct mempolicy default_policy;
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr); unsigned long addr, gfp_t gfp_flags);
extern unsigned slab_node(struct mempolicy *policy); extern unsigned slab_node(struct mempolicy *policy);
extern enum zone_type policy_zone; extern enum zone_type policy_zone;
...@@ -256,9 +256,9 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) ...@@ -256,9 +256,9 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
#define set_cpuset_being_rebound(x) do {} while (0) #define set_cpuset_being_rebound(x) do {} while (0)
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr) unsigned long addr, gfp_t gfp_flags)
{ {
return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
} }
static inline int do_migrate_pages(struct mm_struct *mm, static inline int do_migrate_pages(struct mm_struct *mm,
......
...@@ -826,6 +826,14 @@ static ctl_table vm_table[] = { ...@@ -826,6 +826,14 @@ static ctl_table vm_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "hugepages_treat_as_movable",
.data = &hugepages_treat_as_movable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &hugetlb_treat_movable_handler,
},
#endif #endif
{ {
.ctl_name = VM_LOWMEM_RESERVE_RATIO, .ctl_name = VM_LOWMEM_RESERVE_RATIO,
......
...@@ -27,6 +27,9 @@ unsigned long max_huge_pages; ...@@ -27,6 +27,9 @@ unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES]; static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES]; static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES]; static unsigned int free_huge_pages_node[MAX_NUMNODES];
static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;
/* /*
* Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
*/ */
...@@ -68,12 +71,13 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, ...@@ -68,12 +71,13 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
{ {
int nid; int nid;
struct page *page = NULL; struct page *page = NULL;
struct zonelist *zonelist = huge_zonelist(vma, address); struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask);
struct zone **z; struct zone **z;
for (z = zonelist->zones; *z; z++) { for (z = zonelist->zones; *z; z++) {
nid = zone_to_nid(*z); nid = zone_to_nid(*z);
if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) && if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
!list_empty(&hugepage_freelists[nid])) !list_empty(&hugepage_freelists[nid]))
break; break;
} }
...@@ -113,7 +117,7 @@ static int alloc_fresh_huge_page(void) ...@@ -113,7 +117,7 @@ static int alloc_fresh_huge_page(void)
prev_nid = nid; prev_nid = nid;
spin_unlock(&nid_lock); spin_unlock(&nid_lock);
page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
HUGETLB_PAGE_ORDER); HUGETLB_PAGE_ORDER);
if (page) { if (page) {
set_compound_page_dtor(page, free_huge_page); set_compound_page_dtor(page, free_huge_page);
...@@ -263,6 +267,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, ...@@ -263,6 +267,19 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
max_huge_pages = set_max_huge_pages(max_huge_pages); max_huge_pages = set_max_huge_pages(max_huge_pages);
return 0; return 0;
} }
/*
 * sysctl handler for "hugepages_treat_as_movable".
 *
 * Forwards the read/write request to proc_dointvec() and then selects the
 * GFP mask used for huge page pool allocations: GFP_HIGHUSER_MOVABLE when
 * hugepages_treat_as_movable is non-zero, GFP_HIGHUSER otherwise.
 *
 * Returns the result of proc_dointvec() so that a failed access is reported
 * to the caller instead of being silently turned into success.
 *
 * NOTE(review): hugepages_treat_as_movable is declared unsigned long while
 * the table entry uses sizeof(int) with an int handler - confirm the
 * variable's type matches what proc_dointvec() expects.
 */
int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
			struct file *file, void __user *buffer,
			size_t *length, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, file, buffer, length, ppos);

	/*
	 * Resynchronise the mask unconditionally so it always reflects the
	 * current value of the variable, even if the request above failed
	 * part-way through.
	 */
	if (hugepages_treat_as_movable)
		htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
	else
		htlb_alloc_mask = GFP_HIGHUSER;

	return ret;
}
#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SYSCTL */
int hugetlb_report_meminfo(char *buf) int hugetlb_report_meminfo(char *buf)
......
...@@ -1203,7 +1203,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol, ...@@ -1203,7 +1203,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
#ifdef CONFIG_HUGETLBFS #ifdef CONFIG_HUGETLBFS
/* Return a zonelist suitable for a huge page allocation. */ /* Return a zonelist suitable for a huge page allocation. */
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
gfp_t gfp_flags)
{ {
struct mempolicy *pol = get_vma_policy(current, vma, addr); struct mempolicy *pol = get_vma_policy(current, vma, addr);
...@@ -1211,7 +1212,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) ...@@ -1211,7 +1212,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
unsigned nid; unsigned nid;
nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER); return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
} }
return zonelist_policy(GFP_HIGHUSER, pol); return zonelist_policy(GFP_HIGHUSER, pol);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment