Commit a137e1cc, authored by Andi Kleen, committed by Linus Torvalds
Browse files

hugetlbfs: per mount huge page sizes



Add the ability to configure the hugetlb hstate used on a per mount basis.

- Add a new pagesize= option to the hugetlbfs mount that allows setting
  the page size
- This option causes the mount code to find the hstate corresponding to the
  specified size, and sets up a pointer to the hstate in the mount's
  superblock.
- Change the hstate accessors to use this information rather than the
  global_hstate they were using (requires a slight change in mm/memory.c
  so we don't NULL deref in the error-unmap path -- see comments).

[np: take hstate out of hugetlbfs inode and vma->vm_private_data]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e5ff2159
...@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group; ...@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
enum { enum {
Opt_size, Opt_nr_inodes, Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid, Opt_mode, Opt_uid, Opt_gid,
Opt_pagesize,
Opt_err, Opt_err,
}; };
...@@ -62,6 +63,7 @@ static match_table_t tokens = { ...@@ -62,6 +63,7 @@ static match_table_t tokens = {
{Opt_mode, "mode=%o"}, {Opt_mode, "mode=%o"},
{Opt_uid, "uid=%u"}, {Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"}, {Opt_gid, "gid=%u"},
{Opt_pagesize, "pagesize=%s"},
{Opt_err, NULL}, {Opt_err, NULL},
}; };
...@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ...@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
char *p, *rest; char *p, *rest;
substring_t args[MAX_OPT_ARGS]; substring_t args[MAX_OPT_ARGS];
int option; int option;
unsigned long long size = 0;
enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
if (!options) if (!options)
return 0; return 0;
...@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ...@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
break; break;
case Opt_size: { case Opt_size: {
unsigned long long size;
/* memparse() will accept a K/M/G without a digit */ /* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from)) if (!isdigit(*args[0].from))
goto bad_val; goto bad_val;
size = memparse(args[0].from, &rest); size = memparse(args[0].from, &rest);
if (*rest == '%') { setsize = SIZE_STD;
size <<= HPAGE_SHIFT; if (*rest == '%')
size *= max_huge_pages; setsize = SIZE_PERCENT;
do_div(size, 100);
}
pconfig->nr_blocks = (size >> HPAGE_SHIFT);
break; break;
} }
...@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ...@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
pconfig->nr_inodes = memparse(args[0].from, &rest); pconfig->nr_inodes = memparse(args[0].from, &rest);
break; break;
case Opt_pagesize: {
unsigned long ps;
ps = memparse(args[0].from, &rest);
pconfig->hstate = size_to_hstate(ps);
if (!pconfig->hstate) {
printk(KERN_ERR
"hugetlbfs: Unsupported page size %lu MB\n",
ps >> 20);
return -EINVAL;
}
break;
}
default: default:
printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
p); p);
...@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ...@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
break; break;
} }
} }
/* Do size after hstate is set up */
if (setsize > NO_SIZE) {
struct hstate *h = pconfig->hstate;
if (setsize == SIZE_PERCENT) {
size <<= huge_page_shift(h);
size *= h->max_huge_pages;
do_div(size, 100);
}
pconfig->nr_blocks = (size >> huge_page_shift(h));
}
return 0; return 0;
bad_val: bad_val:
...@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
config.uid = current->fsuid; config.uid = current->fsuid;
config.gid = current->fsgid; config.gid = current->fsgid;
config.mode = 0755; config.mode = 0755;
config.hstate = &default_hstate;
ret = hugetlbfs_parse_options(data, &config); ret = hugetlbfs_parse_options(data, &config);
if (ret) if (ret)
return ret; return ret;
...@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbinfo) if (!sbinfo)
return -ENOMEM; return -ENOMEM;
sb->s_fs_info = sbinfo; sb->s_fs_info = sbinfo;
sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock); spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_blocks = config.nr_blocks; sbinfo->max_blocks = config.nr_blocks;
sbinfo->free_blocks = config.nr_blocks; sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes; sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes;
sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = HPAGE_SIZE; sb->s_blocksize = huge_page_size(config.hstate);
sb->s_blocksize_bits = HPAGE_SHIFT; sb->s_blocksize_bits = huge_page_shift(config.hstate);
sb->s_magic = HUGETLBFS_MAGIC; sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops; sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1; sb->s_time_gran = 1;
......
...@@ -100,6 +100,7 @@ struct hugetlbfs_config { ...@@ -100,6 +100,7 @@ struct hugetlbfs_config {
umode_t mode; umode_t mode;
long nr_blocks; long nr_blocks;
long nr_inodes; long nr_inodes;
struct hstate *hstate;
}; };
struct hugetlbfs_sb_info { struct hugetlbfs_sb_info {
...@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info { ...@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
long max_inodes; /* inodes allowed */ long max_inodes; /* inodes allowed */
long free_inodes; /* inodes free */ long free_inodes; /* inodes free */
spinlock_t stat_lock; spinlock_t stat_lock;
struct hstate *hstate;
}; };
...@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx; ...@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx]) #define default_hstate (hstates[default_hstate_idx])
static inline struct hstate *hstate_vma(struct vm_area_struct *vma) static inline struct hstate *hstate_inode(struct inode *i)
{ {
return &default_hstate; struct hugetlbfs_sb_info *hsb;
hsb = HUGETLBFS_SB(i->i_sb);
return hsb->hstate;
} }
static inline struct hstate *hstate_file(struct file *f) static inline struct hstate *hstate_file(struct file *f)
{ {
return &default_hstate; return hstate_inode(f->f_dentry->d_inode);
} }
static inline struct hstate *hstate_inode(struct inode *i) static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{ {
return &default_hstate; return hstate_file(vma->vm_file);
} }
static inline unsigned long huge_page_size(struct hstate *h) static inline unsigned long huge_page_size(struct hstate *h)
......
...@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, ...@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page) unsigned long end, struct page *ref_page)
{ {
/* spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
* It is undesirable to test vma->vm_file as it should be non-null __unmap_hugepage_range(vma, start, end, ref_page);
* for valid hugetlb area. However, vm_file will be NULL in the error spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
* cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
* do_mmap_pgoff() nullifies vma->vm_file before calling this function
* to clean up. Since no pte has actually been setup, it is safe to
* do nothing in this case.
*/
if (vma->vm_file) {
spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
__unmap_hugepage_range(vma, start, end, ref_page);
spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}
} }
/* /*
......
...@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, ...@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
} }
if (unlikely(is_vm_hugetlb_page(vma))) { if (unlikely(is_vm_hugetlb_page(vma))) {
unmap_hugepage_range(vma, start, end, NULL); /*
zap_work -= (end - start) / * It is undesirable to test vma->vm_file as it
* should be non-null for valid hugetlb area.
* However, vm_file will be NULL in the error
* cleanup path of do_mmap_pgoff. When
* hugetlbfs ->mmap method fails,
* do_mmap_pgoff() nullifies vma->vm_file
* before calling this function to clean up.
* Since no pte has actually been setup, it is
* safe to do nothing in this case.
*/
if (vma->vm_file) {
unmap_hugepage_range(vma, start, end, NULL);
zap_work -= (end - start) /
pages_per_huge_page(hstate_vma(vma)); pages_per_huge_page(hstate_vma(vma));
}
start = end; start = end;
} else } else
start = unmap_page_range(*tlbp, vma, start = unmap_page_range(*tlbp, vma,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment