Commit 68827280, authored by Huang Ying and committed by Linus Torvalds
mm, gup: prevent pmd checking race in follow_pmd_mask()

mmap_sem will be read locked when calling follow_pmd_mask().  But this
cannot prevent PMD from being changed for all cases when PTL is
unlocked, for example, from pmd_trans_huge() to pmd_none() via
MADV_DONTNEED.  So it is possible for the pmd_present() check in
follow_pmd_mask() to encounter an invalid PMD.  This may cause an
incorrect VM_BUG_ON() or an infinite loop.  Fix this by reading the PMD
entry into a local variable with READ_ONCE() and checking the local
variable and pmd_none() in the retry loop.

As Kirill pointed out, with PTL unlocked, the *pmd may be changed under
us, so reading it directly again and again may incur weird bugs.  So
although using *pmd directly for anything other than the pmd_present()
check may be safe, it is still better to read *pmd once and check the
local variable multiple times.

Replacing all uses of *pmd with a local variable while the PTL is unlocked
was suggested by Kirill.


Signed-off-by: "Huang, Ying" <>
Reviewed-by: Zi Yan <>
Cc: "Kirill A. Shutemov" <>
Cc: Al Viro <>
Cc: "Aneesh Kumar K.V" <>
Cc: Dan Williams <>
Signed-off-by: Andrew Morton <>
Signed-off-by: Linus Torvalds <>
......@@ -212,53 +212,69 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
unsigned int flags, unsigned int *page_mask)
pmd_t *pmd;
pmd_t *pmd, pmdval;
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
pmd = pmd_offset(pudp, address);
if (pmd_none(*pmd))
* The READ_ONCE() will stabilize the pmdval in a register or
* on the stack so that it will stop changing under the code.
pmdval = READ_ONCE(*pmd);
if (pmd_none(pmdval))
return no_page_table(vma, flags);
if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
page = follow_huge_pmd(mm, address, pmd, flags);
if (page)
return page;
return no_page_table(vma, flags);
if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
page = follow_huge_pd(vma, address,
__hugepd(pmd_val(*pmd)), flags,
__hugepd(pmd_val(pmdval)), flags,
if (page)
return page;
return no_page_table(vma, flags);
if (!pmd_present(*pmd)) {
if (!pmd_present(pmdval)) {
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
VM_BUG_ON(thp_migration_supported() &&
if (is_pmd_migration_entry(*pmd))
if (is_pmd_migration_entry(pmdval))
pmd_migration_entry_wait(mm, pmd);
pmdval = READ_ONCE(*pmd);
* MADV_DONTNEED may convert the pmd to null because
* mmap_sem is held in read mode
if (pmd_none(pmdval))
return no_page_table(vma, flags);
goto retry;
if (pmd_devmap(*pmd)) {
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags);
if (page)
return page;
if (likely(!pmd_trans_huge(*pmd)))
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags);
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
if (unlikely(pmd_none(*pmd))) {
return no_page_table(vma, flags);
if (unlikely(!pmd_present(*pmd))) {
if (likely(!(flags & FOLL_MIGRATION)))
