diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index fcfa229cc1e7ab2842b20f48ee5ac7aad3b777b0..c2d7b6d7bac75b1705fbadd2b59dc3b0989ed8ee 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -169,7 +169,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 643fad7740717ae8eabcacdcfa05d924566ed29b..92b339c7adbaad4b5faf0cbac91a4746fdf5009e 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -145,7 +145,6 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) */ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 18ef0b143ac2c0c54ff3a14f1074f62a34cdee97..b598e6978b299575f04ca6ccfd73770f5259b211 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -319,9 +319,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) regs, addr); } if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cbb29a43aa7f60e46f255c01d86df9125da9a3fa..1027851d469adb53c85f6f6a3645c6f452be92c8 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -521,12 +521,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. - */ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; mm_flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index d9e15d941bdbd3a8004cfd4254a0745928b0ab7e..72334b26317a402d52d1f0107e24c0b9cef10224 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -102,7 +102,6 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index b5aa4e80c762b4b69e8e4ad9f4283190ab2cb45c..30d0c1fca99ed7fba95b3e0b090db959eb21cba4 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -167,7 +167,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 182799fd9987032da22fa59bec4da33bf9d60876..f7afb9897966a8d99bff239f7bca3642c958a4bb 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -162,9 +162,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 32da02778a63c33ed9dbd3e295e932c17a6de95c..3248141f8ed54e3ca6f3419a10258b7a333a555a 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -236,7 +236,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index ec464da64656a88168ab2d92bd941b2838a03ae0..4a0eafe3d932b1effeb5bf968087d704d27cf6e7 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -178,7 +178,6 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, tsk->min_flt++; } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 2810a4e5ab279317c9b349536bd8daf3a679c5a3..0cf0c08c7da21fe8be1ad8f013ec396fdeffc755 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -246,7 +246,6 @@ void do_page_fault(unsigned long entry, unsigned long addr, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index c38bea4220fbd12cb54d56906208d69e5be806d2..ec9d8a9c426fcde8f64f5c09635392117ffc7a9f 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -157,9 +157,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 30d5c51e9d40c931a98e51a02e83512c59034e2c..8af1cc78c4fb7fab2451baee32d9cd5a4ba24ab5 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -181,7 +181,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 8e88e5c5f26a1efa5e59a79bac0dc6dd21b272ec..86e8c848f3d73781d1311af160af35d760d957c5 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -328,14 +328,12 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; - /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ - + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d7e1f8dc7e4c4389ad58ce25eaeb17fda7e82520..d15f0f0ee80630d1e88ee664b2a04b51bab0d24d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -590,13 +590,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, * case. */ if (unlikely(fault & VM_FAULT_RETRY)) { - /* We retry only once */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. - */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a252d9e38561a763687096f59691ff4d879b48b0..be84e32adc4c4e75e2feedd53a2993882acdeb2e 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -144,11 +144,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs) 1, regs, addr); } if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. - */ - flags &= ~(FAULT_FLAG_ALLOW_RETRY); flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 551ac311bd35b11769962807fabc5849de2c6627..aeccdb30899ac045620ea3462569d3a4739620f7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -513,10 +513,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) fault = VM_FAULT_PFAULT; goto out_up; } - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~(FAULT_FLAG_ALLOW_RETRY | - FAULT_FLAG_RETRY_NOWAIT); + flags &= ~FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index d9c8f2d00a541416e5a61b7cbc348af40c963cf6..13ee4d20e6226bf4640305df486441650225a32c 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -481,7 +481,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index a91b0c2d84f8af7d6b9f32c3cad1429f35fe20a7..f6e0e601f857cdbbd4f3a6eda9af15c486a2b1a4 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -261,7 +261,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, 1, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 30653418a672fdfc83d21fef92bb546224e0ad0e..c0c0dd471b6b3c3dfafeecf16f6445cb66949a72 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -449,7 +449,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) 1, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index c59ad37eacda23e9a09b0010fce16ed7b8eef493..8f18cf56b3ddfde62df96f09aeaed68278640b08 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -97,7 +97,6 @@ int handle_page_fault(unsigned long address, unsigned long ip, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 34a90453ca18b2f305ad57540baa6d8a9614cff7..a9bd08fbe5884af2781f6c17e490ef2a0355a3ff 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -259,9 +259,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f70a08e5271f509f83ce96bf3e3f423f383d33fe..859519f5b3423469ac735972fd26934dbab74cfe 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1479,8 +1479,6 @@ void do_user_addr_fault(struct pt_regs *regs, */ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { - /* Retry at most once */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 7d196dc951e861d62d56d1cc4018c2375e3cbeb8..e7172bd53cedff79debdf35c0bda07790fa93db7 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -128,7 +128,6 @@ void do_page_fault(struct pt_regs *regs) else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 389128b8c4dda985dfbd18fc956e50cbd87f9df5..cb8829ca6c7f87e77975c8868453c62e84ebed12 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -59,9 +59,10 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, /* * If possible, avoid waiting for GPU with mmap_sem - * held. + * held. We only do this if the fault allows retry and this + * is the first attempt. */ - if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(vmf->flags)) { ret = VM_FAULT_RETRY; if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) goto out_unlock; @@ -135,7 +136,12 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, * for the buffer to become unreserved. */ if (unlikely(!dma_resv_trylock(bo->base.resv))) { - if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * If the fault allows retry and this is the first + * fault attempt, we try to release the mmap_sem + * before waiting + */ + if (fault_flag_allow_retry_first(vmf->flags)) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { ttm_bo_get(bo); up_read(&vmf->vma->vm_mm->mmap_sem); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7eeabc37ec8775e5559ed7465b09da94ede041da..e8e1afab713ffddc6715d26943bfdcdacab5483a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -394,6 +394,25 @@ extern pgprot_t protection_map[16]; * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. */ #define FAULT_FLAG_WRITE 0x01 #define FAULT_FLAG_MKWRITE 0x02 @@ -414,6 +433,24 @@ extern pgprot_t protection_map[16]; FAULT_FLAG_KILLABLE | \ FAULT_FLAG_INTERRUPTIBLE) +/** + * fault_flag_allow_retry_first - check ALLOW_RETRY the first time + * + * This is mostly used for places where we want to try to avoid taking + * the mmap_sem for too long a time when waiting for another condition + * to change, in which case we can try to be polite to release the + * mmap_sem in the first round to avoid potential starvation of other + * processes that would also want the mmap_sem. + * + * Return: true if the page fault allows retry and this is the first + * attempt of the fault handling; false otherwise. + */ +static inline bool fault_flag_allow_retry_first(unsigned int flags) +{ + return (flags & FAULT_FLAG_ALLOW_RETRY) && + (!(flags & FAULT_FLAG_TRIED)); +} + #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ diff --git a/mm/filemap.c b/mm/filemap.c index 1a58dd6d0ca0acae013d06688ff77c3407e1bdf6..0fbdc8e30dd2a7b3278d35af49d52dd3684dbd05 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1386,7 +1386,7 @@ EXPORT_SYMBOL_GPL(__lock_page_killable); int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_sem is not released * even though return 0. diff --git a/mm/internal.h b/mm/internal.h index 91d1d3828093af01a48e657093750782de7d7090..9fb2b8c7928f18b14685638e7b64e1e3feaa95a7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -400,10 +400,10 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_sem if only - * FAULT_FLAG_ALLOW_RETRY is set. + * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. */ - if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == - FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(flags) && + !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); up_read(&vmf->vma->vm_mm->mmap_sem); }