Commit add6a0cd authored by Paolo Bonzini
Browse files

KVM: MMU: try to fix up page faults before giving up

The vGPU folks would like to trap the first access to a BAR by setting
vm_ops on the VMAs produced by mmap-ing a VFIO device.  The fault handler
then can use remap_pfn_range to place some non-reserved pages in the VMA.

This kind of VM_PFNMAP mapping is not handled by KVM, but follow_pfn
and fixup_user_fault together help support it.  The patch also supports
VM_MIXEDMAP vmas where the pfns are not reserved and are thus subject to
reference counting.

Cc: Xiao Guangrong <>
Cc: Andrea Arcangeli <>
Cc: Radim Krčmář <>
Tested-by: Neo Jia <>
Reported-by: Kirti Wankhede <>
Signed-off-by: Paolo Bonzini <>
parent 92176a8e
......@@ -720,6 +720,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
return 0;
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
......@@ -1446,9 +1446,45 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
bool write_fault, kvm_pfn_t *p_pfn)
*p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
unsigned long pfn;
int r;
r = follow_pfn(vma, addr, &pfn);
if (r) {
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
* not call the fault handler, so do it here.
bool unlocked = false;
r = fixup_user_fault(current, current->mm, addr,
(write_fault ? FAULT_FLAG_WRITE : 0),
if (unlocked)
return -EAGAIN;
if (r)
return r;
r = follow_pfn(vma, addr, &pfn);
if (r)
return r;
* Get a reference here because callers of *hva_to_pfn* and
* *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
* returned pfn. This is only needed if the VMA has VM_MIXEDMAP
* set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
* simply do nothing for reserved pfns.
* Whoever called remap_pfn_range is also going to call e.g.
* unmap_mapping_range before the underlying pages are freed,
* causing a call to our MMU notifier.
*p_pfn = pfn;
return 0;
......@@ -1493,12 +1529,15 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
goto exit;
vma = find_vma_intersection(current->mm, addr, addr + 1);
if (vma == NULL)
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
if (r == -EAGAIN)
goto retry;
if (r < 0)
} else {
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment