Commit 0e7e6198 authored by Dave Airlie's avatar Dave Airlie

Merge branch 'ttm-transhuge' of git://people.freedesktop.org/~thomash/linux into drm-next

Huge page-table entries for TTM

In order to reduce CPU usage [1] and in theory TLB misses this patchset enables
huge- and giant page-table entries for TTM and TTM-enabled graphics drivers.
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20200325073102.6129-1-thomas_os@shipmail.org
parents 59e7a8cc 9431042d
......@@ -48,6 +48,11 @@
#include "drm_internal.h"
#include "drm_legacy.h"
#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
#include <uapi/asm/mman.h>
#include <drm/drm_vma_manager.h>
#endif
/* from BKL pushdown */
DEFINE_MUTEX(drm_global_mutex);
......@@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
return file;
}
EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
#ifdef CONFIG_MMU
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* drm_addr_inflate() attempts to construct an aligned area by inflating
* the area size and skipping the unaligned start of the area.
* adapted from shmem_get_unmapped_area()
*/
static unsigned long drm_addr_inflate(unsigned long addr,
unsigned long len,
unsigned long pgoff,
unsigned long flags,
unsigned long huge_size)
{
unsigned long offset, inflated_len;
unsigned long inflated_addr;
unsigned long inflated_offset;
offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
if (offset && offset + len < 2 * huge_size)
return addr;
if ((addr & (huge_size - 1)) == offset)
return addr;
inflated_len = len + huge_size - PAGE_SIZE;
if (inflated_len > TASK_SIZE)
return addr;
if (inflated_len < len)
return addr;
inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
0, flags);
if (IS_ERR_VALUE(inflated_addr))
return addr;
if (inflated_addr & ~PAGE_MASK)
return addr;
inflated_offset = inflated_addr & (huge_size - 1);
inflated_addr += offset - inflated_offset;
if (inflated_offset > offset)
inflated_addr += huge_size;
if (inflated_addr > TASK_SIZE - len)
return addr;
return inflated_addr;
}
/**
* drm_get_unmapped_area() - Get an unused user-space virtual memory area
* suitable for huge page table entries.
* @file: The struct file representing the address space being mmap()'d.
* @uaddr: Start address suggested by user-space.
* @len: Length of the area.
* @pgoff: The page offset into the address space.
* @flags: mmap flags
* @mgr: The address space manager used by the drm driver. This argument can
* probably be removed at some point when all drivers use the same
* address space manager.
*
* This function attempts to find an unused user-space virtual memory area
* that can accommodate the size we want to map, and that is properly
* aligned to facilitate huge page table entries matching actual
* huge pages or huge page aligned memory in buffer objects. Buffer objects
* are assumed to start at huge page boundary pfns (io memory) or be
* populated by huge pages aligned to the start of the buffer object
* (system- or coherent memory). Adapted from shmem_get_unmapped_area.
*
* Return: aligned user-space address.
*/
unsigned long drm_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags,
struct drm_vma_offset_manager *mgr)
{
unsigned long addr;
unsigned long inflated_addr;
struct drm_vma_offset_node *node;
if (len > TASK_SIZE)
return -ENOMEM;
/*
* @pgoff is the file page-offset the huge page boundaries of
* which typically aligns to physical address huge page boundaries.
* That's not true for DRM, however, where physical address huge
* page boundaries instead are aligned with the offset from
* buffer object start. So adjust @pgoff to be the offset from
* buffer object start.
*/
drm_vma_offset_lock_lookup(mgr);
node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
if (node)
pgoff -= node->vm_node.start;
drm_vma_offset_unlock_lookup(mgr);
addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
return addr;
if (addr & ~PAGE_MASK)
return addr;
if (addr > TASK_SIZE - len)
return addr;
if (len < HPAGE_PMD_SIZE)
return addr;
if (flags & MAP_FIXED)
return addr;
/*
* Our priority is to support MAP_SHARED mapped hugely;
* and support MAP_PRIVATE mapped hugely too, until it is COWed.
* But if caller specified an address hint, respect that as before.
*/
if (uaddr)
return addr;
inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
HPAGE_PMD_SIZE);
if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
len >= HPAGE_PUD_SIZE)
inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
flags, HPAGE_PUD_SIZE);
return inflated_addr;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
unsigned long drm_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags,
struct drm_vma_offset_manager *mgr)
{
return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
#endif /* CONFIG_MMU */
......@@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
* ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
* @vmf: Fault data
* @bo: The buffer object
* @page_offset: Page offset from bo start
* @fault_page_size: The size of the fault in pages.
* @pgprot: The page protections.
* Does additional checking whether it's possible to insert a PUD or PMD
* pfn and performs the insertion.
*
* Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
* a huge fault was not possible, or on insertion error.
*/
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
struct ttm_buffer_object *bo,
pgoff_t page_offset,
pgoff_t fault_page_size,
pgprot_t pgprot)
{
pgoff_t i;
vm_fault_t ret;
unsigned long pfn;
pfn_t pfnt;
struct ttm_tt *ttm = bo->ttm;
bool write = vmf->flags & FAULT_FLAG_WRITE;
/* Fault should not cross bo boundary. */
page_offset &= ~(fault_page_size - 1);
if (page_offset + fault_page_size > bo->num_pages)
goto out_fallback;
if (bo->mem.bus.is_iomem)
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
else
pfn = page_to_pfn(ttm->pages[page_offset]);
/* pfn must be fault_page_size aligned. */
if ((pfn & (fault_page_size - 1)) != 0)
goto out_fallback;
/* Check that memory is contiguous. */
if (!bo->mem.bus.is_iomem) {
for (i = 1; i < fault_page_size; ++i) {
if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
goto out_fallback;
}
} else if (bo->bdev->driver->io_mem_pfn) {
for (i = 1; i < fault_page_size; ++i) {
if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
goto out_fallback;
}
}
pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
#endif
else
WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
if (ret != VM_FAULT_NOPAGE)
goto out_fallback;
return VM_FAULT_NOPAGE;
out_fallback:
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
struct ttm_buffer_object *bo,
pgoff_t page_offset,
pgoff_t fault_page_size,
pgprot_t pgprot)
{
return VM_FAULT_FALLBACK;
}
#endif
/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
......@@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
* @num_prefault: Maximum number of prefault pages. The caller may want to
* specify this based on madvice settings and the size of the GPU object
* backed by the memory.
* @fault_page_size: The size of the fault in pages.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
......@@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault)
pgoff_t num_prefault,
pgoff_t fault_page_size)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
......@@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
prot = pgprot_decrypted(prot);
}
/* We don't prefault on huge faults. Yet. */
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) {
ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset,
fault_page_size, prot);
goto out_io_unlock;
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
......@@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return ret;
prot = vma->vm_page_prot;
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
......@@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(ttm_bo_vm_fault);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
* ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting?
* @prot: The page protection value
*
* Return: true if @prot is write-protecting. false otherwise.
*/
static bool ttm_pgprot_is_wrprotecting(pgprot_t prot)
{
/*
* This is meant to say "pgprot_wrprotect(prot) == prot" in a generic
* way. Unfortunately there is no generic pgprot_wrprotect.
*/
return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) ==
pgprot_val(prot);
}
static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
vm_fault_t ret;
pgoff_t fault_page_size = 0;
bool write = vmf->flags & FAULT_FLAG_WRITE;
switch (pe_size) {
case PE_SIZE_PMD:
fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
case PE_SIZE_PUD:
fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
break;
#endif
default:
WARN_ON_ONCE(1);
return VM_FAULT_FALLBACK;
}
/* Fallback on write dirty-tracking or COW */
if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot))
return VM_FAULT_FALLBACK;
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
dma_resv_unlock(bo->base.resv);
return ret;
}
#endif
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = vma->vm_private_data;
......@@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
.access = ttm_bo_vm_access
.access = ttm_bo_vm_access,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
.huge_fault = ttm_bo_vm_huge_fault,
#endif
};
static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
......
......@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
ttm_object.o ttm_lock.o
vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
......@@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
static unsigned long
vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct drm_file *file_priv = file->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
&dev_priv->vma_manager);
}
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
void *ptr)
{
......@@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = {
.compat_ioctl = vmw_compat_ioctl,
#endif
.llseek = noop_llseek,
.get_unmapped_area = vmw_get_unmapped_area,
};
static struct drm_driver driver = {
......
......@@ -1000,6 +1000,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
size_t gran);
/**
* TTM buffer object driver - vmwgfx_ttm_buffer.c
*/
......@@ -1510,6 +1511,17 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
pgoff_t start, pgoff_t end);
vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size);
#endif
/* Transparent hugepage support - vmwgfx_thp.c */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern const struct ttm_mem_type_manager_func vmw_thp_func;
#else
#define vmw_thp_func ttm_bo_manager_func
#endif
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
......
......@@ -473,11 +473,11 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
* a lot of unnecessary write faults.
*/
if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
prot = vma->vm_page_prot;
prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
else
prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
......@@ -486,3 +486,75 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
return ret;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct vmw_buffer_object *vbo =
container_of(bo, struct vmw_buffer_object, base);
pgprot_t prot;
vm_fault_t ret;
pgoff_t fault_page_size;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool is_cow_mapping =
(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
switch (pe_size) {
case PE_SIZE_PMD:
fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
case PE_SIZE_PUD:
fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
break;
#endif
default:
WARN_ON_ONCE(1);
return VM_FAULT_FALLBACK;
}
/* Always do write dirty-tracking and COW on PTE level. */
if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
return VM_FAULT_FALLBACK;
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
if (vbo->dirty) {
pgoff_t allowed_prefault;
unsigned long page_offset;
page_offset = vmf->pgoff -
drm_vma_node_start(&bo->base.vma_node);
if (page_offset >= bo->num_pages ||
vmw_resources_clean(vbo, page_offset,
page_offset + PAGE_SIZE,
&allowed_prefault)) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
/*
* Write protect, so we get a new fault on write, and can
* split.
*/
prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
} else {
prot = vm_get_page_prot(vma->vm_flags);
}
ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
out_unlock:
dma_resv_unlock(bo->base.resv);
return ret;
}
#endif
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Huge page-table-entry support for IO memory.
*
* Copyright (C) 2007-2019 Vmware, Inc. All rights reservedd.
*/
#include "vmwgfx_drv.h"
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
/**
* struct vmw_thp_manager - Range manager implementing huge page alignment
*
* @mm: The underlying range manager. Protected by @lock.
* @lock: Manager lock.
*/
struct vmw_thp_manager {
struct drm_mm mm;
spinlock_t lock;
};
static int vmw_thp_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node,
unsigned long align_pages,
const struct ttm_place *place,
struct ttm_mem_reg *mem,
unsigned long lpfn,
enum drm_mm_insert_mode mode)
{
if (align_pages >= mem->page_alignment &&
(!mem->page_alignment || align_pages % mem->page_alignment == 0)) {
return drm_mm_insert_node_in_range(mm, node,
mem->num_pages,
align_pages, 0,
place->fpfn, lpfn, mode);
}
return -ENOSPC;
}
static int vmw_thp_get_node(struct ttm_mem_type_manager *man,
struct ttm_buffer_object *bo,
const struct ttm_place *place,
struct ttm_mem_reg *mem)
{
struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;
struct drm_mm *mm = &rman->mm;
struct drm_mm_node *node;
unsigned long align_pages;
unsigned long lpfn;
enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
int ret;
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
lpfn = place->lpfn;
if (!lpfn)
lpfn = man->size;
mode = DRM_MM_INSERT_BEST;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
mode = DRM_MM_INSERT_HIGH;
spin_lock(&rman->lock);
if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
if (mem->num_pages >= align_pages) {
ret = vmw_thp_insert_aligned(mm, node, align_pages,
place, mem, lpfn, mode);
if (!ret)
goto found_unlock;
}
}
align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
if (mem->num_pages >= align_pages) {
ret = vmw_thp_insert_aligned(mm, node, align_pages, place, mem,
lpfn, mode);
if (!ret)
goto found_unlock;
}
ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
mem->page_alignment, 0,
place->fpfn, lpfn, mode);
found_unlock:
spin_unlock(&rman->lock);
if (unlikely(ret)) {
kfree(node);
} else {
mem->mm_node = node;
mem->start = node->start;
}
return 0;
}
static void vmw_thp_put_node(struct ttm_mem_type_manager *man,
struct ttm_mem_reg *mem)
{
struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv;