Commit 0a6cad5d authored by Dave Airlie

Merge branch 'vmwgfx-coherent' of git://people.freedesktop.org/~thomash/linux into drm-next



Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver
to provide coherent graphics memory, meaning that the GPU sees any
content written to the coherent memory on the next GPU operation that
touches that memory, and the CPU sees any content written by the GPU
to that memory immediately after any fence object trailing the GPU
operation is signaled.

Paravirtual drivers that otherwise require explicit synchronization
need to do this by hooking up dirty tracking to pagefault handlers
and buffer object validation.

Provide the mm helpers needed for this, which also allow for huge pmd-
and pud entries (patches 1-3), and the associated vmwgfx code (patches 4-7).

The code has been tested and exercised by a tailored version of mesa
where we disable all explicit synchronization and assume graphics memory
is coherent. The performance loss varies of course; a typical number is
around 5%.
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas_os@shipmail.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20191113131639.4653-1-thomas_os@shipmail.org
parents acc61b89 9ca7d19f
@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
#define TTM_BO_VM_NUM_PREFAULT 16
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
/**
* ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
* @bo: The buffer object
* @vmf: The fault structure handed to the callback
*
* vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
* during long waits, and after the wait the callback will be restarted. This
* is to allow other threads using the same virtual memory space concurrent
* access to map(), unmap() completely unrelated buffer objects. TTM buffer
* object reservations sometimes wait for GPU and should therefore be
* considered long waits. This function reserves the buffer object interruptibly
* taking this into account. Starvation is avoided by the vm system not
* allowing too many repeated restarts.
* This function is intended to be used in customized fault() and _mkwrite()
* handlers.
*
* Return:
* 0 on success and the bo was reserved.
* VM_FAULT_RETRY if blocking wait.
* VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
*/
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
int i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
struct vm_area_struct cvma;
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
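
For reference, a minimal sketch of how a driver-private fault() callback might
use this helper before doing its own work under the reservation (the handler
name is hypothetical; only ttm_bo_vm_reserve() and dma_resv_unlock() are taken
from this series):

static vm_fault_t example_vm_fault(struct vm_fault *vmf)
{
        struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
        vm_fault_t ret;

        /* Returns 0 with the reservation held, or VM_FAULT_RETRY /
         * VM_FAULT_NOPAGE without it when the access must be retried.
         */
        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        /* ... driver-specific fault handling under the reservation ... */

        dma_resv_unlock(bo->base.resv);
        return VM_FAULT_NOPAGE;
}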
/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
* @prot: The page protection to be used for this memory area.
* @num_prefault: Maximum number of prefault pages. The caller may want to
* specify this based on madvise settings and the size of the GPU object
* backed by the memory.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
* instructing the caller to retry the page access.
*
* Return:
* VM_FAULT_NOPAGE on success or pending signal
* VM_FAULT_SIGBUS on unspecified error
* VM_FAULT_OOM on out-of-memory
* VM_FAULT_RETRY if retryable wait
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault)
{
struct vm_area_struct *vma = vmf->vma;
struct vm_area_struct cvma = *vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
pgoff_t i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
ret = VM_FAULT_NOPAGE;
goto out_unlock;
return VM_FAULT_NOPAGE;
default:
ret = VM_FAULT_SIGBUS;
goto out_unlock;
return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
if (unlikely(ret != 0)) {
if (ret == VM_FAULT_RETRY &&
!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* The BO has already been unreserved. */
return ret;
}
goto out_unlock;
}
if (unlikely(ret != 0))
return ret;
err = ttm_mem_io_lock(man, true);
if (unlikely(err != 0)) {
ret = VM_FAULT_NOPAGE;
goto out_unlock;
}
if (unlikely(err != 0))
return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
/*
* Make a local vma copy to modify the page_prot member
* and vm_flags if necessary. The vma parameter is protected
* by mmap_sem in write mode.
*/
cvma = *vma;
cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
if (bo->mem.bus.is_iomem) {
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
cvma.vm_page_prot);
} else {
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
cvma.vm_page_prot);
/* Allocate all pages at once, most common usage */
if (ttm_tt_populate(ttm, &ctx)) {
if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
} else {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
out_unlock:
return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
pgprot_t prot;
struct ttm_buffer_object *bo = vma->vm_private_data;
vm_fault_t ret;
ret = ttm_bo_vm_reserve(bo, vmf);
if (ret)
return ret;
prot = vm_get_page_prot(vma->vm_flags);
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
return ret;
dma_resv_unlock(bo->base.resv);
return ret;
}
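
A driver that emulates coherent memory, as the vmwgfx patches later in this
series do, could build an equivalent handler around the same two exported
helpers while substituting its own page protection and prefault count. A
sketch under that assumption (the example_* name and the dirty-marking step
are illustrative, not the actual vmwgfx code):

static vm_fault_t example_coherent_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct ttm_buffer_object *bo = vma->vm_private_data;
        pgprot_t prot;
        vm_fault_t ret;

        ret = ttm_bo_vm_reserve(bo, vmf);
        if (ret)
                return ret;

        /* A dirty-tracking driver would record the touched pages here,
         * before the page table entries are (re)inserted.
         */

        /* Prefaulting a single page keeps the dirty tracking precise. */
        prot = vm_get_page_prot(vma->vm_flags);
        ret = ttm_bo_vm_fault_reserved(vmf, prot, 1);
        if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
                return ret;

        dma_resv_unlock(bo->base.resv);
        return ret;
}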
static void ttm_bo_vm_open(struct vm_area_struct *vma)
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
EXPORT_SYMBOL(ttm_bo_vm_open);
static void ttm_bo_vm_close(struct vm_area_struct *vma)
void ttm_bo_vm_close(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
vmwgfx_validation.o \
vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
/**
* struct svga3dsurface_mip - Mipmap level information
* @bytes: Bytes required in the backing store of this mipmap level.
* @img_stride: Byte stride per image.
* @row_stride: Byte stride per block row.
* @size: The size of the mipmap.
*/
struct svga3dsurface_mip {
size_t bytes;
size_t img_stride;
size_t row_stride;
struct drm_vmw_size size;
};
/**
* struct svga3dsurface_cache - Cached surface information
* @desc: Pointer to the surface descriptor
* @mip: Array of mipmap level information. Valid size is @num_mip_levels.
* @mip_chain_bytes: Bytes required in the backing store for the whole chain
* of mip levels.
* @sheet_bytes: Bytes required in the backing store for a sheet
* representing a single sample.
* @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
* a chain.
* @num_layers: Number of slices in an array texture or number of faces in
* a cubemap texture.
*/
struct svga3dsurface_cache {
const struct svga3d_surface_desc *desc;
struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
size_t mip_chain_bytes;
size_t sheet_bytes;
u32 num_mip_levels;
u32 num_layers;
};
/**
* struct svga3dsurface_loc - Surface location
* @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
* mip_level.
* @x: X coordinate.
* @y: Y coordinate.
* @z: Z coordinate.
*/
struct svga3dsurface_loc {
u32 sub_resource;
u32 x, y, z;
};
/**
* svga3dsurface_subres - Compute the subresource from layer and mipmap.
* @cache: Surface layout data.
* @mip_level: The mipmap level.
* @layer: The surface layer (face or array slice).
*
* Return: The subresource.
*/
static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
u32 mip_level, u32 layer)
{
return cache->num_mip_levels * layer + mip_level;
}
/**
* svga3dsurface_setup_cache - Build a surface cache entry
* @size: The surface base level dimensions.
* @format: The surface format.
* @num_mip_levels: Number of mipmap levels.
* @num_layers: Number of layers.
* @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
*
* Return: Zero on success, -EINVAL on invalid surface layout.
*/
static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
SVGA3dSurfaceFormat format,
u32 num_mip_levels,
u32 num_layers,
u32 num_samples,
struct svga3dsurface_cache *cache)
{
const struct svga3d_surface_desc *desc;
u32 i;
memset(cache, 0, sizeof(*cache));
cache->desc = desc = svga3dsurface_get_desc(format);
cache->num_mip_levels = num_mip_levels;
cache->num_layers = num_layers;
for (i = 0; i < cache->num_mip_levels; i++) {
struct svga3dsurface_mip *mip = &cache->mip[i];
mip->size = svga3dsurface_get_mip_size(*size, i);
mip->bytes = svga3dsurface_get_image_buffer_size
(desc, &mip->size, 0);
mip->row_stride =
__KERNEL_DIV_ROUND_UP(mip->size.width,
desc->block_size.width) *
desc->bytes_per_block * num_samples;
if (!mip->row_stride)
goto invalid_dim;
mip->img_stride =
__KERNEL_DIV_ROUND_UP(mip->size.height,
desc->block_size.height) *
mip->row_stride;
if (!mip->img_stride)
goto invalid_dim;
cache->mip_chain_bytes += mip->bytes;
}
cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
if (!cache->sheet_bytes)
goto invalid_dim;
return 0;
invalid_dim:
VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
return -EINVAL;
}
/**
* svga3dsurface_get_loc - Get a surface location from an offset into the
* backing store
* @cache: Surface layout data.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
* @offset: Offset into the surface backing store.
*/
static inline void
svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
struct svga3dsurface_loc *loc,
size_t offset)
{
const struct svga3dsurface_mip *mip = &cache->mip[0];
const struct svga3d_surface_desc *desc = cache->desc;
u32 layer;
int i;
if (offset >= cache->sheet_bytes)
offset %= cache->sheet_bytes;
layer = offset / cache->mip_chain_bytes;
offset -= layer * cache->mip_chain_bytes;
for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
if (mip->bytes > offset)
break;
offset -= mip->bytes;
}
loc->sub_resource = svga3dsurface_subres(cache, i, layer);
loc->z = offset / mip->img_stride;
offset -= loc->z * mip->img_stride;
loc->z *= desc->block_size.depth;
loc->y = offset / mip->row_stride;
offset -= loc->y * mip->row_stride;
loc->y *= desc->block_size.height;
loc->x = offset / desc->bytes_per_block;
loc->x *= desc->block_size.width;
}
/**
* svga3dsurface_inc_loc - Clamp increment a surface location with one block
* size in each dimension.
* @loc: Pointer to a struct svga3dsurface_loc to be incremented.
*
* When computing the size of a range as size = end - start, the range does not
* include the end element. However a location representing the last byte
* of a touched region in the backing store *is* included in the range.
* This function modifies such a location to match the end definition
* given as start + size which is the one used in a SVGA3dBox.
*/
static inline void
svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
struct svga3dsurface_loc *loc)
{
const struct svga3d_surface_desc *desc = cache->desc;
u32 mip = loc->sub_resource % cache->num_mip_levels;
const struct drm_vmw_size *size = &cache->mip[mip].size;
loc->sub_resource++;
loc->x += desc->block_size.width;
if (loc->x > size->width)
loc->x = size->width;
loc->y += desc->block_size.height;
if (loc->y > size->height)
loc->y = size->height;
loc->z += desc->block_size.depth;
if (loc->z > size->depth)
loc->z = size->depth;
}
/**
* svga3dsurface_min_loc - The start location in a subresource
* @cache: Surface layout data.
* @sub_resource: The subresource.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
*/
static inline void
svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
u32 sub_resource,
struct svga3dsurface_loc *loc)
{
loc->sub_resource = sub_resource;
loc->x = loc->y = loc->z = 0;
}
/**
* svga3dsurface_max_loc - The end location in a subresource
* @cache: Surface layout data.
* @sub_resource: The subresource.
* @loc: Pointer to a struct svga3dsurface_loc to be filled in.
*
* Following the end definition given in svga3dsurface_inc_loc(), compute
* the end location of a surface subresource.
*/
static inline void
svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
u32 sub_resource,
struct svga3dsurface_loc *loc)
{
const struct drm_vmw_size *size;
u32 mip;
loc->sub_resource = sub_resource + 1;
mip = sub_resource % cache->num_mip_levels;
size = &cache->mip[mip].size;
loc->x = size->width;
loc->y = size->height;
loc->z = size->depth;
}
#endif /* _SVGA3D_SURFACEDEFS_H_ */
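
To show how the location helpers above compose, here is a hedged sketch of
turning a dirty byte range in the backing store into an inclusive start and
exclusive end location, following the end definition used by SVGA3dBox (the
function name is hypothetical; only the svga3dsurface_* helpers come from this
header):

static inline void
example_range_to_locs(const struct svga3dsurface_cache *cache,
                      size_t offset, size_t size,
                      struct svga3dsurface_loc *start,
                      struct svga3dsurface_loc *end)
{
        /* Location of the first touched byte. */
        svga3dsurface_get_loc(cache, start, offset);

        /* Location of the last touched byte, then clamp-increment it so
         * that it matches the exclusive start + size end convention.
         */
        svga3dsurface_get_loc(cache, end, offset + size - 1);
        svga3dsurface_inc_loc(cache, end);
}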
@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
WARN_ON(vmw_bo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
vmw_bo_unmap(&vmw_user_bo->vbo);
WARN_ON(vbo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
INIT_LIST_HEAD(&vmw_bo->res_list);
vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
#define VMWGFX_DRIVER_DATE "20180704"
#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
#define VMWGFX_DRIVER_MINOR 15
#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
* @res_list: List of resources using this buffer object as a backing MOB
* @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
* @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
struct list_head res_list;
struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
struct vmw_bo_dirty *dirty;
};
/**
@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
* Protecte by resource reserved.
* Protected by resource reserved.
* @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource