Commit d5bb349d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm

Pull drm coherent memory support for vmwgfx from Dave Airlie:
 "This is a separate pull for the mm pagewalking + drm/vmwgfx work
  Thomas did and you were involved in, I've left it separate in case you
  don't feel as comfortable with it as the other stuff.

  It has mm acks/r-b in the right places from what I can see"

* tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm:
  drm/vmwgfx: Add surface dirty-tracking callbacks
  drm/vmwgfx: Implement an infrastructure for read-coherent resources
  drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
  drm/vmwgfx: Implement an infrastructure for write-coherent resources
  mm: Add write-protect and clean utilities for address space ranges
  mm: Add a walk_page_mapping() function to the pagewalk code
  mm: pagewalk: Take the pagetable lock in walk_pte_range()
  mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock()
  drm/ttm: Convert vm callbacks to helpers
  drm/ttm: Remove explicit typecasts of vm_private_data
parents 81b6b964 0a6cad5d
......@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
#define TTM_BO_VM_NUM_PREFAULT 16
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
......@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
/**
* ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
* @bo: The buffer object
* @vmf: The fault structure handed to the callback
*
* vm callbacks like fault() and *_mkwrite() allow for the mmap_sem to be dropped
* during long waits, and after the wait the callback will be restarted. This
* is to allow other threads using the same virtual memory space concurrent
* access to map(), unmap() completely unrelated buffer objects. TTM buffer
* object reservations sometimes wait for GPU and should therefore be
* considered long waits. This function reserves the buffer object interruptibly
* taking this into account. Starvation is avoided by the vm system not
* allowing too many repeated restarts.
* This function is intended to be used in customized fault() and _mkwrite()
* handlers.
*
* Return:
* 0 on success and the bo was reserved.
* VM_FAULT_RETRY if blocking wait.
* VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
*/
vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
int i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
struct vm_area_struct cvma;
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
......@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
return 0;
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);
/**
* ttm_bo_vm_fault_reserved - TTM fault helper
* @vmf: The struct vm_fault given as argument to the fault callback
* @prot: The page protection to be used for this memory area.
* @num_prefault: Maximum number of prefault pages. The caller may want to
* specify this based on madvise settings and the size of the GPU object
* backed by the memory.
*
* This function inserts one or more page table entries pointing to the
* memory backing the buffer object, and then returns a return code
* instructing the caller to retry the page access.
*
* Return:
* VM_FAULT_NOPAGE on success or pending signal
* VM_FAULT_SIGBUS on unspecified error
* VM_FAULT_OOM on out-of-memory
* VM_FAULT_RETRY if retryable wait
*/
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
pgprot_t prot,
pgoff_t num_prefault)
{
struct vm_area_struct *vma = vmf->vma;
struct vm_area_struct cvma = *vma;
struct ttm_buffer_object *bo = vma->vm_private_data;
struct ttm_bo_device *bdev = bo->bdev;
unsigned long page_offset;
unsigned long page_last;
unsigned long pfn;
struct ttm_tt *ttm = NULL;
struct page *page;
int err;
pgoff_t i;
vm_fault_t ret = VM_FAULT_NOPAGE;
unsigned long address = vmf->address;
struct ttm_mem_type_manager *man =
&bdev->man[bo->mem.mem_type];
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
......@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
ret = VM_FAULT_NOPAGE;
goto out_unlock;
return VM_FAULT_NOPAGE;
default:
ret = VM_FAULT_SIGBUS;
goto out_unlock;
return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
......@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
if (unlikely(ret != 0)) {
if (ret == VM_FAULT_RETRY &&
!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* The BO has already been unreserved. */
return ret;
}
goto out_unlock;
}
if (unlikely(ret != 0))
return ret;
err = ttm_mem_io_lock(man, true);
if (unlikely(err != 0)) {
ret = VM_FAULT_NOPAGE;
goto out_unlock;
}
if (unlikely(err != 0))
return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
......@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
/*
* Make a local vma copy to modify the page_prot member
* and vm_flags if necessary. The vma parameter is protected
* by mmap_sem in write mode.
*/
cvma = *vma;
cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
if (bo->mem.bus.is_iomem) {
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
cvma.vm_page_prot);
} else {
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
......@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
cvma.vm_page_prot);
/* Allocate all page at once, most common usage */
if (ttm_tt_populate(ttm, &ctx)) {
if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
} else {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
/* Iomem should not be marked encrypted */
cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
......@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
out_unlock:
return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
/*
 * ttm_bo_vm_fault - Default fault handler built from the TTM fault helpers.
 * @vmf: The fault structure handed to the callback.
 *
 * Reserves the buffer object found in the vma's private data, lets
 * ttm_bo_vm_fault_reserved() do the work using the page protection derived
 * from the vma flags and TTM_BO_VM_NUM_PREFAULT prefault pages, and then
 * drops the reservation again unless a retryable wait was signalled.
 *
 * Return: A non-zero code from ttm_bo_vm_reserve() if the reservation
 * failed, otherwise the code returned by ttm_bo_vm_fault_reserved().
 */
static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *area = vmf->vma;
	struct ttm_buffer_object *bo = area->vm_private_data;
	vm_fault_t status;

	status = ttm_bo_vm_reserve(bo, vmf);
	if (status)
		return status;

	status = ttm_bo_vm_fault_reserved(vmf,
					  vm_get_page_prot(area->vm_flags),
					  TTM_BO_VM_NUM_PREFAULT);

	/*
	 * A VM_FAULT_RETRY without FAULT_FLAG_RETRY_NOWAIT must not unlock
	 * here; presumably the reservation was already dropped further down
	 * the call chain in that case. Every other outcome still holds it.
	 */
	if (status != VM_FAULT_RETRY ||
	    (vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		dma_resv_unlock(bo->base.resv);

	return status;
}
static void ttm_bo_vm_open(struct vm_area_struct *vma)
void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
EXPORT_SYMBOL(ttm_bo_vm_open);
static void ttm_bo_vm_close(struct vm_area_struct *vma)
void ttm_bo_vm_close(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
......
......@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
......
......@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
vmwgfx_validation.o \
vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
......@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
......@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
/**
 * struct svga3dsurface_mip - Mipmap level information
 * @bytes: Bytes required in the backing store of this mipmap level.
 * @img_stride: Byte stride per image.
 * @row_stride: Byte stride per block row.
 * @size: The size of the mipmap.
 *
 * One entry per mipmap level is filled in by svga3dsurface_setup_cache().
 */
struct svga3dsurface_mip {
size_t bytes;
size_t img_stride;
size_t row_stride;
struct drm_vmw_size size;
};
/**
 * struct svga3dsurface_cache - Cached surface information
 * @desc: Pointer to the surface descriptor
 * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
 * @mip_chain_bytes: Bytes required in the backing store for the whole chain
 * of mip levels.
 * @sheet_bytes: Bytes required in the backing store for a sheet
 * representing a single sample. Computed by svga3dsurface_setup_cache() as
 * @mip_chain_bytes times @num_layers.
 * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
 * a chain.
 * @num_layers: Number of slices in an array texture or number of faces in
 * a cubemap texture.
 */
struct svga3dsurface_cache {
const struct svga3d_surface_desc *desc;
struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
size_t mip_chain_bytes;
size_t sheet_bytes;
u32 num_mip_levels;
u32 num_layers;
};
/**
 * struct svga3dsurface_loc - Surface location
 * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
 * mip_level.
 * @x: X coordinate.
 * @y: Y coordinate.
 * @z: Z coordinate.
 *
 * Filled in by svga3dsurface_get_loc(), svga3dsurface_min_loc() and
 * svga3dsurface_max_loc(); adjusted by svga3dsurface_inc_loc().
 */
struct svga3dsurface_loc {
u32 sub_resource;
u32 x, y, z;
};
/**
 * svga3dsurface_subres - Map a layer and mipmap level to a subresource index.
 * @cache: Surface layout data; only @num_mip_levels is read.
 * @mip_level: The mipmap level within the layer.
 * @layer: The surface layer (face or array slice).
 *
 * Subresources are numbered layer by layer, each layer occupying
 * @cache->num_mip_levels consecutive indices.
 *
 * Return: The subresource.
 */
static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
				       u32 mip_level, u32 layer)
{
	const u32 layer_base = layer * cache->num_mip_levels;

	return layer_base + mip_level;
}
/**
 * svga3dsurface_setup_cache - Build a surface cache entry
 * @size: The surface base level dimensions.
 * @format: The surface format.
 * @num_mip_levels: Number of mipmap levels. Must not exceed the number of
 * entries in the @cache->mip array.
 * @num_layers: Number of layers.
 * @num_samples: Number of samples. Scales the byte stride of each block row.
 * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
 *
 * The cache is zeroed before it is filled in, so on failure it must not be
 * used: it is either empty or only partially populated.
 *
 * Return: Zero on success, -EINVAL on invalid surface layout, which includes
 * a mipmap level count larger than the cache can hold and any level whose
 * row or image stride evaluates to zero.
 */
static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
					    SVGA3dSurfaceFormat format,
					    u32 num_mip_levels,
					    u32 num_layers,
					    u32 num_samples,
					    struct svga3dsurface_cache *cache)
{
	const struct svga3d_surface_desc *desc;
	u32 i;

	memset(cache, 0, sizeof(*cache));

	/*
	 * The per-level array has a fixed capacity; reject counts that would
	 * make the loop below write past its end.
	 */
	if (num_mip_levels > sizeof(cache->mip) / sizeof(cache->mip[0]))
		goto invalid_dim;

	cache->desc = desc = svga3dsurface_get_desc(format);
	cache->num_mip_levels = num_mip_levels;
	cache->num_layers = num_layers;
	for (i = 0; i < cache->num_mip_levels; i++) {
		struct svga3dsurface_mip *mip = &cache->mip[i];

		mip->size = svga3dsurface_get_mip_size(*size, i);
		mip->bytes = svga3dsurface_get_image_buffer_size
			(desc, &mip->size, 0);
		mip->row_stride =
			__KERNEL_DIV_ROUND_UP(mip->size.width,
					      desc->block_size.width) *
			desc->bytes_per_block * num_samples;
		/* A zero stride would be used as a divisor when locating. */
		if (!mip->row_stride)
			goto invalid_dim;

		mip->img_stride =
			__KERNEL_DIV_ROUND_UP(mip->size.height,
					      desc->block_size.height) *
			mip->row_stride;
		if (!mip->img_stride)
			goto invalid_dim;

		cache->mip_chain_bytes += mip->bytes;
	}
	cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
	if (!cache->sheet_bytes)
		goto invalid_dim;

	return 0;

invalid_dim:
	VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
	return -EINVAL;
}
/**
 * svga3dsurface_get_loc - Translate a backing store offset into a surface
 * location
 * @cache: Surface layout data.
 * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
 * @offset: Offset into the surface backing store.
 *
 * The offset is first reduced modulo the sheet size, then split into a
 * layer, a mipmap level within that layer's chain, and finally into image,
 * row and block indices. The block indices are scaled by the format's block
 * dimensions before being stored in @loc.
 */
static inline void
svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
		      struct svga3dsurface_loc *loc,
		      size_t offset)
{
	const struct svga3d_surface_desc *desc = cache->desc;
	const struct svga3dsurface_mip *mip = cache->mip;
	u32 layer;
	u32 level = 0;
	u32 blocks;

	/* Offsets past the first sheet wrap back into it. */
	if (offset >= cache->sheet_bytes)
		offset %= cache->sheet_bytes;

	layer = offset / cache->mip_chain_bytes;
	offset -= layer * cache->mip_chain_bytes;

	/* Step through the chain until the level holding @offset is found. */
	while (level < cache->num_mip_levels && offset >= mip->bytes) {
		offset -= mip->bytes;
		++level;
		++mip;
	}
	loc->sub_resource = svga3dsurface_subres(cache, level, layer);

	/* Image index within the level. */
	blocks = offset / mip->img_stride;
	offset -= blocks * mip->img_stride;
	loc->z = blocks * desc->block_size.depth;

	/* Block row within the image. */
	blocks = offset / mip->row_stride;
	offset -= blocks * mip->row_stride;
	loc->y = blocks * desc->block_size.height;

	/* Block within the row. */
	blocks = offset / desc->bytes_per_block;
	loc->x = blocks * desc->block_size.width;
}
/**
 * svga3dsurface_inc_loc - Clamp increment a surface location with one block
 * size in each dimension.
 * @cache: Surface layout data.
 * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
 *
 * When computing the size of a range as size = end - start, the range does not
 * include the end element. However a location representing the last byte
 * of a touched region in the backing store *is* included in the range.
 * This function modifies such a location to match the end definition
 * given as start + size which is the one used in a SVGA3dBox.
 *
 * The subresource index is advanced by one, and each coordinate is advanced
 * by the block size but never beyond the size of the mipmap level that @loc
 * referred to on entry.
 */
static inline void
svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
		      struct svga3dsurface_loc *loc)
{
	const struct svga3d_surface_desc *desc = cache->desc;
	/* The level must be derived before the subresource is bumped. */
	const u32 level = loc->sub_resource % cache->num_mip_levels;
	const struct drm_vmw_size *limit = &cache->mip[level].size;
	u32 next;

	loc->sub_resource++;

	next = loc->x + desc->block_size.width;
	loc->x = (next > limit->width) ? limit->width : next;

	next = loc->y + desc->block_size.height;
	loc->y = (next > limit->height) ? limit->height : next;

	next = loc->z + desc->block_size.depth;
	loc->z = (next > limit->depth) ? limit->depth : next;
}
/**
 * svga3dsurface_min_loc - The start location in a subresource
 * @cache: Surface layout data. Not read by this function.
 * @sub_resource: The subresource.
 * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
 *
 * The start location is the origin (0, 0, 0) of @sub_resource.
 */
static inline void
svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
		      u32 sub_resource,
		      struct svga3dsurface_loc *loc)
{
	loc->sub_resource = sub_resource;
	loc->x = 0;
	loc->y = 0;
	loc->z = 0;
}
/**
 * svga3dsurface_max_loc - The end location in a subresource
 * @cache: Surface layout data.
 * @sub_resource: The subresource.
 * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
 *
 * Following the end definition given in svga3dsurface_inc_loc(),
 * compute the end location of a surface subresource: the subresource index
 * is one past @sub_resource and the coordinates equal the full size of the
 * mipmap level that @sub_resource belongs to.
 */
static inline void
svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
		      u32 sub_resource,
		      struct svga3dsurface_loc *loc)
{
	const u32 level = sub_resource % cache->num_mip_levels;
	const struct drm_vmw_size *extent = &cache->mip[level].size;

	loc->sub_resource = sub_resource + 1;
	loc->x = extent->width;
	loc->y = extent->height;
	loc->z = extent->depth;
}
#endif /* _SVGA3D_SURFACEDEFS_H_ */
......@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
WARN_ON(vmw_bo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
......@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
vmw_bo_unmap(&vmw_user_bo->vbo);
WARN_ON(vbo->dirty);
WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
......@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
INIT_LIST_HEAD(&vmw_bo->res_list);
vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
......
......@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
#define VMWGFX_DRIVER_DATE "20180704"
#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
#define VMWGFX_DRIVER_MINOR 15
#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
......@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
* @res_list: List of resources using this buffer object as a backing MOB
* @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
* @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
struct list_head res_list;
struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
......@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
struct vmw_bo_dirty *dirty;
};
/**
......@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
* Protecte by resource reserved.
* Protected by resource reserved.
* @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset
......@@ -157,29 +160,32 @@ struct vmw_res_func;