diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 02551c781f0ab8688eafbf631174b422f6ac74db..f9386e793c8709f15d463c51ab156d87cac4ef3f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3887,6 +3887,8 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
 					  unsigned int flags);
 int i915_gem_evict_vm(struct i915_address_space *vm);
 
+void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv);
+
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5504be753092ca077d68c337cff5794080700f61..67dc11effc8e55c31fcc80960582cf6da85aacca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -666,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (!(obj->base.write_domain & flush_domains))
-		return;
-
-	/* No actual flushing is required for the GTT write domain. Writes
-	 * to it "immediately" go to main memory as far as we know, so there's
-	 * no chipset flush. It also doesn't land in render cache.
+	/*
+	 * No actual flushing is required for the GTT write domain for reads
+	 * from the GTT domain. Writes to it "immediately" go to main memory
+	 * as far as we know, so there's no chipset flush. It also doesn't
+	 * land in the GPU render cache.
 	 *
 	 * However, we do have to enforce the order so that all writes through
 	 * the GTT land before any writes to the device, such as updates to
@@ -687,22 +683,46 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 	 * timing. This issue has only been observed when switching quickly
 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
-	 * system agents we cannot reproduce this behaviour).
+	 * system agents we cannot reproduce this behaviour, until Cannonlake
+	 * that was!).
 	 */
-	wmb();
+
+	wmb();
+
+	intel_runtime_pm_get(dev_priv);
+	spin_lock_irq(&dev_priv->uncore.lock);
+
+	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
+	intel_runtime_pm_put(dev_priv);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	if (!(obj->base.write_domain & flush_domains))
+		return;
 
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (!HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		i915_gem_flush_ggtt_writes(dev_priv);
 
 		intel_fb_obj_flush(obj,
 				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
+			if (!i915_vma_is_ggtt(vma))
+				break;
+
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -1965,6 +1985,8 @@ int i915_gem_fault(struct vm_fault *vmf)
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
 	GEM_BUG_ON(!obj->userfault_count);
 
+	i915_vma_set_ggtt_write(vma);
+
 err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1013403fcfea692fe65daddf99a557cc7caca86d..0ebd75693505ea3b39ecef1e38d987f6e3c8dcc1 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -322,6 +322,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	if (err)
 		goto err_unpin;
 
+	i915_vma_set_ggtt_write(vma);
 	return ptr;
 
 err_unpin:
@@ -330,12 +331,24 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	return IO_ERR_PTR(err);
 }
 
+void i915_vma_flush_writes(struct i915_vma *vma)
+{
+	if (!i915_vma_has_ggtt_write(vma))
+		return;
+
+	i915_gem_flush_ggtt_writes(vma->vm->i915);
+
+	i915_vma_unset_ggtt_write(vma);
+}
+
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
 
 	GEM_BUG_ON(vma->iomap == NULL);
 
+	i915_vma_flush_writes(vma);
+
 	i915_vma_unpin_fence(vma);
 	i915_vma_unpin(vma);
 }
@@ -792,6 +805,15 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
+		/*
+		 * Check that we have flushed all writes through the GGTT
+		 * before the unbind; otherwise, due to the non-strict nature
+		 * of those indirect writes, they may end up referencing the
+		 * GGTT PTE after the unbind.
+		 */
+		i915_vma_flush_writes(vma);
+		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
+
 		/* release the fence reg _after_ flushing */
 		ret = i915_vma_put_fence(vma);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 1e2bc9b3c3ac19a4790222eb151765050e264d49..f636243eb8f72773a3551bf989ce0f90a44489a4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -90,6 +90,7 @@ struct i915_vma {
 #define I915_VMA_CLOSED		BIT(10)
 #define I915_VMA_USERFAULT_BIT	11
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
+#define I915_VMA_GGTT_WRITE	BIT(12)
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -138,6 +139,24 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_GGTT;
 }
 
+static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_GGTT_WRITE;
+}
+
+static inline void i915_vma_set_ggtt_write(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+	vma->flags |= I915_VMA_GGTT_WRITE;
+}
+
+static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma)
+{
+	vma->flags &= ~I915_VMA_GGTT_WRITE;
+}
+
+void i915_vma_flush_writes(struct i915_vma *vma);
+
 static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_CAN_FENCE;
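
For reviewers who want to poke at the new state machine outside the driver, below is a minimal sketch of the tracking discipline this patch introduces, assuming nothing beyond the hunks above: a per-vma dirty bit set whenever a GGTT mapping may have been written (fault, pin_iomap), and a flush-and-clear step that must run before the mapping or the PTE goes away (unpin_iomap, unbind). It is plain userspace C, not kernel code; struct fake_vma, GGTT_WRITE and the function names are hypothetical stand-ins, with the i915 specifics (runtime PM, the uncore spinlock, the POSTING_READ_FW of RING_HEAD) reduced to a printf.

/*
 * Standalone sketch of the GGTT-write tracking added by this patch.
 * All identifiers here are illustrative stand-ins, not driver API.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define GGTT_WRITE	(1u << 12)	/* mirrors I915_VMA_GGTT_WRITE */

struct fake_vma {
	unsigned int flags;
	void *iomap;	/* non-NULL while a CPU mapping of the vma is live */
};

/* Stand-in for i915_gem_flush_ggtt_writes(): wmb() + uncached mmio read. */
static void flush_ggtt_writes(void)
{
	printf("wmb(); posting read of RING_HEAD\n");
}

/* Mirrors i915_vma_set_ggtt_write(): called on fault / pin_iomap. */
static void vma_set_ggtt_write(struct fake_vma *vma)
{
	vma->flags |= GGTT_WRITE;
}

/* Mirrors i915_vma_flush_writes(): flush once, then clear the flag. */
static void vma_flush_writes(struct fake_vma *vma)
{
	if (!(vma->flags & GGTT_WRITE))
		return;		/* nothing pending: skip the expensive read */

	flush_ggtt_writes();
	vma->flags &= ~GGTT_WRITE;
}

int main(void)
{
	struct fake_vma vma = { .flags = 0, .iomap = NULL };

	/* i915_vma_pin_iomap(): mapping goes live, writes may now be pending */
	vma.iomap = &vma;
	vma_set_ggtt_write(&vma);

	/* i915_vma_unpin_iomap(): flush before the mapping is torn down */
	vma_flush_writes(&vma);
	vma.iomap = NULL;

	/* i915_vma_unbind(): must observe no pending GGTT writes */
	vma_flush_writes(&vma);		/* no-op, flag already clear */
	assert(!(vma.flags & GGTT_WRITE));

	return 0;
}

The point of the flag is cost: the wmb() plus the uncached register read are paid on unpin or unbind only when a GGTT write may actually be pending, while the new path also drops the old HAS_LLC() shortcut, since Cannonlake showed that LLC system agents are not immune either.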