diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 02551c781f0ab8688eafbf631174b422f6ac74db..f9386e793c8709f15d463c51ab156d87cac4ef3f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3887,6 +3887,8 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
 					 unsigned int flags);
 int i915_gem_evict_vm(struct i915_address_space *vm);
 
+void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv);
+
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5504be753092ca077d68c337cff5794080700f61..67dc11effc8e55c31fcc80960582cf6da85aacca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -666,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (!(obj->base.write_domain & flush_domains))
-		return;
-
-	/* No actual flushing is required for the GTT write domain.  Writes
-	 * to it "immediately" go to main memory as far as we know, so there's
-	 * no chipset flush.  It also doesn't land in render cache.
+	/*
+	 * No actual flushing is required for the GTT write domain for reads
+	 * from the GTT domain. Writes to it "immediately" go to main memory
+	 * as far as we know, so there's no chipset flush. It also doesn't
+	 * land in the GPU render cache.
 	 *
 	 * However, we do have to enforce the order so that all writes through
 	 * the GTT land before any writes to the device, such as updates to
@@ -687,22 +683,46 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 	 * timing. This issue has only been observed when switching quickly
 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
-	 * system agents we cannot reproduce this behaviour).
+	 * system agents we cannot reproduce this behaviour, until Cannonlake
+	 * that was!).
 	 */
+
 	wmb();
 
+	intel_runtime_pm_get(dev_priv);
+	spin_lock_irq(&dev_priv->uncore.lock);
+
+	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+
+	spin_unlock_irq(&dev_priv->uncore.lock);
+	intel_runtime_pm_put(dev_priv);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	if (!(obj->base.write_domain & flush_domains))
+		return;
+
 	switch (obj->base.write_domain) {
 	case I915_GEM_DOMAIN_GTT:
-		if (!HAS_LLC(dev_priv)) {
-			intel_runtime_pm_get(dev_priv);
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
+		i915_gem_flush_ggtt_writes(dev_priv);
 
 		intel_fb_obj_flush(obj,
 				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
+			if (!i915_vma_is_ggtt(vma))
+				break;
+
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
@@ -1965,6 +1985,8 @@ int i915_gem_fault(struct vm_fault *vmf)
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
 	GEM_BUG_ON(!obj->userfault_count);
 
+	i915_vma_set_ggtt_write(vma);
+
 err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1013403fcfea692fe65daddf99a557cc7caca86d..0ebd75693505ea3b39ecef1e38d987f6e3c8dcc1 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -322,6 +322,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	if (err)
 		goto err_unpin;
 
+	i915_vma_set_ggtt_write(vma);
 	return ptr;
 
 err_unpin:
@@ -330,12 +331,24 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	return IO_ERR_PTR(err);
 }
 
+void i915_vma_flush_writes(struct i915_vma *vma)
+{
+	if (!i915_vma_has_ggtt_write(vma))
+		return;
+
+	i915_gem_flush_ggtt_writes(vma->vm->i915);
+
+	i915_vma_unset_ggtt_write(vma);
+}
+
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->obj->base.dev->struct_mutex);
 
 	GEM_BUG_ON(vma->iomap == NULL);
 
+	i915_vma_flush_writes(vma);
+
 	i915_vma_unpin_fence(vma);
 	i915_vma_unpin(vma);
 }
@@ -792,6 +805,15 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
+		/*
+		 * Check that we have flushed all writes through the GGTT
+		 * before the unbind, other due to non-strict nature of those
+		 * indirect writes they may end up referencing the GGTT PTE
+		 * after the unbind.
+		 */
+		i915_vma_flush_writes(vma);
+		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
+
 		/* release the fence reg _after_ flushing */
 		ret = i915_vma_put_fence(vma);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 1e2bc9b3c3ac19a4790222eb151765050e264d49..f636243eb8f72773a3551bf989ce0f90a44489a4 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -90,6 +90,7 @@ struct i915_vma {
 #define I915_VMA_CLOSED		BIT(10)
 #define I915_VMA_USERFAULT_BIT	11
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
+#define I915_VMA_GGTT_WRITE	BIT(12)
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
@@ -138,6 +139,24 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_GGTT;
 }
 
+static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma)
+{
+	return vma->flags & I915_VMA_GGTT_WRITE;
+}
+
+static inline void i915_vma_set_ggtt_write(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+	vma->flags |= I915_VMA_GGTT_WRITE;
+}
+
+static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma)
+{
+	vma->flags &= ~I915_VMA_GGTT_WRITE;
+}
+
+void i915_vma_flush_writes(struct i915_vma *vma);
+
 static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_CAN_FENCE;