/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

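/* Wait (for up to 10 seconds) for a pending GPU reset to finish before
 * touching GEM state.  Returns 0 once no reset is pending (or the GPU is
 * terminally wedged), -EIO if the wait times out, or the error returned
 * by the interrupted wait.
 */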
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

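/* Grab dev->struct_mutex, but only once any pending GPU reset has completed.
 * Returns 0 on success or a negative error code if the wait or the
 * interruptible lock acquisition fails.
 */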
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

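/* An object is "inactive" when it is bound in the global GTT but is no
 * longer referenced by any in-flight GPU commands.
 */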
static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return i915_gem_obj_ggtt_bound(obj) && !obj->active;
}

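/* Legacy (UMS-only) ioctl telling GEM which range of the aperture it may
 * manage.  Not available under KMS or on gen5+ hardware.
 */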
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
				  args->gtt_end);
	dev_priv->gtt.mappable_end = args->gtt_end;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

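/* Report the total size of the GTT aperture and how much of it is not
 * currently consumed by pinned objects.
 */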
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		if (obj->pin_count)
			pinned += i915_gem_obj_ggtt_size(obj);
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

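/* Slab-cache backed allocation helpers for struct drm_i915_gem_object. */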
void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	kmem_cache_free(dev_priv->slab, obj);
}

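/* Common backend for the create and dumb_create ioctls: allocate a
 * shmem-backed object of the page-aligned size and return a new handle to it.
 */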
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		i915_gem_object_free(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

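/* Copy out of a bit-17-swizzled GPU buffer, walking it one 64-byte cacheline
 * at a time and reading each cacheline from its swizzled (offset ^ 64)
 * location.
 */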
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

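/* Clflush a range that may be bit-17 swizzled.  When swizzling is in effect,
 * round the range out to 128 bytes so that both candidate cachelines of every
 * swizzled pair get flushed.
 */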
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

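/* Copy object contents to userspace via the shmem backing pages: try the
 * atomic per-page fastpath first and fall back to the non-atomic slowpath
 * (dropping struct_mutex and prefaulting the user buffer) when it fails.
 */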
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (i915_gem_obj_ggtt_bound(obj)) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915_prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

next_page:
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}
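
/* Copy user data into the object via its shmem backing pages, mirroring
 * i915_gem_shmem_pread: per-page atomic fastpath with a non-atomic slowpath
 * fallback, plus the clflushes needed to keep the caches coherent.
 */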

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (i915_gem_obj_ggtt_bound(obj)) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing.  */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

next_page:
		set_page_dirty(page);
		mark_page_accessed(page);

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	if (likely(!i915_prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

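/* Check whether a GPU reset is in progress.  Interruptible callers get
 * -EAGAIN so they can back off and retry; non-interruptible callers and a
 * terminally wedged GPU get -EIO.  Returns 0 when no reset is pending.
 */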
int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an SMP-safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged * */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL);
}

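/* Common tail for the blocking and non-blocking wait-for-rendering paths:
 * retire completed requests and clear the now-satisfied write seqno and GPU
 * write domains on the object.
 */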
static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_ring_buffer *ring)
{
	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;
	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**