/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

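/* Accumulated cache-domain transitions: the domains to invalidate, the
 * domains to flush, and the rings that need flushing.
 */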
struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
};

static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
						  struct intel_ring_buffer *pipelined);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
					     bool write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
						     uint64_t offset,
						     uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
					  bool interruptible);
static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
				       unsigned alignment,
				       bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    int nr_to_scan,
				    gfp_t gfp_mask);


/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
				  struct drm_i915_gem_object *obj)
{
	dev_priv->mm.gtt_count++;
	dev_priv->mm.gtt_memory += obj->gtt_space->size;
	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used +=
			min_t(size_t, obj->gtt_space->size,
			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
	}
	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
}

static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
				     struct drm_i915_gem_object *obj)
{
	dev_priv->mm.gtt_count--;
	dev_priv->mm.gtt_memory -= obj->gtt_space->size;
	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used -=
			min_t(size_t, obj->gtt_space->size,
			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
	}
	list_del_init(&obj->gtt_list);
}

/**
 * Update the mappable working set counters. Call _only_ when there is a change
 * in one of (pin|fault)_mappable and update *_mappable _before_ calling.
 * @mappable: new state of the changed mappable flag (either pin_ or fault_).
 */
static void
i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
			      struct drm_i915_gem_object *obj,
			      bool mappable)
{
	if (mappable) {
		if (obj->pin_mappable && obj->fault_mappable)
			/* Combined state was already mappable. */
			return;
		dev_priv->mm.gtt_mappable_count++;
		dev_priv->mm.gtt_mappable_memory += obj->gtt_space->size;
	} else {
		if (obj->pin_mappable || obj->fault_mappable)
			/* Combined state still mappable. */
			return;
		dev_priv->mm.gtt_mappable_count--;
		dev_priv->mm.gtt_mappable_memory -= obj->gtt_space->size;
	}
}

static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
				  struct drm_i915_gem_object *obj,
				  bool mappable)
{
	dev_priv->mm.pin_count++;
	dev_priv->mm.pin_memory += obj->gtt_space->size;
	if (mappable) {
		obj->pin_mappable = true;
		i915_gem_info_update_mappable(dev_priv, obj, true);
	}
}

static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
				     struct drm_i915_gem_object *obj)
{
	dev_priv->mm.pin_count--;
	dev_priv->mm.pin_memory -= obj->gtt_space->size;
	if (obj->pin_mappable) {
		obj->pin_mappable = false;
		i915_gem_info_update_mappable(dev_priv, obj, false);
	}
}

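/**
 * Check whether the GPU is wedged.
 *
 * Returns 0 if the GPU is usable (including after a successful reset),
 * -EIO if it is still hung, or the error from an interrupted wait on the
 * error-handling completion.
 */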
int
i915_gem_check_is_wedged(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	/* Success, we reset the GPU! */
	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/* GPU is hung, bump the completion count to account for
	 * the token we just consumed so that we never hit zero and
	 * end up waiting upon a subsequent completion event that
	 * will never happen.
	 */
	spin_lock_irqsave(&x->wait.lock, flags);
	x->done++;
	spin_unlock_irqrestore(&x->wait.lock, flags);
	return -EIO;
}

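/**
 * Acquire dev->struct_mutex interruptibly, failing early if the GPU is
 * wedged.  Returns 0 with the mutex held, or a negative error code
 * without it.
 */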
static int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_check_is_wedged(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		mutex_unlock(&dev->struct_mutex);
		return -EAGAIN;
	}

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

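/* An object is "inactive" when it is bound into the GTT but is neither
 * in use by the GPU nor pinned.
 */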
static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

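/**
 * Set up the GTT address space: a drm_mm allocator covering [start, end)
 * plus the bookkeeping for the mappable aperture.
 */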
void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
}

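/**
 * Initializes the GEM GTT range on behalf of user space; the start and
 * end offsets must be page aligned and describe a non-empty range.
 */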
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

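/**
 * Reports the total GTT aperture size and the portion not currently
 * consumed by pinned objects.
 */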
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - dev_priv->mm.pin_memory;
	mutex_unlock(&dev->struct_mutex);

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	args->handle = handle;
	return 0;
}

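/* Returns true when CPU access to a tiled object must compensate for
 * bit-17 swizzling.
 */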
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

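/* Copy length bytes from one shmem page to another through temporary
 * kernel mappings.
 */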
static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

/**
 * This is the fast shmem pread path, which attempts to copy_from_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages() before reacquiring the struct_mutex, so the copies out
 * of the object's backing pages cannot fault while the mutex is held.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					page,
					shmem_page_offset,
					page_length);
		}

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source.  */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page, KM_USER0);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr, KM_USER0);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(page,
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->tiling_mode == I915_TILING_NONE &&
		 obj->gtt_space &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			goto out;

		ret = -EFAULT;
		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	intel_mark_busy(dev, obj);

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	/* Maintain LRU order of "inactive" objects */
	if (ret == 0 && i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	drm_gem_object_unreference(&obj->base);