xref: /linux/drivers/gpu/drm/i915/i915_gem.c (revision dfc349402de8e95f6a42e8341e9ea193b718eee3)
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/swap.h>
35 #include <linux/pci.h>
36 
37 #define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
38 
39 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
40 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
41 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
42 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
43 					     int write);
44 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
45 						     uint64_t offset,
46 						     uint64_t size);
47 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
48 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
49 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
50 					   unsigned alignment);
51 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
52 static int i915_gem_evict_something(struct drm_device *dev, int min_size);
53 static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
54 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
55 				struct drm_i915_gem_pwrite *args,
56 				struct drm_file *file_priv);
57 
58 static LIST_HEAD(shrink_list);
59 static DEFINE_SPINLOCK(shrink_list_lock);
60 
61 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
62 		     unsigned long end)
63 {
64 	drm_i915_private_t *dev_priv = dev->dev_private;
65 
66 	if (start >= end ||
67 	    (start & (PAGE_SIZE - 1)) != 0 ||
68 	    (end & (PAGE_SIZE - 1)) != 0) {
69 		return -EINVAL;
70 	}
71 
72 	drm_mm_init(&dev_priv->mm.gtt_space, start,
73 		    end - start);
74 
75 	dev->gtt_total = (uint32_t) (end - start);
76 
77 	return 0;
78 }
79 
80 int
81 i915_gem_init_ioctl(struct drm_device *dev, void *data,
82 		    struct drm_file *file_priv)
83 {
84 	struct drm_i915_gem_init *args = data;
85 	int ret;
86 
87 	mutex_lock(&dev->struct_mutex);
88 	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
89 	mutex_unlock(&dev->struct_mutex);
90 
91 	return ret;
92 }
93 
94 int
95 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
96 			    struct drm_file *file_priv)
97 {
98 	struct drm_i915_gem_get_aperture *args = data;
99 
100 	if (!(dev->driver->driver_features & DRIVER_GEM))
101 		return -ENODEV;
102 
103 	args->aper_size = dev->gtt_total;
104 	args->aper_available_size = (args->aper_size -
105 				     atomic_read(&dev->pin_memory));
106 
107 	return 0;
108 }
109 
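/*
 * Editor's note: a minimal userspace sketch (not part of this file) showing
 * how the get-aperture ioctl above is typically reached.  The helper name is
 * invented for illustration; it assumes a DRM fd plus the UAPI structure and
 * request macro declared in i915_drm.h.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static uint64_t example_available_aperture(int fd)
{
	struct drm_i915_gem_get_aperture arg;

	memset(&arg, 0, sizeof(arg));
	if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &arg))
		return 0;	/* treat failure as "nothing available" */
	return arg.aper_available_size;
}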
110 
111 /**
112  * Creates a new mm object and returns a handle to it.
113  */
114 int
115 i915_gem_create_ioctl(struct drm_device *dev, void *data,
116 		      struct drm_file *file_priv)
117 {
118 	struct drm_i915_gem_create *args = data;
119 	struct drm_gem_object *obj;
120 	int ret;
121 	u32 handle;
122 
123 	args->size = roundup(args->size, PAGE_SIZE);
124 
125 	/* Allocate the new object */
126 	obj = drm_gem_object_alloc(dev, args->size);
127 	if (obj == NULL)
128 		return -ENOMEM;
129 
130 	ret = drm_gem_handle_create(file_priv, obj, &handle);
131 	mutex_lock(&dev->struct_mutex);
132 	drm_gem_object_handle_unreference(obj);
133 	mutex_unlock(&dev->struct_mutex);
134 
135 	if (ret)
136 		return ret;
137 
138 	args->handle = handle;
139 
140 	return 0;
141 }
142 
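/*
 * Editor's note: an illustrative userspace sketch (not part of this file) of
 * reaching the create ioctl above.  The helper name is invented; the
 * structure and request macro come from i915_drm.h.  The kernel rounds the
 * requested size up to PAGE_SIZE, and 0 is never a valid GEM handle.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static uint32_t example_gem_create(int fd, uint64_t size)
{
	struct drm_i915_gem_create create;

	memset(&create, 0, sizeof(create));
	create.size = size;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return 0;	/* failure: no handle allocated */
	return create.handle;
}
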
143 static inline int
144 fast_shmem_read(struct page **pages,
145 		loff_t page_base, int page_offset,
146 		char __user *data,
147 		int length)
148 {
149 	char __iomem *vaddr;
150 	int unwritten;
151 
152 	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
153 	if (vaddr == NULL)
154 		return -ENOMEM;
155 	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
156 	kunmap_atomic(vaddr, KM_USER0);
157 
158 	if (unwritten)
159 		return -EFAULT;
160 
161 	return 0;
162 }
163 
164 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
165 {
166 	drm_i915_private_t *dev_priv = obj->dev->dev_private;
167 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
168 
169 	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
170 		obj_priv->tiling_mode != I915_TILING_NONE;
171 }
172 
173 static inline int
174 slow_shmem_copy(struct page *dst_page,
175 		int dst_offset,
176 		struct page *src_page,
177 		int src_offset,
178 		int length)
179 {
180 	char *dst_vaddr, *src_vaddr;
181 
182 	dst_vaddr = kmap_atomic(dst_page, KM_USER0);
183 	if (dst_vaddr == NULL)
184 		return -ENOMEM;
185 
186 	src_vaddr = kmap_atomic(src_page, KM_USER1);
187 	if (src_vaddr == NULL) {
188 		kunmap_atomic(dst_vaddr, KM_USER0);
189 		return -ENOMEM;
190 	}
191 
192 	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
193 
194 	kunmap_atomic(src_vaddr, KM_USER1);
195 	kunmap_atomic(dst_vaddr, KM_USER0);
196 
197 	return 0;
198 }
199 
200 static inline int
201 slow_shmem_bit17_copy(struct page *gpu_page,
202 		      int gpu_offset,
203 		      struct page *cpu_page,
204 		      int cpu_offset,
205 		      int length,
206 		      int is_read)
207 {
208 	char *gpu_vaddr, *cpu_vaddr;
209 
210 	/* Use the unswizzled path if this page isn't affected. */
211 	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
212 		if (is_read)
213 			return slow_shmem_copy(cpu_page, cpu_offset,
214 					       gpu_page, gpu_offset, length);
215 		else
216 			return slow_shmem_copy(gpu_page, gpu_offset,
217 					       cpu_page, cpu_offset, length);
218 	}
219 
220 	gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
221 	if (gpu_vaddr == NULL)
222 		return -ENOMEM;
223 
224 	cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
225 	if (cpu_vaddr == NULL) {
226 		kunmap_atomic(gpu_vaddr, KM_USER0);
227 		return -ENOMEM;
228 	}
229 
230 	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
231 	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
232 	 */
233 	while (length > 0) {
234 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
235 		int this_length = min(cacheline_end - gpu_offset, length);
236 		int swizzled_gpu_offset = gpu_offset ^ 64;
237 
238 		if (is_read) {
239 			memcpy(cpu_vaddr + cpu_offset,
240 			       gpu_vaddr + swizzled_gpu_offset,
241 			       this_length);
242 		} else {
243 			memcpy(gpu_vaddr + swizzled_gpu_offset,
244 			       cpu_vaddr + cpu_offset,
245 			       this_length);
246 		}
247 		cpu_offset += this_length;
248 		gpu_offset += this_length;
249 		length -= this_length;
250 	}
251 
252 	kunmap_atomic(cpu_vaddr, KM_USER1);
253 	kunmap_atomic(gpu_vaddr, KM_USER0);
254 
255 	return 0;
256 }
257 
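/*
 * Editor's note: a small illustrative helper (not driver code) that makes
 * the swizzle applied in the loop above explicit.  On pages whose physical
 * address has bit 17 set, each pair of 64-byte cachelines is exchanged, so
 * the copy accesses offset (o ^ 64); unaffected pages are copied straight.
 */
static inline int example_bit17_swizzled_offset(unsigned long phys_addr,
						int offset)
{
	if (phys_addr & (1 << 17))
		return offset ^ 64;	/* swap the halves of each 128B pair */
	return offset;			/* page not affected by the swizzle */
}
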
258 /**
259  * This is the fast shmem pread path, which attempts to copy_to_user directly
260  * from the backing pages of the object into the user's address space.  On a
261  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
262  */
263 static int
264 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
265 			  struct drm_i915_gem_pread *args,
266 			  struct drm_file *file_priv)
267 {
268 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
269 	ssize_t remain;
270 	loff_t offset, page_base;
271 	char __user *user_data;
272 	int page_offset, page_length;
273 	int ret;
274 
275 	user_data = (char __user *) (uintptr_t) args->data_ptr;
276 	remain = args->size;
277 
278 	mutex_lock(&dev->struct_mutex);
279 
280 	ret = i915_gem_object_get_pages(obj);
281 	if (ret != 0)
282 		goto fail_unlock;
283 
284 	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
285 							args->size);
286 	if (ret != 0)
287 		goto fail_put_pages;
288 
289 	obj_priv = obj->driver_private;
290 	offset = args->offset;
291 
292 	while (remain > 0) {
293 		/* Operation in this page
294 		 *
295 		 * page_base = page-aligned offset within the object
296 		 * page_offset = offset within page
297 		 * page_length = bytes to copy for this page
298 		 */
299 		page_base = (offset & ~(PAGE_SIZE-1));
300 		page_offset = offset & (PAGE_SIZE-1);
301 		page_length = remain;
302 		if ((page_offset + remain) > PAGE_SIZE)
303 			page_length = PAGE_SIZE - page_offset;
304 
305 		ret = fast_shmem_read(obj_priv->pages,
306 				      page_base, page_offset,
307 				      user_data, page_length);
308 		if (ret)
309 			goto fail_put_pages;
310 
311 		remain -= page_length;
312 		user_data += page_length;
313 		offset += page_length;
314 	}
315 
316 fail_put_pages:
317 	i915_gem_object_put_pages(obj);
318 fail_unlock:
319 	mutex_unlock(&dev->struct_mutex);
320 
321 	return ret;
322 }
323 
324 static inline gfp_t
325 i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj)
326 {
327 	return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
328 }
329 
330 static inline void
331 i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp)
332 {
333 	mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
334 }
335 
336 static int
337 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
338 {
339 	int ret;
340 
341 	ret = i915_gem_object_get_pages(obj);
342 
343 	/* If we've insufficient memory to map in the pages, attempt
344 	 * to make some space by throwing out some old buffers.
345 	 */
346 	if (ret == -ENOMEM) {
347 		struct drm_device *dev = obj->dev;
348 		gfp_t gfp;
349 
350 		ret = i915_gem_evict_something(dev, obj->size);
351 		if (ret)
352 			return ret;
353 
354 		gfp = i915_gem_object_get_page_gfp_mask(obj);
355 		i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
356 		ret = i915_gem_object_get_pages(obj);
357 		i915_gem_object_set_page_gfp_mask (obj, gfp);
358 	}
359 
360 	return ret;
361 }
362 
363 /**
364  * This is the fallback shmem pread path, which pins the user pages with
365  * get_user_pages() before taking the struct_mutex, so we can copy out of
366  * the object's backing pages via kmap_atomic while holding the mutex
367  * without ever taking a page fault.
368  */
369 static int
370 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
371 			  struct drm_i915_gem_pread *args,
372 			  struct drm_file *file_priv)
373 {
374 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
375 	struct mm_struct *mm = current->mm;
376 	struct page **user_pages;
377 	ssize_t remain;
378 	loff_t offset, pinned_pages, i;
379 	loff_t first_data_page, last_data_page, num_pages;
380 	int shmem_page_index, shmem_page_offset;
381 	int data_page_index,  data_page_offset;
382 	int page_length;
383 	int ret;
384 	uint64_t data_ptr = args->data_ptr;
385 	int do_bit17_swizzling;
386 
387 	remain = args->size;
388 
389 	/* Pin the user pages containing the data.  We can't fault while
390 	 * holding the struct mutex, yet we want to hold it while
391 	 * dereferencing the user data.
392 	 */
393 	first_data_page = data_ptr / PAGE_SIZE;
394 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
395 	num_pages = last_data_page - first_data_page + 1;
396 
397 	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
398 	if (user_pages == NULL)
399 		return -ENOMEM;
400 
401 	down_read(&mm->mmap_sem);
402 	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
403 				      num_pages, 1, 0, user_pages, NULL);
404 	up_read(&mm->mmap_sem);
405 	if (pinned_pages < num_pages) {
406 		ret = -EFAULT;
407 		goto fail_put_user_pages;
408 	}
409 
410 	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
411 
412 	mutex_lock(&dev->struct_mutex);
413 
414 	ret = i915_gem_object_get_pages_or_evict(obj);
415 	if (ret)
416 		goto fail_unlock;
417 
418 	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
419 							args->size);
420 	if (ret != 0)
421 		goto fail_put_pages;
422 
423 	obj_priv = obj->driver_private;
424 	offset = args->offset;
425 
426 	while (remain > 0) {
427 		/* Operation in this page
428 		 *
429 		 * shmem_page_index = page number within shmem file
430 		 * shmem_page_offset = offset within page in shmem file
431 		 * data_page_index = page number in get_user_pages return
432 		 * data_page_offset = offset within the data_page_index page.
433 		 * page_length = bytes to copy for this page
434 		 */
435 		shmem_page_index = offset / PAGE_SIZE;
436 		shmem_page_offset = offset & ~PAGE_MASK;
437 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
438 		data_page_offset = data_ptr & ~PAGE_MASK;
439 
440 		page_length = remain;
441 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
442 			page_length = PAGE_SIZE - shmem_page_offset;
443 		if ((data_page_offset + page_length) > PAGE_SIZE)
444 			page_length = PAGE_SIZE - data_page_offset;
445 
446 		if (do_bit17_swizzling) {
447 			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
448 						    shmem_page_offset,
449 						    user_pages[data_page_index],
450 						    data_page_offset,
451 						    page_length,
452 						    1);
453 		} else {
454 			ret = slow_shmem_copy(user_pages[data_page_index],
455 					      data_page_offset,
456 					      obj_priv->pages[shmem_page_index],
457 					      shmem_page_offset,
458 					      page_length);
459 		}
460 		if (ret)
461 			goto fail_put_pages;
462 
463 		remain -= page_length;
464 		data_ptr += page_length;
465 		offset += page_length;
466 	}
467 
468 fail_put_pages:
469 	i915_gem_object_put_pages(obj);
470 fail_unlock:
471 	mutex_unlock(&dev->struct_mutex);
472 fail_put_user_pages:
473 	for (i = 0; i < pinned_pages; i++) {
474 		SetPageDirty(user_pages[i]);
475 		page_cache_release(user_pages[i]);
476 	}
477 	drm_free_large(user_pages);
478 
479 	return ret;
480 }
481 
482 /**
483  * Reads data from the object referenced by handle.
484  *
485  * On error, the contents of *data are undefined.
486  */
487 int
488 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
489 		     struct drm_file *file_priv)
490 {
491 	struct drm_i915_gem_pread *args = data;
492 	struct drm_gem_object *obj;
493 	struct drm_i915_gem_object *obj_priv;
494 	int ret;
495 
496 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
497 	if (obj == NULL)
498 		return -EBADF;
499 	obj_priv = obj->driver_private;
500 
501 	/* Bounds check source.
502 	 *
503 	 * XXX: This could use review for overflow issues...
504 	 */
505 	if (args->offset > obj->size || args->size > obj->size ||
506 	    args->offset + args->size > obj->size) {
507 		drm_gem_object_unreference(obj);
508 		return -EINVAL;
509 	}
510 
511 	if (i915_gem_object_needs_bit17_swizzle(obj)) {
512 		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
513 	} else {
514 		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
515 		if (ret != 0)
516 			ret = i915_gem_shmem_pread_slow(dev, obj, args,
517 							file_priv);
518 	}
519 
520 	drm_gem_object_unreference(obj);
521 
522 	return ret;
523 }
524 
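/*
 * Editor's note: an illustrative userspace sketch (not part of this file) of
 * the pread ioctl handled above.  The helper name is invented; it uses the
 * UAPI structure from i915_drm.h.  data_ptr carries a user pointer as a
 * 64-bit integer, hence the double cast.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_gem_pread(int fd, uint32_t handle, uint64_t offset,
			     void *dst, uint64_t length)
{
	struct drm_i915_gem_pread arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = offset;		/* byte offset into the object */
	arg.size = length;
	arg.data_ptr = (uint64_t)(uintptr_t)dst;
	return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &arg);
}
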
525 /* This is the fast write path which cannot handle
526  * page faults in the source data
527  */
528 
529 static inline int
530 fast_user_write(struct io_mapping *mapping,
531 		loff_t page_base, int page_offset,
532 		char __user *user_data,
533 		int length)
534 {
535 	char *vaddr_atomic;
536 	unsigned long unwritten;
537 
538 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
539 	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
540 						      user_data, length);
541 	io_mapping_unmap_atomic(vaddr_atomic);
542 	if (unwritten)
543 		return -EFAULT;
544 	return 0;
545 }
546 
547 /* This is the slow GTT write path: the user pages have already been
548  * pinned, so the copy runs under atomic kernel mappings and cannot fault.
549  */
550 
551 static inline int
552 slow_kernel_write(struct io_mapping *mapping,
553 		  loff_t gtt_base, int gtt_offset,
554 		  struct page *user_page, int user_offset,
555 		  int length)
556 {
557 	char *src_vaddr, *dst_vaddr;
558 	unsigned long unwritten;
559 
560 	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
561 	src_vaddr = kmap_atomic(user_page, KM_USER1);
562 	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
563 						      src_vaddr + user_offset,
564 						      length);
565 	kunmap_atomic(src_vaddr, KM_USER1);
566 	io_mapping_unmap_atomic(dst_vaddr);
567 	if (unwritten)
568 		return -EFAULT;
569 	return 0;
570 }
571 
572 static inline int
573 fast_shmem_write(struct page **pages,
574 		 loff_t page_base, int page_offset,
575 		 char __user *data,
576 		 int length)
577 {
578 	char __iomem *vaddr;
579 	unsigned long unwritten;
580 
581 	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
582 	if (vaddr == NULL)
583 		return -ENOMEM;
584 	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
585 	kunmap_atomic(vaddr, KM_USER0);
586 
587 	if (unwritten)
588 		return -EFAULT;
589 	return 0;
590 }
591 
592 /**
593  * This is the fast pwrite path, where we copy the data directly from the
594  * user into the GTT, uncached.
595  */
596 static int
597 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
598 			 struct drm_i915_gem_pwrite *args,
599 			 struct drm_file *file_priv)
600 {
601 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
602 	drm_i915_private_t *dev_priv = dev->dev_private;
603 	ssize_t remain;
604 	loff_t offset, page_base;
605 	char __user *user_data;
606 	int page_offset, page_length;
607 	int ret;
608 
609 	user_data = (char __user *) (uintptr_t) args->data_ptr;
610 	remain = args->size;
611 	if (!access_ok(VERIFY_READ, user_data, remain))
612 		return -EFAULT;
613 
614 
615 	mutex_lock(&dev->struct_mutex);
616 	ret = i915_gem_object_pin(obj, 0);
617 	if (ret) {
618 		mutex_unlock(&dev->struct_mutex);
619 		return ret;
620 	}
621 	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
622 	if (ret)
623 		goto fail;
624 
625 	obj_priv = obj->driver_private;
626 	offset = obj_priv->gtt_offset + args->offset;
627 
628 	while (remain > 0) {
629 		/* Operation in this page
630 		 *
631 		 * page_base = page offset within aperture
632 		 * page_offset = offset within page
633 		 * page_length = bytes to copy for this page
634 		 */
635 		page_base = (offset & ~(PAGE_SIZE-1));
636 		page_offset = offset & (PAGE_SIZE-1);
637 		page_length = remain;
638 		if ((page_offset + remain) > PAGE_SIZE)
639 			page_length = PAGE_SIZE - page_offset;
640 
641 		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
642 				       page_offset, user_data, page_length);
643 
644 		/* If we get a fault while copying data, then (presumably) our
645 		 * source page isn't available.  Return the error and we'll
646 		 * retry in the slow path.
647 		 */
648 		if (ret)
649 			goto fail;
650 
651 		remain -= page_length;
652 		user_data += page_length;
653 		offset += page_length;
654 	}
655 
656 fail:
657 	i915_gem_object_unpin(obj);
658 	mutex_unlock(&dev->struct_mutex);
659 
660 	return ret;
661 }
662 
663 /**
664  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
665  * the memory and maps it using kmap_atomic for copying.
666  *
667  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
668  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
669  */
670 static int
671 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
672 			 struct drm_i915_gem_pwrite *args,
673 			 struct drm_file *file_priv)
674 {
675 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
676 	drm_i915_private_t *dev_priv = dev->dev_private;
677 	ssize_t remain;
678 	loff_t gtt_page_base, offset;
679 	loff_t first_data_page, last_data_page, num_pages;
680 	loff_t pinned_pages, i;
681 	struct page **user_pages;
682 	struct mm_struct *mm = current->mm;
683 	int gtt_page_offset, data_page_offset, data_page_index, page_length;
684 	int ret;
685 	uint64_t data_ptr = args->data_ptr;
686 
687 	remain = args->size;
688 
689 	/* Pin the user pages containing the data.  We can't fault while
690 	 * holding the struct mutex, and all of the pwrite implementations
691 	 * want to hold it while dereferencing the user data.
692 	 */
693 	first_data_page = data_ptr / PAGE_SIZE;
694 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
695 	num_pages = last_data_page - first_data_page + 1;
696 
697 	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
698 	if (user_pages == NULL)
699 		return -ENOMEM;
700 
701 	down_read(&mm->mmap_sem);
702 	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
703 				      num_pages, 0, 0, user_pages, NULL);
704 	up_read(&mm->mmap_sem);
705 	if (pinned_pages < num_pages) {
706 		ret = -EFAULT;
707 		goto out_unpin_pages;
708 	}
709 
710 	mutex_lock(&dev->struct_mutex);
711 	ret = i915_gem_object_pin(obj, 0);
712 	if (ret)
713 		goto out_unlock;
714 
715 	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
716 	if (ret)
717 		goto out_unpin_object;
718 
719 	obj_priv = obj->driver_private;
720 	offset = obj_priv->gtt_offset + args->offset;
721 
722 	while (remain > 0) {
723 		/* Operation in this page
724 		 *
725 		 * gtt_page_base = page offset within aperture
726 		 * gtt_page_offset = offset within page in aperture
727 		 * data_page_index = page number in get_user_pages return
728 		 * data_page_offset = offset within the data_page_index page.
729 		 * page_length = bytes to copy for this page
730 		 */
731 		gtt_page_base = offset & PAGE_MASK;
732 		gtt_page_offset = offset & ~PAGE_MASK;
733 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
734 		data_page_offset = data_ptr & ~PAGE_MASK;
735 
736 		page_length = remain;
737 		if ((gtt_page_offset + page_length) > PAGE_SIZE)
738 			page_length = PAGE_SIZE - gtt_page_offset;
739 		if ((data_page_offset + page_length) > PAGE_SIZE)
740 			page_length = PAGE_SIZE - data_page_offset;
741 
742 		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
743 					gtt_page_base, gtt_page_offset,
744 					user_pages[data_page_index],
745 					data_page_offset,
746 					page_length);
747 
748 		/* The source pages are already pinned, so this copy should
749 		 * not fault; if it somehow does, just return the error, as
750 		 * there is no slower path left to fall back to.
751 		 */
752 		if (ret)
753 			goto out_unpin_object;
754 
755 		remain -= page_length;
756 		offset += page_length;
757 		data_ptr += page_length;
758 	}
759 
760 out_unpin_object:
761 	i915_gem_object_unpin(obj);
762 out_unlock:
763 	mutex_unlock(&dev->struct_mutex);
764 out_unpin_pages:
765 	for (i = 0; i < pinned_pages; i++)
766 		page_cache_release(user_pages[i]);
767 	drm_free_large(user_pages);
768 
769 	return ret;
770 }
771 
772 /**
773  * This is the fast shmem pwrite path, which attempts to directly
774  * copy_from_user into the kmapped pages backing the object.
775  */
776 static int
777 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
778 			   struct drm_i915_gem_pwrite *args,
779 			   struct drm_file *file_priv)
780 {
781 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
782 	ssize_t remain;
783 	loff_t offset, page_base;
784 	char __user *user_data;
785 	int page_offset, page_length;
786 	int ret;
787 
788 	user_data = (char __user *) (uintptr_t) args->data_ptr;
789 	remain = args->size;
790 
791 	mutex_lock(&dev->struct_mutex);
792 
793 	ret = i915_gem_object_get_pages(obj);
794 	if (ret != 0)
795 		goto fail_unlock;
796 
797 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
798 	if (ret != 0)
799 		goto fail_put_pages;
800 
801 	obj_priv = obj->driver_private;
802 	offset = args->offset;
803 	obj_priv->dirty = 1;
804 
805 	while (remain > 0) {
806 		/* Operation in this page
807 		 *
808 		 * page_base = page-aligned offset within the object
809 		 * page_offset = offset within page
810 		 * page_length = bytes to copy for this page
811 		 */
812 		page_base = (offset & ~(PAGE_SIZE-1));
813 		page_offset = offset & (PAGE_SIZE-1);
814 		page_length = remain;
815 		if ((page_offset + remain) > PAGE_SIZE)
816 			page_length = PAGE_SIZE - page_offset;
817 
818 		ret = fast_shmem_write(obj_priv->pages,
819 				       page_base, page_offset,
820 				       user_data, page_length);
821 		if (ret)
822 			goto fail_put_pages;
823 
824 		remain -= page_length;
825 		user_data += page_length;
826 		offset += page_length;
827 	}
828 
829 fail_put_pages:
830 	i915_gem_object_put_pages(obj);
831 fail_unlock:
832 	mutex_unlock(&dev->struct_mutex);
833 
834 	return ret;
835 }
836 
837 /**
838  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
839  * the memory and maps it using kmap_atomic for copying.
840  *
841  * This avoids taking mmap_sem for faulting on the user's address while the
842  * struct_mutex is held.
843  */
844 static int
845 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
846 			   struct drm_i915_gem_pwrite *args,
847 			   struct drm_file *file_priv)
848 {
849 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
850 	struct mm_struct *mm = current->mm;
851 	struct page **user_pages;
852 	ssize_t remain;
853 	loff_t offset, pinned_pages, i;
854 	loff_t first_data_page, last_data_page, num_pages;
855 	int shmem_page_index, shmem_page_offset;
856 	int data_page_index,  data_page_offset;
857 	int page_length;
858 	int ret;
859 	uint64_t data_ptr = args->data_ptr;
860 	int do_bit17_swizzling;
861 
862 	remain = args->size;
863 
864 	/* Pin the user pages containing the data.  We can't fault while
865 	 * holding the struct mutex, and all of the pwrite implementations
866 	 * want to hold it while dereferencing the user data.
867 	 */
868 	first_data_page = data_ptr / PAGE_SIZE;
869 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
870 	num_pages = last_data_page - first_data_page + 1;
871 
872 	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
873 	if (user_pages == NULL)
874 		return -ENOMEM;
875 
876 	down_read(&mm->mmap_sem);
877 	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
878 				      num_pages, 0, 0, user_pages, NULL);
879 	up_read(&mm->mmap_sem);
880 	if (pinned_pages < num_pages) {
881 		ret = -EFAULT;
882 		goto fail_put_user_pages;
883 	}
884 
885 	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
886 
887 	mutex_lock(&dev->struct_mutex);
888 
889 	ret = i915_gem_object_get_pages_or_evict(obj);
890 	if (ret)
891 		goto fail_unlock;
892 
893 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
894 	if (ret != 0)
895 		goto fail_put_pages;
896 
897 	obj_priv = obj->driver_private;
898 	offset = args->offset;
899 	obj_priv->dirty = 1;
900 
901 	while (remain > 0) {
902 		/* Operation in this page
903 		 *
904 		 * shmem_page_index = page number within shmem file
905 		 * shmem_page_offset = offset within page in shmem file
906 		 * data_page_index = page number in get_user_pages return
907 		 * data_page_offset = offset within the data_page_index page.
908 		 * page_length = bytes to copy for this page
909 		 */
910 		shmem_page_index = offset / PAGE_SIZE;
911 		shmem_page_offset = offset & ~PAGE_MASK;
912 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
913 		data_page_offset = data_ptr & ~PAGE_MASK;
914 
915 		page_length = remain;
916 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
917 			page_length = PAGE_SIZE - shmem_page_offset;
918 		if ((data_page_offset + page_length) > PAGE_SIZE)
919 			page_length = PAGE_SIZE - data_page_offset;
920 
921 		if (do_bit17_swizzling) {
922 			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
923 						    shmem_page_offset,
924 						    user_pages[data_page_index],
925 						    data_page_offset,
926 						    page_length,
927 						    0);
928 		} else {
929 			ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
930 					      shmem_page_offset,
931 					      user_pages[data_page_index],
932 					      data_page_offset,
933 					      page_length);
934 		}
935 		if (ret)
936 			goto fail_put_pages;
937 
938 		remain -= page_length;
939 		data_ptr += page_length;
940 		offset += page_length;
941 	}
942 
943 fail_put_pages:
944 	i915_gem_object_put_pages(obj);
945 fail_unlock:
946 	mutex_unlock(&dev->struct_mutex);
947 fail_put_user_pages:
948 	for (i = 0; i < pinned_pages; i++)
949 		page_cache_release(user_pages[i]);
950 	drm_free_large(user_pages);
951 
952 	return ret;
953 }
954 
955 /**
956  * Writes data to the object referenced by handle.
957  *
958  * On error, the contents of the buffer that were to be modified are undefined.
959  */
960 int
961 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
962 		      struct drm_file *file_priv)
963 {
964 	struct drm_i915_gem_pwrite *args = data;
965 	struct drm_gem_object *obj;
966 	struct drm_i915_gem_object *obj_priv;
967 	int ret = 0;
968 
969 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
970 	if (obj == NULL)
971 		return -EBADF;
972 	obj_priv = obj->driver_private;
973 
974 	/* Bounds check destination.
975 	 *
976 	 * XXX: This could use review for overflow issues...
977 	 */
978 	if (args->offset > obj->size || args->size > obj->size ||
979 	    args->offset + args->size > obj->size) {
980 		drm_gem_object_unreference(obj);
981 		return -EINVAL;
982 	}
983 
984 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
985 	 * it would end up going through the fenced access, and we'll get
986 	 * different detiling behavior between reading and writing.
987 	 * pread/pwrite currently are reading and writing from the CPU
988 	 * perspective, requiring manual detiling by the client.
989 	 */
990 	if (obj_priv->phys_obj)
991 		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
992 	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
993 		 dev->gtt_total != 0) {
994 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
995 		if (ret == -EFAULT) {
996 			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
997 						       file_priv);
998 		}
999 	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
1000 		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
1001 	} else {
1002 		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
1003 		if (ret == -EFAULT) {
1004 			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
1005 							 file_priv);
1006 		}
1007 	}
1008 
1009 #if WATCH_PWRITE
1010 	if (ret)
1011 		DRM_INFO("pwrite failed %d\n", ret);
1012 #endif
1013 
1014 	drm_gem_object_unreference(obj);
1015 
1016 	return ret;
1017 }
1018 
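/*
 * Editor's note: the userspace side of the pwrite ioctl above, sketched for
 * illustration (helper name invented, UAPI structure from i915_drm.h).  The
 * kernel picks the phys, GTT or shmem path internally; the caller only sees
 * the return value.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_gem_pwrite(int fd, uint32_t handle, uint64_t offset,
			      const void *src, uint64_t length)
{
	struct drm_i915_gem_pwrite arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = offset;
	arg.size = length;
	arg.data_ptr = (uint64_t)(uintptr_t)src;
	return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &arg);
}
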
1019 /**
1020  * Called when user space prepares to use an object with the CPU, either
1021  * through the mmap ioctl's mapping or a GTT mapping.
1022  */
1023 int
1024 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1025 			  struct drm_file *file_priv)
1026 {
1027 	struct drm_i915_private *dev_priv = dev->dev_private;
1028 	struct drm_i915_gem_set_domain *args = data;
1029 	struct drm_gem_object *obj;
1030 	struct drm_i915_gem_object *obj_priv;
1031 	uint32_t read_domains = args->read_domains;
1032 	uint32_t write_domain = args->write_domain;
1033 	int ret;
1034 
1035 	if (!(dev->driver->driver_features & DRIVER_GEM))
1036 		return -ENODEV;
1037 
1038 	/* Only handle setting domains to types used by the CPU. */
1039 	if (write_domain & I915_GEM_GPU_DOMAINS)
1040 		return -EINVAL;
1041 
1042 	if (read_domains & I915_GEM_GPU_DOMAINS)
1043 		return -EINVAL;
1044 
1045 	/* Having something in the write domain implies it's in the read
1046 	 * domain, and only that read domain.  Enforce that in the request.
1047 	 */
1048 	if (write_domain != 0 && read_domains != write_domain)
1049 		return -EINVAL;
1050 
1051 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1052 	if (obj == NULL)
1053 		return -EBADF;
1054 	obj_priv = obj->driver_private;
1055 
1056 	mutex_lock(&dev->struct_mutex);
1057 
1058 	intel_mark_busy(dev, obj);
1059 
1060 #if WATCH_BUF
1061 	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1062 		 obj, obj->size, read_domains, write_domain);
1063 #endif
1064 	if (read_domains & I915_GEM_DOMAIN_GTT) {
1065 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1066 
1067 		/* Update the LRU on the fence for the CPU access that's
1068 		 * about to occur.
1069 		 */
1070 		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1071 			list_move_tail(&obj_priv->fence_list,
1072 				       &dev_priv->mm.fence_list);
1073 		}
1074 
1075 		/* Silently promote "you're not bound, there was nothing to do"
1076 		 * to success, since the client was just asking us to
1077 		 * make sure everything was done.
1078 		 */
1079 		if (ret == -EINVAL)
1080 			ret = 0;
1081 	} else {
1082 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1083 	}
1084 
1085 	drm_gem_object_unreference(obj);
1086 	mutex_unlock(&dev->struct_mutex);
1087 	return ret;
1088 }
1089 
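/*
 * Editor's note: an illustrative userspace sketch (not part of this file) of
 * the set-domain ioctl above, moving an object into the GTT domain before
 * CPU access through a GTT mapping.  The helper name is invented; the
 * structure, request macro and domain flags come from i915_drm.h.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_set_to_gtt_domain(int fd, uint32_t handle, bool writing)
{
	struct drm_i915_gem_set_domain sd;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_GTT;
	/* A write domain must match the (single) read domain, or be zero. */
	sd.write_domain = writing ? I915_GEM_DOMAIN_GTT : 0;
	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}
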
1090 /**
1091  * Called when user space has done writes to this buffer
1092  */
1093 int
1094 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1095 		      struct drm_file *file_priv)
1096 {
1097 	struct drm_i915_gem_sw_finish *args = data;
1098 	struct drm_gem_object *obj;
1099 	struct drm_i915_gem_object *obj_priv;
1100 	int ret = 0;
1101 
1102 	if (!(dev->driver->driver_features & DRIVER_GEM))
1103 		return -ENODEV;
1104 
1105 	mutex_lock(&dev->struct_mutex);
1106 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1107 	if (obj == NULL) {
1108 		mutex_unlock(&dev->struct_mutex);
1109 		return -EBADF;
1110 	}
1111 
1112 #if WATCH_BUF
1113 	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1114 		 __func__, args->handle, obj, obj->size);
1115 #endif
1116 	obj_priv = obj->driver_private;
1117 
1118 	/* Pinned buffers may be scanout, so flush the cache */
1119 	if (obj_priv->pin_count)
1120 		i915_gem_object_flush_cpu_write_domain(obj);
1121 
1122 	drm_gem_object_unreference(obj);
1123 	mutex_unlock(&dev->struct_mutex);
1124 	return ret;
1125 }
1126 
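/*
 * Editor's note: an illustrative userspace sequence (not part of this file)
 * showing where sw_finish fits: set the CPU domain, write through a CPU
 * mapping, then tell the kernel the software access is done so pinned
 * scanout buffers get flushed.  Helper names are invented; ptr is assumed
 * to come from the CPU mmap ioctl below.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_cpu_write(int fd, uint32_t handle, void *ptr,
			     const void *src, size_t len)
{
	struct drm_i915_gem_set_domain sd;
	struct drm_i915_gem_sw_finish fin;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_CPU;
	sd.write_domain = I915_GEM_DOMAIN_CPU;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
		return -1;

	memcpy(ptr, src, len);			/* the actual software write */

	memset(&fin, 0, sizeof(fin));
	fin.handle = handle;
	return ioctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &fin);
}
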
1127 /**
1128  * Maps the contents of an object, returning the address it is mapped
1129  * into.
1130  *
1131  * While the mapping holds a reference on the contents of the object, it doesn't
1132  * imply a ref on the object itself.
1133  */
1134 int
1135 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1136 		   struct drm_file *file_priv)
1137 {
1138 	struct drm_i915_gem_mmap *args = data;
1139 	struct drm_gem_object *obj;
1140 	loff_t offset;
1141 	unsigned long addr;
1142 
1143 	if (!(dev->driver->driver_features & DRIVER_GEM))
1144 		return -ENODEV;
1145 
1146 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1147 	if (obj == NULL)
1148 		return -EBADF;
1149 
1150 	offset = args->offset;
1151 
1152 	down_write(&current->mm->mmap_sem);
1153 	addr = do_mmap(obj->filp, 0, args->size,
1154 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1155 		       args->offset);
1156 	up_write(&current->mm->mmap_sem);
1157 	mutex_lock(&dev->struct_mutex);
1158 	drm_gem_object_unreference(obj);
1159 	mutex_unlock(&dev->struct_mutex);
1160 	if (IS_ERR((void *)addr))
1161 		return addr;
1162 
1163 	args->addr_ptr = (uint64_t) addr;
1164 
1165 	return 0;
1166 }
1167 
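/*
 * Editor's note: an illustrative userspace sketch (not part of this file) of
 * the CPU mmap ioctl above.  The helper name is invented; the structure and
 * request macro come from i915_drm.h.  Unlike the GTT mmap path below, the
 * kernel performs the mmap itself and returns the address in addr_ptr.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *example_gem_mmap_cpu(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = 0;			/* map from the start of the object */
	arg.size = size;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;
	return (void *)(uintptr_t)arg.addr_ptr;
}
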
1168 /**
1169  * i915_gem_fault - fault a page into the GTT
1170  * @vma: VMA in question
1171  * @vmf: fault info
1172  *
1173  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1174  * from userspace.  The fault handler takes care of binding the object to
1175  * the GTT (if needed), allocating and programming a fence register (again,
1176  * only if needed based on whether the old reg is still valid or the object
1177  * is tiled) and inserting a new PTE into the faulting process.
1178  *
1179  * Note that the faulting process may involve evicting existing objects
1180  * from the GTT and/or fence registers to make room.  So performance may
1181  * suffer if the GTT working set is large or there are few fence registers
1182  * left.
1183  */
1184 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1185 {
1186 	struct drm_gem_object *obj = vma->vm_private_data;
1187 	struct drm_device *dev = obj->dev;
1188 	struct drm_i915_private *dev_priv = dev->dev_private;
1189 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1190 	pgoff_t page_offset;
1191 	unsigned long pfn;
1192 	int ret = 0;
1193 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1194 
1195 	/* We don't use vmf->pgoff since that has the fake offset */
1196 	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1197 		PAGE_SHIFT;
1198 
1199 	/* Now bind it into the GTT if needed */
1200 	mutex_lock(&dev->struct_mutex);
1201 	if (!obj_priv->gtt_space) {
1202 		ret = i915_gem_object_bind_to_gtt(obj, 0);
1203 		if (ret)
1204 			goto unlock;
1205 
1206 		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1207 
1208 		ret = i915_gem_object_set_to_gtt_domain(obj, write);
1209 		if (ret)
1210 			goto unlock;
1211 	}
1212 
1213 	/* Need a new fence register? */
1214 	if (obj_priv->tiling_mode != I915_TILING_NONE) {
1215 		ret = i915_gem_object_get_fence_reg(obj);
1216 		if (ret)
1217 			goto unlock;
1218 	}
1219 
1220 	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1221 		page_offset;
1222 
1223 	/* Finally, remap it using the new GTT offset */
1224 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1225 unlock:
1226 	mutex_unlock(&dev->struct_mutex);
1227 
1228 	switch (ret) {
1229 	case 0:
1230 	case -ERESTARTSYS:
1231 		return VM_FAULT_NOPAGE;
1232 	case -ENOMEM:
1233 	case -EAGAIN:
1234 		return VM_FAULT_OOM;
1235 	default:
1236 		return VM_FAULT_SIGBUS;
1237 	}
1238 }
1239 
1240 /**
1241  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1242  * @obj: obj in question
1243  *
1244  * GEM memory mapping works by handing back to userspace a fake mmap offset
1245  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1246  * up the object based on the offset and sets up the various memory mapping
1247  * structures.
1248  *
1249  * This routine allocates and attaches a fake offset for @obj.
1250  */
1251 static int
1252 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1253 {
1254 	struct drm_device *dev = obj->dev;
1255 	struct drm_gem_mm *mm = dev->mm_private;
1256 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1257 	struct drm_map_list *list;
1258 	struct drm_local_map *map;
1259 	int ret = 0;
1260 
1261 	/* Set the object up for mmap'ing */
1262 	list = &obj->map_list;
1263 	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1264 	if (!list->map)
1265 		return -ENOMEM;
1266 
1267 	map = list->map;
1268 	map->type = _DRM_GEM;
1269 	map->size = obj->size;
1270 	map->handle = obj;
1271 
1272 	/* Get a DRM GEM mmap offset allocated... */
1273 	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1274 						    obj->size / PAGE_SIZE, 0, 0);
1275 	if (!list->file_offset_node) {
1276 		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1277 		ret = -ENOMEM;
1278 		goto out_free_list;
1279 	}
1280 
1281 	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1282 						  obj->size / PAGE_SIZE, 0);
1283 	if (!list->file_offset_node) {
1284 		ret = -ENOMEM;
1285 		goto out_free_list;
1286 	}
1287 
1288 	list->hash.key = list->file_offset_node->start;
1289 	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1290 		DRM_ERROR("failed to add to map hash\n");
1291 		goto out_free_mm;
1292 	}
1293 
1294 	/* By now we should be all set, any drm_mmap request on the offset
1295 	 * below will get to our mmap & fault handler */
1296 	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1297 
1298 	return 0;
1299 
1300 out_free_mm:
1301 	drm_mm_put_block(list->file_offset_node);
1302 out_free_list:
1303 	kfree(list->map);
1304 
1305 	return ret;
1306 }
1307 
1308 /**
1309  * i915_gem_release_mmap - remove physical page mappings
1310  * @obj: obj in question
1311  *
1312  * Preserve the reservation of the mmapping with the DRM core code, but
1313  * relinquish ownership of the pages back to the system.
1314  *
1315  * It is vital that we remove the page mapping if we have mapped a tiled
1316  * object through the GTT and then lose the fence register due to
1317  * resource pressure. Similarly if the object has been moved out of the
1318  * aperture, then pages mapped into userspace must be revoked. Removing the
1319  * mapping will then trigger a page fault on the next user access, allowing
1320  * fixup by i915_gem_fault().
1321  */
1322 void
1323 i915_gem_release_mmap(struct drm_gem_object *obj)
1324 {
1325 	struct drm_device *dev = obj->dev;
1326 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1327 
1328 	if (dev->dev_mapping)
1329 		unmap_mapping_range(dev->dev_mapping,
1330 				    obj_priv->mmap_offset, obj->size, 1);
1331 }
1332 
1333 static void
1334 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1335 {
1336 	struct drm_device *dev = obj->dev;
1337 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1338 	struct drm_gem_mm *mm = dev->mm_private;
1339 	struct drm_map_list *list;
1340 
1341 	list = &obj->map_list;
1342 	drm_ht_remove_item(&mm->offset_hash, &list->hash);
1343 
1344 	if (list->file_offset_node) {
1345 		drm_mm_put_block(list->file_offset_node);
1346 		list->file_offset_node = NULL;
1347 	}
1348 
1349 	if (list->map) {
1350 		kfree(list->map);
1351 		list->map = NULL;
1352 	}
1353 
1354 	obj_priv->mmap_offset = 0;
1355 }
1356 
1357 /**
1358  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1359  * @obj: object to check
1360  *
1361  * Return the required GTT alignment for an object, taking into account
1362  * potential fence register mapping if needed.
1363  */
1364 static uint32_t
1365 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1366 {
1367 	struct drm_device *dev = obj->dev;
1368 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1369 	int start, i;
1370 
1371 	/*
1372 	 * Minimum alignment is 4k (GTT page size), but might be greater
1373 	 * if a fence register is needed for the object.
1374 	 */
1375 	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1376 		return 4096;
1377 
1378 	/*
1379 	 * Previous chips need to be aligned to the size of the smallest
1380 	 * fence register that can contain the object.
1381 	 */
1382 	if (IS_I9XX(dev))
1383 		start = 1024*1024;
1384 	else
1385 		start = 512*1024;
1386 
1387 	for (i = start; i < obj->size; i <<= 1)
1388 		;
1389 
1390 	return i;
1391 }
1392 
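/*
 * Editor's note: a worked example (not driver code) of the alignment rule
 * above for a tiled object on a pre-965 i9xx part, where start is 1 MiB:
 * the object must be aligned to the smallest power-of-two fence size that
 * can contain it.  Names are invented for illustration.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t example_fence_alignment(uint32_t start, uint32_t obj_size)
{
	uint32_t i;

	for (i = start; i < obj_size; i <<= 1)
		;
	return i;
}

static void example_fence_alignment_check(void)
{
	/* A 300 KiB tiled object still needs 1 MiB alignment... */
	assert(example_fence_alignment(1024 * 1024, 300 * 1024) == 1024 * 1024);
	/* ...while a 3 MiB object is rounded up to a 4 MiB fence. */
	assert(example_fence_alignment(1024 * 1024, 3 * 1024 * 1024) ==
	       4 * 1024 * 1024);
}
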
1393 /**
1394  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1395  * @dev: DRM device
1396  * @data: GTT mapping ioctl data
1397  * @file_priv: GEM object info
1398  *
1399  * Simply returns the fake offset to userspace so it can mmap it.
1400  * The mmap call will end up in drm_gem_mmap(), which will set things
1401  * up so we can get faults in the handler above.
1402  *
1403  * The fault handler will take care of binding the object into the GTT
1404  * (since it may have been evicted to make room for something), allocating
1405  * a fence register, and mapping the appropriate aperture address into
1406  * userspace.
1407  */
1408 int
1409 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1410 			struct drm_file *file_priv)
1411 {
1412 	struct drm_i915_gem_mmap_gtt *args = data;
1413 	struct drm_i915_private *dev_priv = dev->dev_private;
1414 	struct drm_gem_object *obj;
1415 	struct drm_i915_gem_object *obj_priv;
1416 	int ret;
1417 
1418 	if (!(dev->driver->driver_features & DRIVER_GEM))
1419 		return -ENODEV;
1420 
1421 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1422 	if (obj == NULL)
1423 		return -EBADF;
1424 
1425 	mutex_lock(&dev->struct_mutex);
1426 
1427 	obj_priv = obj->driver_private;
1428 
1429 	if (obj_priv->madv != I915_MADV_WILLNEED) {
1430 		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1431 		drm_gem_object_unreference(obj);
1432 		mutex_unlock(&dev->struct_mutex);
1433 		return -EINVAL;
1434 	}
1435 
1436 
1437 	if (!obj_priv->mmap_offset) {
1438 		ret = i915_gem_create_mmap_offset(obj);
1439 		if (ret) {
1440 			drm_gem_object_unreference(obj);
1441 			mutex_unlock(&dev->struct_mutex);
1442 			return ret;
1443 		}
1444 	}
1445 
1446 	args->offset = obj_priv->mmap_offset;
1447 
1448 	/*
1449 	 * Pull it into the GTT so that we have a page list (makes the
1450 	 * initial fault faster and any subsequent flushing possible).
1451 	 */
1452 	if (!obj_priv->agp_mem) {
1453 		ret = i915_gem_object_bind_to_gtt(obj, 0);
1454 		if (ret) {
1455 			drm_gem_object_unreference(obj);
1456 			mutex_unlock(&dev->struct_mutex);
1457 			return ret;
1458 		}
1459 		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1460 	}
1461 
1462 	drm_gem_object_unreference(obj);
1463 	mutex_unlock(&dev->struct_mutex);
1464 
1465 	return 0;
1466 }
1467 
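/*
 * Editor's note: an illustrative userspace sketch (not part of this file) of
 * consuming the fake offset returned by the ioctl above: it is passed to
 * mmap(2) on the DRM fd itself, and the fault handler earlier in this file
 * fills in the GTT pages on first access.  The helper name is invented.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *example_gem_mmap_gtt(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return NULL;

	/* off_t must be 64-bit here (e.g. _FILE_OFFSET_BITS=64 on 32-bit). */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		   arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}
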
1468 void
1469 i915_gem_object_put_pages(struct drm_gem_object *obj)
1470 {
1471 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1472 	int page_count = obj->size / PAGE_SIZE;
1473 	int i;
1474 
1475 	BUG_ON(obj_priv->pages_refcount == 0);
1476 	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1477 
1478 	if (--obj_priv->pages_refcount != 0)
1479 		return;
1480 
1481 	if (obj_priv->tiling_mode != I915_TILING_NONE)
1482 		i915_gem_object_save_bit_17_swizzle(obj);
1483 
1484 	if (obj_priv->madv == I915_MADV_DONTNEED)
1485 		obj_priv->dirty = 0;
1486 
1487 	for (i = 0; i < page_count; i++) {
1488 		if (obj_priv->pages[i] == NULL)
1489 			break;
1490 
1491 		if (obj_priv->dirty)
1492 			set_page_dirty(obj_priv->pages[i]);
1493 
1494 		if (obj_priv->madv == I915_MADV_WILLNEED)
1495 			mark_page_accessed(obj_priv->pages[i]);
1496 
1497 		page_cache_release(obj_priv->pages[i]);
1498 	}
1499 	obj_priv->dirty = 0;
1500 
1501 	drm_free_large(obj_priv->pages);
1502 	obj_priv->pages = NULL;
1503 }
1504 
1505 static void
1506 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
1507 {
1508 	struct drm_device *dev = obj->dev;
1509 	drm_i915_private_t *dev_priv = dev->dev_private;
1510 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1511 
1512 	/* Add a reference if we're newly entering the active list. */
1513 	if (!obj_priv->active) {
1514 		drm_gem_object_reference(obj);
1515 		obj_priv->active = 1;
1516 	}
1517 	/* Move from whatever list we were on to the tail of execution. */
1518 	spin_lock(&dev_priv->mm.active_list_lock);
1519 	list_move_tail(&obj_priv->list,
1520 		       &dev_priv->mm.active_list);
1521 	spin_unlock(&dev_priv->mm.active_list_lock);
1522 	obj_priv->last_rendering_seqno = seqno;
1523 }
1524 
1525 static void
1526 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1527 {
1528 	struct drm_device *dev = obj->dev;
1529 	drm_i915_private_t *dev_priv = dev->dev_private;
1530 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1531 
1532 	BUG_ON(!obj_priv->active);
1533 	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1534 	obj_priv->last_rendering_seqno = 0;
1535 }
1536 
1537 /* Immediately discard the backing storage */
1538 static void
1539 i915_gem_object_truncate(struct drm_gem_object *obj)
1540 {
1541 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1542 	struct inode *inode;
1543 
1544 	inode = obj->filp->f_path.dentry->d_inode;
1545 	if (inode->i_op->truncate)
1546 		inode->i_op->truncate (inode);
1547 
1548 	obj_priv->madv = __I915_MADV_PURGED;
1549 }
1550 
1551 static inline int
1552 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1553 {
1554 	return obj_priv->madv == I915_MADV_DONTNEED;
1555 }
1556 
1557 static void
1558 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1559 {
1560 	struct drm_device *dev = obj->dev;
1561 	drm_i915_private_t *dev_priv = dev->dev_private;
1562 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1563 
1564 	i915_verify_inactive(dev, __FILE__, __LINE__);
1565 	if (obj_priv->pin_count != 0)
1566 		list_del_init(&obj_priv->list);
1567 	else
1568 		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1569 
1570 	obj_priv->last_rendering_seqno = 0;
1571 	if (obj_priv->active) {
1572 		obj_priv->active = 0;
1573 		drm_gem_object_unreference(obj);
1574 	}
1575 	i915_verify_inactive(dev, __FILE__, __LINE__);
1576 }
1577 
1578 /**
1579  * Creates a new sequence number, emitting a write of it to the status page
1580  * plus an interrupt, which will trigger i915_user_interrupt_handler.
1581  *
1582  * Must be called with struct_mutex held.
1583  *
1584  * Returned sequence numbers are nonzero on success.
1585  */
1586 static uint32_t
1587 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1588 		 uint32_t flush_domains)
1589 {
1590 	drm_i915_private_t *dev_priv = dev->dev_private;
1591 	struct drm_i915_file_private *i915_file_priv = NULL;
1592 	struct drm_i915_gem_request *request;
1593 	uint32_t seqno;
1594 	int was_empty;
1595 	RING_LOCALS;
1596 
1597 	if (file_priv != NULL)
1598 		i915_file_priv = file_priv->driver_priv;
1599 
1600 	request = kzalloc(sizeof(*request), GFP_KERNEL);
1601 	if (request == NULL)
1602 		return 0;
1603 
1604 	/* Grab the seqno we're going to make this request be, and bump the
1605 	 * next (skipping 0 so it can be the reserved no-seqno value).
1606 	 */
1607 	seqno = dev_priv->mm.next_gem_seqno;
1608 	dev_priv->mm.next_gem_seqno++;
1609 	if (dev_priv->mm.next_gem_seqno == 0)
1610 		dev_priv->mm.next_gem_seqno++;
1611 
1612 	BEGIN_LP_RING(4);
1613 	OUT_RING(MI_STORE_DWORD_INDEX);
1614 	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1615 	OUT_RING(seqno);
1616 
1617 	OUT_RING(MI_USER_INTERRUPT);
1618 	ADVANCE_LP_RING();
1619 
1620 	DRM_DEBUG("%d\n", seqno);
1621 
1622 	request->seqno = seqno;
1623 	request->emitted_jiffies = jiffies;
1624 	was_empty = list_empty(&dev_priv->mm.request_list);
1625 	list_add_tail(&request->list, &dev_priv->mm.request_list);
1626 	if (i915_file_priv) {
1627 		list_add_tail(&request->client_list,
1628 			      &i915_file_priv->mm.request_list);
1629 	} else {
1630 		INIT_LIST_HEAD(&request->client_list);
1631 	}
1632 
1633 	/* Associate this request with any objects on the flushing list whose
1634 	 * write domains are covered by the domains being flushed.
1635 	 */
1636 	if (flush_domains != 0) {
1637 		struct drm_i915_gem_object *obj_priv, *next;
1638 
1639 		list_for_each_entry_safe(obj_priv, next,
1640 					 &dev_priv->mm.flushing_list, list) {
1641 			struct drm_gem_object *obj = obj_priv->obj;
1642 
1643 			if ((obj->write_domain & flush_domains) ==
1644 			    obj->write_domain) {
1645 				uint32_t old_write_domain = obj->write_domain;
1646 
1647 				obj->write_domain = 0;
1648 				i915_gem_object_move_to_active(obj, seqno);
1649 
1650 				trace_i915_gem_object_change_domain(obj,
1651 								    obj->read_domains,
1652 								    old_write_domain);
1653 			}
1654 		}
1655 
1656 	}
1657 
1658 	if (!dev_priv->mm.suspended) {
1659 		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1660 		if (was_empty)
1661 			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1662 	}
1663 	return seqno;
1664 }
1665 
1666 /**
1667  * Command execution barrier
1668  *
1669  * Ensures that all commands in the ring are finished
1670  * before signalling the CPU
1671  */
1672 static uint32_t
1673 i915_retire_commands(struct drm_device *dev)
1674 {
1675 	drm_i915_private_t *dev_priv = dev->dev_private;
1676 	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1677 	uint32_t flush_domains = 0;
1678 	RING_LOCALS;
1679 
1680 	/* The sampler always gets flushed on i965 (sigh) */
1681 	if (IS_I965G(dev))
1682 		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1683 	BEGIN_LP_RING(2);
1684 	OUT_RING(cmd);
1685 	OUT_RING(0); /* noop */
1686 	ADVANCE_LP_RING();
1687 	return flush_domains;
1688 }
1689 
1690 /**
1691  * Moves buffers associated only with the given active seqno from the active
1692  * to inactive list, potentially freeing them.
1693  */
1694 static void
1695 i915_gem_retire_request(struct drm_device *dev,
1696 			struct drm_i915_gem_request *request)
1697 {
1698 	drm_i915_private_t *dev_priv = dev->dev_private;
1699 
1700 	trace_i915_gem_request_retire(dev, request->seqno);
1701 
1702 	/* Move any buffers on the active list that are no longer referenced
1703 	 * by the ringbuffer to the flushing/inactive lists as appropriate.
1704 	 */
1705 	spin_lock(&dev_priv->mm.active_list_lock);
1706 	while (!list_empty(&dev_priv->mm.active_list)) {
1707 		struct drm_gem_object *obj;
1708 		struct drm_i915_gem_object *obj_priv;
1709 
1710 		obj_priv = list_first_entry(&dev_priv->mm.active_list,
1711 					    struct drm_i915_gem_object,
1712 					    list);
1713 		obj = obj_priv->obj;
1714 
1715 		/* If the seqno being retired doesn't match the oldest in the
1716 		 * list, then the oldest in the list must still be newer than
1717 		 * this seqno.
1718 		 */
1719 		if (obj_priv->last_rendering_seqno != request->seqno)
1720 			goto out;
1721 
1722 #if WATCH_LRU
1723 		DRM_INFO("%s: retire %d moves to inactive list %p\n",
1724 			 __func__, request->seqno, obj);
1725 #endif
1726 
1727 		if (obj->write_domain != 0)
1728 			i915_gem_object_move_to_flushing(obj);
1729 		else {
1730 			/* Take a reference on the object so it won't be
1731 			 * freed while the spinlock is held.  The list
1732 			 * protection for this spinlock is safe when breaking
1733 			 * the lock like this since the next thing we do
1734 			 * is just get the head of the list again.
1735 			 */
1736 			drm_gem_object_reference(obj);
1737 			i915_gem_object_move_to_inactive(obj);
1738 			spin_unlock(&dev_priv->mm.active_list_lock);
1739 			drm_gem_object_unreference(obj);
1740 			spin_lock(&dev_priv->mm.active_list_lock);
1741 		}
1742 	}
1743 out:
1744 	spin_unlock(&dev_priv->mm.active_list_lock);
1745 }
1746 
1747 /**
1748  * Returns true if seq1 is later than seq2.
1749  */
1750 bool
1751 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1752 {
1753 	return (int32_t)(seq1 - seq2) >= 0;
1754 }
1755 
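/*
 * Editor's note: a small illustration (not driver code) of why the signed
 * subtraction above is wraparound-safe where a plain unsigned compare is
 * not.  Names are invented for illustration.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool example_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

static void example_seqno_wrap_check(void)
{
	assert(example_seqno_passed(2, 1));		/* ordinary case */
	assert(example_seqno_passed(5, 5));		/* equal counts as passed */
	assert(example_seqno_passed(1, 0xfffffffeu));	/* still true across wrap */
	assert(!(1u >= 0xfffffffeu));			/* naive compare fails here */
}
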
1756 uint32_t
1757 i915_get_gem_seqno(struct drm_device *dev)
1758 {
1759 	drm_i915_private_t *dev_priv = dev->dev_private;
1760 
1761 	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
1762 }
1763 
1764 /**
1765  * This function clears the request list as sequence numbers are passed.
1766  */
1767 void
1768 i915_gem_retire_requests(struct drm_device *dev)
1769 {
1770 	drm_i915_private_t *dev_priv = dev->dev_private;
1771 	uint32_t seqno;
1772 
1773 	if (!dev_priv->hw_status_page || list_empty(&dev_priv->mm.request_list))
1774 		return;
1775 
1776 	seqno = i915_get_gem_seqno(dev);
1777 
1778 	while (!list_empty(&dev_priv->mm.request_list)) {
1779 		struct drm_i915_gem_request *request;
1780 		uint32_t retiring_seqno;
1781 
1782 		request = list_first_entry(&dev_priv->mm.request_list,
1783 					   struct drm_i915_gem_request,
1784 					   list);
1785 		retiring_seqno = request->seqno;
1786 
1787 		if (i915_seqno_passed(seqno, retiring_seqno) ||
1788 		    atomic_read(&dev_priv->mm.wedged)) {
1789 			i915_gem_retire_request(dev, request);
1790 
1791 			list_del(&request->list);
1792 			list_del(&request->client_list);
1793 			kfree(request);
1794 		} else
1795 			break;
1796 	}
1797 
1798 	if (unlikely (dev_priv->trace_irq_seqno &&
1799 		      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1800 		i915_user_irq_put(dev);
1801 		dev_priv->trace_irq_seqno = 0;
1802 	}
1803 }
1804 
1805 void
1806 i915_gem_retire_work_handler(struct work_struct *work)
1807 {
1808 	drm_i915_private_t *dev_priv;
1809 	struct drm_device *dev;
1810 
1811 	dev_priv = container_of(work, drm_i915_private_t,
1812 				mm.retire_work.work);
1813 	dev = dev_priv->dev;
1814 
1815 	mutex_lock(&dev->struct_mutex);
1816 	i915_gem_retire_requests(dev);
1817 	if (!dev_priv->mm.suspended &&
1818 	    !list_empty(&dev_priv->mm.request_list))
1819 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1820 	mutex_unlock(&dev->struct_mutex);
1821 }
1822 
1823 /**
1824  * Waits for a sequence number to be signaled, and cleans up the
1825  * request and object lists appropriately for that event.
1826  */
1827 static int
1828 i915_wait_request(struct drm_device *dev, uint32_t seqno)
1829 {
1830 	drm_i915_private_t *dev_priv = dev->dev_private;
1831 	u32 ier;
1832 	int ret = 0;
1833 
1834 	BUG_ON(seqno == 0);
1835 
1836 	if (atomic_read(&dev_priv->mm.wedged))
1837 		return -EIO;
1838 
1839 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
1840 		if (IS_IGDNG(dev))
1841 			ier = I915_READ(DEIER) | I915_READ(GTIER);
1842 		else
1843 			ier = I915_READ(IER);
1844 		if (!ier) {
1845 			DRM_ERROR("something (likely vbetool) disabled "
1846 				  "interrupts, re-enabling\n");
1847 			i915_driver_irq_preinstall(dev);
1848 			i915_driver_irq_postinstall(dev);
1849 		}
1850 
1851 		trace_i915_gem_request_wait_begin(dev, seqno);
1852 
1853 		dev_priv->mm.waiting_gem_seqno = seqno;
1854 		i915_user_irq_get(dev);
1855 		ret = wait_event_interruptible(dev_priv->irq_queue,
1856 					       i915_seqno_passed(i915_get_gem_seqno(dev),
1857 								 seqno) ||
1858 					       atomic_read(&dev_priv->mm.wedged));
1859 		i915_user_irq_put(dev);
1860 		dev_priv->mm.waiting_gem_seqno = 0;
1861 
1862 		trace_i915_gem_request_wait_end(dev, seqno);
1863 	}
1864 	if (atomic_read(&dev_priv->mm.wedged))
1865 		ret = -EIO;
1866 
1867 	if (ret && ret != -ERESTARTSYS)
1868 		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1869 			  __func__, ret, seqno, i915_get_gem_seqno(dev));
1870 
1871 	/* Directly dispatch request retiring.  While we have the work queue
1872 	 * to handle this, the waiter on a request often wants an associated
1873 	 * buffer to have made it to the inactive list, and we would need
1874 	 * a separate wait queue to handle that.
1875 	 */
1876 	if (ret == 0)
1877 		i915_gem_retire_requests(dev);
1878 
1879 	return ret;
1880 }
1881 
1882 static void
1883 i915_gem_flush(struct drm_device *dev,
1884 	       uint32_t invalidate_domains,
1885 	       uint32_t flush_domains)
1886 {
1887 	drm_i915_private_t *dev_priv = dev->dev_private;
1888 	uint32_t cmd;
1889 	RING_LOCALS;
1890 
1891 #if WATCH_EXEC
1892 	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
1893 		  invalidate_domains, flush_domains);
1894 #endif
1895 	trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
1896 				     invalidate_domains, flush_domains);
1897 
1898 	if (flush_domains & I915_GEM_DOMAIN_CPU)
1899 		drm_agp_chipset_flush(dev);
1900 
1901 	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
1902 		/*
1903 		 * read/write caches:
1904 		 *
1905 		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
1906 		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
1907 		 * also flushed at 2d versus 3d pipeline switches.
1908 		 *
1909 		 * read-only caches:
1910 		 *
1911 		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
1912 		 * MI_READ_FLUSH is set, and is always flushed on 965.
1913 		 *
1914 		 * I915_GEM_DOMAIN_COMMAND may not exist?
1915 		 *
1916 		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
1917 		 * invalidated when MI_EXE_FLUSH is set.
1918 		 *
1919 		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
1920 		 * invalidated with every MI_FLUSH.
1921 		 *
1922 		 * TLBs:
1923 		 *
1924 		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
1925 		 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
1926 		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
1927 		 * are flushed at any MI_FLUSH.
1928 		 */
1929 
1930 		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1931 		if ((invalidate_domains|flush_domains) &
1932 		    I915_GEM_DOMAIN_RENDER)
1933 			cmd &= ~MI_NO_WRITE_FLUSH;
1934 		if (!IS_I965G(dev)) {
1935 			/*
1936 			 * On the 965, the sampler cache always gets flushed
1937 			 * and this bit is reserved.
1938 			 */
1939 			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
1940 				cmd |= MI_READ_FLUSH;
1941 		}
1942 		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
1943 			cmd |= MI_EXE_FLUSH;
1944 
1945 #if WATCH_EXEC
1946 		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
1947 #endif
1948 		BEGIN_LP_RING(2);
1949 		OUT_RING(cmd);
1950 		OUT_RING(0); /* noop */
1951 		ADVANCE_LP_RING();
1952 	}
1953 }
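
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * works through the MI_FLUSH command word computed above for one
 * concrete pre-965 case -- invalidating the sampler and instruction
 * caches while flushing the render cache.  The helper name and the
 * chosen domain combination are hypothetical.
 */
#if 0
static void i915_gem_flush_cmd_example(void)
{
	uint32_t invalidate = I915_GEM_DOMAIN_SAMPLER |
			      I915_GEM_DOMAIN_INSTRUCTION;
	uint32_t flush = I915_GEM_DOMAIN_RENDER;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;

	/* RENDER appears in the combined set, so the render cache write
	 * flush is enabled by clearing MI_NO_WRITE_FLUSH. */
	if ((invalidate | flush) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	/* Pre-965 only: on 965 the sampler cache is always flushed. */
	if (invalidate & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;
	if (invalidate & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	BUG_ON(cmd != (MI_FLUSH | MI_READ_FLUSH | MI_EXE_FLUSH));
}
#endif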
1954 
1955 /**
1956  * Ensures that all rendering to the object has completed and the object is
1957  * safe to unbind from the GTT or access from the CPU.
1958  */
1959 static int
1960 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1961 {
1962 	struct drm_device *dev = obj->dev;
1963 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1964 	int ret;
1965 
1966 	/* This function only exists to support waiting for existing rendering,
1967 	 * not for emitting required flushes.
1968 	 */
1969 	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1970 
1971 	/* If there is rendering queued on the buffer being evicted, wait for
1972 	 * it.
1973 	 */
1974 	if (obj_priv->active) {
1975 #if WATCH_BUF
1976 		DRM_INFO("%s: object %p wait for seqno %08x\n",
1977 			  __func__, obj, obj_priv->last_rendering_seqno);
1978 #endif
1979 		ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
1980 		if (ret != 0)
1981 			return ret;
1982 	}
1983 
1984 	return 0;
1985 }
1986 
1987 /**
1988  * Unbinds an object from the GTT aperture.
1989  */
1990 int
1991 i915_gem_object_unbind(struct drm_gem_object *obj)
1992 {
1993 	struct drm_device *dev = obj->dev;
1994 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
1995 	int ret = 0;
1996 
1997 #if WATCH_BUF
1998 	DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1999 	DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
2000 #endif
2001 	if (obj_priv->gtt_space == NULL)
2002 		return 0;
2003 
2004 	if (obj_priv->pin_count != 0) {
2005 		DRM_ERROR("Attempting to unbind pinned buffer\n");
2006 		return -EINVAL;
2007 	}
2008 
2009 	/* blow away mappings if mapped through GTT */
2010 	i915_gem_release_mmap(obj);
2011 
2012 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
2013 		i915_gem_clear_fence_reg(obj);
2014 
2015 	/* Move the object to the CPU domain to ensure that
2016 	 * any possible CPU writes while it's not in the GTT
2017 	 * are flushed when we go to remap it. This will
2018 	 * also ensure that all pending GPU writes are finished
2019 	 * before we unbind.
2020 	 */
2021 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2022 	if (ret) {
2023 		if (ret != -ERESTARTSYS)
2024 			DRM_ERROR("set_domain failed: %d\n", ret);
2025 		return ret;
2026 	}
2027 
2028 	BUG_ON(obj_priv->active);
2029 
2030 	if (obj_priv->agp_mem != NULL) {
2031 		drm_unbind_agp(obj_priv->agp_mem);
2032 		drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
2033 		obj_priv->agp_mem = NULL;
2034 	}
2035 
2036 	i915_gem_object_put_pages(obj);
2037 	BUG_ON(obj_priv->pages_refcount);
2038 
2039 	if (obj_priv->gtt_space) {
2040 		atomic_dec(&dev->gtt_count);
2041 		atomic_sub(obj->size, &dev->gtt_memory);
2042 
2043 		drm_mm_put_block(obj_priv->gtt_space);
2044 		obj_priv->gtt_space = NULL;
2045 	}
2046 
2047 	/* Remove ourselves from the LRU list if present. */
2048 	if (!list_empty(&obj_priv->list))
2049 		list_del_init(&obj_priv->list);
2050 
2051 	if (i915_gem_object_is_purgeable(obj_priv))
2052 		i915_gem_object_truncate(obj);
2053 
2054 	trace_i915_gem_object_unbind(obj);
2055 
2056 	return 0;
2057 }
2058 
2059 static struct drm_gem_object *
2060 i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
2061 {
2062 	drm_i915_private_t *dev_priv = dev->dev_private;
2063 	struct drm_i915_gem_object *obj_priv;
2064 	struct drm_gem_object *best = NULL;
2065 	struct drm_gem_object *first = NULL;
2066 
2067 	/* Try to find the smallest clean object */
2068 	list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
2069 		struct drm_gem_object *obj = obj_priv->obj;
2070 		if (obj->size >= min_size) {
2071 			if ((!obj_priv->dirty ||
2072 			     i915_gem_object_is_purgeable(obj_priv)) &&
2073 			    (!best || obj->size < best->size)) {
2074 				best = obj;
2075 				if (best->size == min_size)
2076 					return best;
2077 			}
2078 			if (!first)
2079 				first = obj;
2080 		}
2081 	}
2082 
2083 	return best ? best : first;
2084 }
2085 
2086 static int
2087 i915_gem_evict_everything(struct drm_device *dev)
2088 {
2089 	drm_i915_private_t *dev_priv = dev->dev_private;
2090 	uint32_t seqno;
2091 	int ret;
2092 	bool lists_empty;
2093 
2094 	spin_lock(&dev_priv->mm.active_list_lock);
2095 	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2096 		       list_empty(&dev_priv->mm.flushing_list) &&
2097 		       list_empty(&dev_priv->mm.active_list));
2098 	spin_unlock(&dev_priv->mm.active_list_lock);
2099 
2100 	if (lists_empty)
2101 		return -ENOSPC;
2102 
2103 	/* Flush everything (onto the inactive lists) and evict */
2104 	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2105 	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
2106 	if (seqno == 0)
2107 		return -ENOMEM;
2108 
2109 	ret = i915_wait_request(dev, seqno);
2110 	if (ret)
2111 		return ret;
2112 
2113 	ret = i915_gem_evict_from_inactive_list(dev);
2114 	if (ret)
2115 		return ret;
2116 
2117 	spin_lock(&dev_priv->mm.active_list_lock);
2118 	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2119 		       list_empty(&dev_priv->mm.flushing_list) &&
2120 		       list_empty(&dev_priv->mm.active_list));
2121 	spin_unlock(&dev_priv->mm.active_list_lock);
2122 	BUG_ON(!lists_empty);
2123 
2124 	return 0;
2125 }
2126 
2127 static int
2128 i915_gem_evict_something(struct drm_device *dev, int min_size)
2129 {
2130 	drm_i915_private_t *dev_priv = dev->dev_private;
2131 	struct drm_gem_object *obj;
2132 	int ret;
2133 
2134 	for (;;) {
2135 		i915_gem_retire_requests(dev);
2136 
2137 		/* If there's an inactive buffer available now, grab it
2138 		 * and be done.
2139 		 */
2140 		obj = i915_gem_find_inactive_object(dev, min_size);
2141 		if (obj) {
2142 			struct drm_i915_gem_object *obj_priv;
2143 
2144 #if WATCH_LRU
2145 			DRM_INFO("%s: evicting %p\n", __func__, obj);
2146 #endif
2147 			obj_priv = obj->driver_private;
2148 			BUG_ON(obj_priv->pin_count != 0);
2149 			BUG_ON(obj_priv->active);
2150 
2151 			/* Wait on the rendering and unbind the buffer. */
2152 			return i915_gem_object_unbind(obj);
2153 		}
2154 
2155 		/* If we didn't get anything, but the ring is still processing
2156 		 * things, wait for the next request to finish and hopefully leave us
2157 		 * a buffer to evict.
2158 		 */
2159 		if (!list_empty(&dev_priv->mm.request_list)) {
2160 			struct drm_i915_gem_request *request;
2161 
2162 			request = list_first_entry(&dev_priv->mm.request_list,
2163 						   struct drm_i915_gem_request,
2164 						   list);
2165 
2166 			ret = i915_wait_request(dev, request->seqno);
2167 			if (ret)
2168 				return ret;
2169 
2170 			continue;
2171 		}
2172 
2173 		/* If we didn't have anything on the request list but there
2174 		 * are buffers awaiting a flush, emit one and try again.
2175 		 * When we wait on it, those buffers waiting for that flush
2176 		 * will get moved to inactive.
2177 		 */
2178 		if (!list_empty(&dev_priv->mm.flushing_list)) {
2179 			struct drm_i915_gem_object *obj_priv;
2180 
2181 			/* Find an object that we can immediately reuse */
2182 			list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
2183 				obj = obj_priv->obj;
2184 				if (obj->size >= min_size)
2185 					break;
2186 
2187 				obj = NULL;
2188 			}
2189 
2190 			if (obj != NULL) {
2191 				uint32_t seqno;
2192 
2193 				i915_gem_flush(dev,
2194 					       obj->write_domain,
2195 					       obj->write_domain);
2196 				seqno = i915_add_request(dev, NULL, obj->write_domain);
2197 				if (seqno == 0)
2198 					return -ENOMEM;
2199 
2200 				ret = i915_wait_request(dev, seqno);
2201 				if (ret)
2202 					return ret;
2203 
2204 				continue;
2205 			}
2206 		}
2207 
2208 		/* If we didn't do any of the above, there's no single buffer
2209 		 * large enough to swap out for the new one, so just evict
2210 		 * everything and start again. (This should be rare.)
2211 		 */
2212 		if (!list_empty(&dev_priv->mm.inactive_list))
2213 			return i915_gem_evict_from_inactive_list(dev);
2214 		else
2215 			return i915_gem_evict_everything(dev);
2216 	}
2217 }
2218 
2219 int
2220 i915_gem_object_get_pages(struct drm_gem_object *obj)
2221 {
2222 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2223 	int page_count, i;
2224 	struct address_space *mapping;
2225 	struct inode *inode;
2226 	struct page *page;
2227 	int ret;
2228 
2229 	if (obj_priv->pages_refcount++ != 0)
2230 		return 0;
2231 
2232 	/* Get the list of pages out of our struct file.  They'll be pinned
2233 	 * at this point until we release them.
2234 	 */
2235 	page_count = obj->size / PAGE_SIZE;
2236 	BUG_ON(obj_priv->pages != NULL);
2237 	obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2238 	if (obj_priv->pages == NULL) {
2239 		obj_priv->pages_refcount--;
2240 		return -ENOMEM;
2241 	}
2242 
2243 	inode = obj->filp->f_path.dentry->d_inode;
2244 	mapping = inode->i_mapping;
2245 	for (i = 0; i < page_count; i++) {
2246 		page = read_mapping_page(mapping, i, NULL);
2247 		if (IS_ERR(page)) {
2248 			ret = PTR_ERR(page);
2249 			i915_gem_object_put_pages(obj);
2250 			return ret;
2251 		}
2252 		obj_priv->pages[i] = page;
2253 	}
2254 
2255 	if (obj_priv->tiling_mode != I915_TILING_NONE)
2256 		i915_gem_object_do_bit_17_swizzle(obj);
2257 
2258 	return 0;
2259 }
2260 
2261 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2262 {
2263 	struct drm_gem_object *obj = reg->obj;
2264 	struct drm_device *dev = obj->dev;
2265 	drm_i915_private_t *dev_priv = dev->dev_private;
2266 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2267 	int regnum = obj_priv->fence_reg;
2268 	uint64_t val;
2269 
2270 	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2271 		    0xfffff000) << 32;
2272 	val |= obj_priv->gtt_offset & 0xfffff000;
2273 	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2274 	if (obj_priv->tiling_mode == I915_TILING_Y)
2275 		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2276 	val |= I965_FENCE_REG_VALID;
2277 
2278 	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2279 }
2280 
2281 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2282 {
2283 	struct drm_gem_object *obj = reg->obj;
2284 	struct drm_device *dev = obj->dev;
2285 	drm_i915_private_t *dev_priv = dev->dev_private;
2286 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2287 	int regnum = obj_priv->fence_reg;
2288 	int tile_width;
2289 	uint32_t fence_reg, val;
2290 	uint32_t pitch_val;
2291 
2292 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2293 	    (obj_priv->gtt_offset & (obj->size - 1))) {
2294 		WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2295 		     __func__, obj_priv->gtt_offset, obj->size);
2296 		return;
2297 	}
2298 
2299 	if (obj_priv->tiling_mode == I915_TILING_Y &&
2300 	    HAS_128_BYTE_Y_TILING(dev))
2301 		tile_width = 128;
2302 	else
2303 		tile_width = 512;
2304 
2305 	/* Note: the pitch must be a power-of-two number of tile widths */
2306 	pitch_val = obj_priv->stride / tile_width;
2307 	pitch_val = ffs(pitch_val) - 1;
2308 
2309 	val = obj_priv->gtt_offset;
2310 	if (obj_priv->tiling_mode == I915_TILING_Y)
2311 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2312 	val |= I915_FENCE_SIZE_BITS(obj->size);
2313 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2314 	val |= I830_FENCE_REG_VALID;
2315 
2316 	if (regnum < 8)
2317 		fence_reg = FENCE_REG_830_0 + (regnum * 4);
2318 	else
2319 		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2320 	I915_WRITE(fence_reg, val);
2321 }
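
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * works the pitch encoding above through one hypothetical pre-965
 * X-tiled object with 512-byte tiles and a 2048-byte stride.
 */
#if 0
static void i915_fence_pitch_example(void)
{
	uint32_t stride = 2048, tile_width = 512;
	uint32_t pitch_val = stride / tile_width;	/* 4 tile widths per row */

	/* The fence register stores log2 of the pitch in tile widths,
	 * which is what the ffs() trick above computes. */
	pitch_val = ffs(pitch_val) - 1;
	BUG_ON(pitch_val != 2);
}
#endif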
2322 
2323 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2324 {
2325 	struct drm_gem_object *obj = reg->obj;
2326 	struct drm_device *dev = obj->dev;
2327 	drm_i915_private_t *dev_priv = dev->dev_private;
2328 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2329 	int regnum = obj_priv->fence_reg;
2330 	uint32_t val;
2331 	uint32_t pitch_val;
2332 	uint32_t fence_size_bits;
2333 
2334 	if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2335 	    (obj_priv->gtt_offset & (obj->size - 1))) {
2336 		WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2337 		     __func__, obj_priv->gtt_offset);
2338 		return;
2339 	}
2340 
2341 	pitch_val = obj_priv->stride / 128;
2342 	pitch_val = ffs(pitch_val) - 1;
2343 	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2344 
2345 	val = obj_priv->gtt_offset;
2346 	if (obj_priv->tiling_mode == I915_TILING_Y)
2347 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2348 	fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2349 	WARN_ON(fence_size_bits & ~0x00000f00);
2350 	val |= fence_size_bits;
2351 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2352 	val |= I830_FENCE_REG_VALID;
2353 
2354 	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2355 }
2356 
2357 /**
2358  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2359  * @obj: object to map through a fence reg
2360  *
2361  * When mapping objects through the GTT, userspace wants to be able to write
2362  * to them without having to worry about swizzling if the object is tiled.
2363  *
2364  * This function walks the fence regs looking for a free one for @obj,
2365  * stealing one if it can't find any.
2366  *
2367  * It then sets up the reg based on the object's properties: address, pitch
2368  * and tiling format.
2369  */
2370 int
2371 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2372 {
2373 	struct drm_device *dev = obj->dev;
2374 	struct drm_i915_private *dev_priv = dev->dev_private;
2375 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2376 	struct drm_i915_fence_reg *reg = NULL;
2377 	struct drm_i915_gem_object *old_obj_priv = NULL;
2378 	int i, ret, avail;
2379 
2380 	/* Just update our place in the LRU if our fence is getting used. */
2381 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2382 		list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
2383 		return 0;
2384 	}
2385 
2386 	switch (obj_priv->tiling_mode) {
2387 	case I915_TILING_NONE:
2388 		WARN(1, "allocating a fence for non-tiled object?\n");
2389 		break;
2390 	case I915_TILING_X:
2391 		if (!obj_priv->stride)
2392 			return -EINVAL;
2393 		WARN((obj_priv->stride & (512 - 1)),
2394 		     "object 0x%08x is X tiled but has non-512B pitch\n",
2395 		     obj_priv->gtt_offset);
2396 		break;
2397 	case I915_TILING_Y:
2398 		if (!obj_priv->stride)
2399 			return -EINVAL;
2400 		WARN((obj_priv->stride & (128 - 1)),
2401 		     "object 0x%08x is Y tiled but has non-128B pitch\n",
2402 		     obj_priv->gtt_offset);
2403 		break;
2404 	}
2405 
2406 	/* First try to find a free reg */
2407 	avail = 0;
2408 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2409 		reg = &dev_priv->fence_regs[i];
2410 		if (!reg->obj)
2411 			break;
2412 
2413 		old_obj_priv = reg->obj->driver_private;
2414 		if (!old_obj_priv->pin_count)
2415 			avail++;
2416 	}
2417 
2418 	/* None available, try to steal one or wait for a user to finish */
2419 	if (i == dev_priv->num_fence_regs) {
2420 		struct drm_gem_object *old_obj = NULL;
2421 
2422 		if (avail == 0)
2423 			return -ENOSPC;
2424 
2425 		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
2426 				    fence_list) {
2427 			old_obj = old_obj_priv->obj;
2428 
2429 			if (old_obj_priv->pin_count)
2430 				continue;
2431 
2432 			/* Take a reference, as otherwise the wait_rendering
2433 			 * below may cause the object to get freed out from
2434 			 * under us.
2435 			 */
2436 			drm_gem_object_reference(old_obj);
2437 
2438 			/* i915 uses fences for GPU access to tiled buffers */
2439 			if (IS_I965G(dev) || !old_obj_priv->active)
2440 				break;
2441 
2442 			/* This brings the object to the head of the LRU if it
2443 			 * had been written to.  The only way this should
2444 			 * result in us waiting longer than the expected
2445 			 * optimal amount of time is if there was a
2446 			 * fence-using buffer later that was read-only.
2447 			 */
2448 			i915_gem_object_flush_gpu_write_domain(old_obj);
2449 			ret = i915_gem_object_wait_rendering(old_obj);
2450 			if (ret != 0) {
2451 				drm_gem_object_unreference(old_obj);
2452 				return ret;
2453 			}
2454 
2455 			break;
2456 		}
2457 
2458 		/*
2459 		 * Zap this virtual mapping so we can set up a fence again
2460 		 * for this object next time we need it.
2461 		 */
2462 		i915_gem_release_mmap(old_obj);
2463 
2464 		i = old_obj_priv->fence_reg;
2465 		reg = &dev_priv->fence_regs[i];
2466 
2467 		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
2468 		list_del_init(&old_obj_priv->fence_list);
2469 
2470 		drm_gem_object_unreference(old_obj);
2471 	}
2472 
2473 	obj_priv->fence_reg = i;
2474 	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
2475 
2476 	reg->obj = obj;
2477 
2478 	if (IS_I965G(dev))
2479 		i965_write_fence_reg(reg);
2480 	else if (IS_I9XX(dev))
2481 		i915_write_fence_reg(reg);
2482 	else
2483 		i830_write_fence_reg(reg);
2484 
2485 	trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
2486 
2487 	return 0;
2488 }
2489 
2490 /**
2491  * i915_gem_clear_fence_reg - clear out fence register info
2492  * @obj: object to clear
2493  *
2494  * Zeroes out the fence register itself and clears out the associated
2495  * data structures in dev_priv and obj_priv.
2496  */
2497 static void
2498 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2499 {
2500 	struct drm_device *dev = obj->dev;
2501 	drm_i915_private_t *dev_priv = dev->dev_private;
2502 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2503 
2504 	if (IS_I965G(dev))
2505 		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2506 	else {
2507 		uint32_t fence_reg;
2508 
2509 		if (obj_priv->fence_reg < 8)
2510 			fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2511 		else
2512 			fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2513 						       8) * 4;
2514 
2515 		I915_WRITE(fence_reg, 0);
2516 	}
2517 
2518 	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
2519 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
2520 	list_del_init(&obj_priv->fence_list);
2521 }
2522 
2523 /**
2524  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2525  * to the buffer to finish, and then resets the fence register.
2526  * @obj: tiled object holding a fence register.
2527  *
2528  * Zeroes out the fence register itself and clears out the associated
2529  * data structures in dev_priv and obj_priv.
2530  */
2531 int
2532 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2533 {
2534 	struct drm_device *dev = obj->dev;
2535 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2536 
2537 	if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2538 		return 0;
2539 
2540 	/* On the i915, GPU access to tiled buffers is via a fence,
2541 	 * therefore we must wait for any outstanding access to complete
2542 	 * before clearing the fence.
2543 	 */
2544 	if (!IS_I965G(dev)) {
2545 		int ret;
2546 
2547 		i915_gem_object_flush_gpu_write_domain(obj);
2548 		i915_gem_object_flush_gtt_write_domain(obj);
2549 		ret = i915_gem_object_wait_rendering(obj);
2550 		if (ret != 0)
2551 			return ret;
2552 	}
2553 
2554 	i915_gem_clear_fence_reg(obj);
2555 
2556 	return 0;
2557 }
2558 
2559 /**
2560  * Finds free space in the GTT aperture and binds the object there.
2561  */
2562 static int
2563 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2564 {
2565 	struct drm_device *dev = obj->dev;
2566 	drm_i915_private_t *dev_priv = dev->dev_private;
2567 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2568 	struct drm_mm_node *free_space;
2569 	bool retry_alloc = false;
2570 	int ret;
2571 
2572 	if (dev_priv->mm.suspended)
2573 		return -EBUSY;
2574 
2575 	if (obj_priv->madv != I915_MADV_WILLNEED) {
2576 		DRM_ERROR("Attempting to bind a purgeable object\n");
2577 		return -EINVAL;
2578 	}
2579 
2580 	if (alignment == 0)
2581 		alignment = i915_gem_get_gtt_alignment(obj);
2582 	if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2583 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2584 		return -EINVAL;
2585 	}
2586 
2587  search_free:
2588 	free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2589 					obj->size, alignment, 0);
2590 	if (free_space != NULL) {
2591 		obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2592 						       alignment);
2593 		if (obj_priv->gtt_space != NULL) {
2594 			obj_priv->gtt_space->private = obj;
2595 			obj_priv->gtt_offset = obj_priv->gtt_space->start;
2596 		}
2597 	}
2598 	if (obj_priv->gtt_space == NULL) {
2599 		/* If the gtt is empty and we're still having trouble
2600 		 * fitting our object in, we're out of memory.
2601 		 */
2602 #if WATCH_LRU
2603 		DRM_INFO("%s: GTT full, evicting something\n", __func__);
2604 #endif
2605 		ret = i915_gem_evict_something(dev, obj->size);
2606 		if (ret)
2607 			return ret;
2608 
2609 		goto search_free;
2610 	}
2611 
2612 #if WATCH_BUF
2613 	DRM_INFO("Binding object of size %zd at 0x%08x\n",
2614 		 obj->size, obj_priv->gtt_offset);
2615 #endif
2616 	if (retry_alloc) {
2617 		i915_gem_object_set_page_gfp_mask(obj,
2618 						  i915_gem_object_get_page_gfp_mask(obj) & ~__GFP_NORETRY);
2619 	}
2620 	ret = i915_gem_object_get_pages(obj);
2621 	if (retry_alloc) {
2622 		i915_gem_object_set_page_gfp_mask(obj,
2623 						  i915_gem_object_get_page_gfp_mask(obj) | __GFP_NORETRY);
2624 	}
2625 	if (ret) {
2626 		drm_mm_put_block(obj_priv->gtt_space);
2627 		obj_priv->gtt_space = NULL;
2628 
2629 		if (ret == -ENOMEM) {
2630 			/* first try to clear up some space from the GTT */
2631 			ret = i915_gem_evict_something(dev, obj->size);
2632 			if (ret) {
2633 				/* now try to shrink everyone else */
2634 				if (!retry_alloc) {
2635 					retry_alloc = true;
2636 					goto search_free;
2637 				}
2638 
2639 				return ret;
2640 			}
2641 
2642 			goto search_free;
2643 		}
2644 
2645 		return ret;
2646 	}
2647 
2648 	/* Create an AGP memory structure pointing at our pages, and bind it
2649 	 * into the GTT.
2650 	 */
2651 	obj_priv->agp_mem = drm_agp_bind_pages(dev,
2652 					       obj_priv->pages,
2653 					       obj->size >> PAGE_SHIFT,
2654 					       obj_priv->gtt_offset,
2655 					       obj_priv->agp_type);
2656 	if (obj_priv->agp_mem == NULL) {
2657 		i915_gem_object_put_pages(obj);
2658 		drm_mm_put_block(obj_priv->gtt_space);
2659 		obj_priv->gtt_space = NULL;
2660 
2661 		ret = i915_gem_evict_something(dev, obj->size);
2662 		if (ret)
2663 			return ret;
2664 
2665 		goto search_free;
2666 	}
2667 	atomic_inc(&dev->gtt_count);
2668 	atomic_add(obj->size, &dev->gtt_memory);
2669 
2670 	/* Assert that the object is not currently in any GPU domain. As it
2671 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2672 	 * a GPU cache
2673 	 */
2674 	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2675 	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2676 
2677 	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2678 
2679 	return 0;
2680 }
2681 
2682 void
2683 i915_gem_clflush_object(struct drm_gem_object *obj)
2684 {
2685 	struct drm_i915_gem_object	*obj_priv = obj->driver_private;
2686 
2687 	/* If we don't have a page list set up, then we're not pinned
2688 	 * to GPU, and we can ignore the cache flush because it'll happen
2689 	 * again at bind time.
2690 	 */
2691 	if (obj_priv->pages == NULL)
2692 		return;
2693 
2694 	trace_i915_gem_object_clflush(obj);
2695 
2696 	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2697 }
2698 
2699 /** Flushes any GPU write domain for the object if it's dirty. */
2700 static void
2701 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2702 {
2703 	struct drm_device *dev = obj->dev;
2704 	uint32_t seqno;
2705 	uint32_t old_write_domain;
2706 
2707 	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2708 		return;
2709 
2710 	/* Queue the GPU write cache flushing we need. */
2711 	old_write_domain = obj->write_domain;
2712 	i915_gem_flush(dev, 0, obj->write_domain);
2713 	seqno = i915_add_request(dev, NULL, obj->write_domain);
2714 	obj->write_domain = 0;
2715 	i915_gem_object_move_to_active(obj, seqno);
2716 
2717 	trace_i915_gem_object_change_domain(obj,
2718 					    obj->read_domains,
2719 					    old_write_domain);
2720 }
2721 
2722 /** Flushes the GTT write domain for the object if it's dirty. */
2723 static void
2724 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2725 {
2726 	uint32_t old_write_domain;
2727 
2728 	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2729 		return;
2730 
2731 	/* No actual flushing is required for the GTT write domain.   Writes
2732 	 * to it immediately go to main memory as far as we know, so there's
2733 	 * no chipset flush.  It also doesn't land in render cache.
2734 	 */
2735 	old_write_domain = obj->write_domain;
2736 	obj->write_domain = 0;
2737 
2738 	trace_i915_gem_object_change_domain(obj,
2739 					    obj->read_domains,
2740 					    old_write_domain);
2741 }
2742 
2743 /** Flushes the CPU write domain for the object if it's dirty. */
2744 static void
2745 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2746 {
2747 	struct drm_device *dev = obj->dev;
2748 	uint32_t old_write_domain;
2749 
2750 	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2751 		return;
2752 
2753 	i915_gem_clflush_object(obj);
2754 	drm_agp_chipset_flush(dev);
2755 	old_write_domain = obj->write_domain;
2756 	obj->write_domain = 0;
2757 
2758 	trace_i915_gem_object_change_domain(obj,
2759 					    obj->read_domains,
2760 					    old_write_domain);
2761 }
2762 
2763 /**
2764  * Moves a single object to the GTT read, and possibly write domain.
2765  *
2766  * This function returns when the move is complete, including waiting on
2767  * flushes to occur.
2768  */
2769 int
2770 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2771 {
2772 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
2773 	uint32_t old_write_domain, old_read_domains;
2774 	int ret;
2775 
2776 	/* Not valid to be called on unbound objects. */
2777 	if (obj_priv->gtt_space == NULL)
2778 		return -EINVAL;
2779 
2780 	i915_gem_object_flush_gpu_write_domain(obj);
2781 	/* Wait on any GPU rendering and flushing to occur. */
2782 	ret = i915_gem_object_wait_rendering(obj);
2783 	if (ret != 0)
2784 		return ret;
2785 
2786 	old_write_domain = obj->write_domain;
2787 	old_read_domains = obj->read_domains;
2788 
2789 	/* If we're writing through the GTT domain, then CPU and GPU caches
2790 	 * will need to be invalidated at next use.
2791 	 */
2792 	if (write)
2793 		obj->read_domains &= I915_GEM_DOMAIN_GTT;
2794 
2795 	i915_gem_object_flush_cpu_write_domain(obj);
2796 
2797 	/* It should now be out of any other write domains, and we can update
2798 	 * the domain values for our changes.
2799 	 */
2800 	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2801 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
2802 	if (write) {
2803 		obj->write_domain = I915_GEM_DOMAIN_GTT;
2804 		obj_priv->dirty = 1;
2805 	}
2806 
2807 	trace_i915_gem_object_change_domain(obj,
2808 					    old_read_domains,
2809 					    old_write_domain);
2810 
2811 	return 0;
2812 }
2813 
2814 /**
2815  * Moves a single object to the CPU read, and possibly write domain.
2816  *
2817  * This function returns when the move is complete, including waiting on
2818  * flushes to occur.
2819  */
2820 static int
2821 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2822 {
2823 	uint32_t old_write_domain, old_read_domains;
2824 	int ret;
2825 
2826 	i915_gem_object_flush_gpu_write_domain(obj);
2827 	/* Wait on any GPU rendering and flushing to occur. */
2828 	ret = i915_gem_object_wait_rendering(obj);
2829 	if (ret != 0)
2830 		return ret;
2831 
2832 	i915_gem_object_flush_gtt_write_domain(obj);
2833 
2834 	/* If we have a partially-valid cache of the object in the CPU,
2835 	 * finish invalidating it and free the per-page flags.
2836 	 */
2837 	i915_gem_object_set_to_full_cpu_read_domain(obj);
2838 
2839 	old_write_domain = obj->write_domain;
2840 	old_read_domains = obj->read_domains;
2841 
2842 	/* Flush the CPU cache if it's still invalid. */
2843 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2844 		i915_gem_clflush_object(obj);
2845 
2846 		obj->read_domains |= I915_GEM_DOMAIN_CPU;
2847 	}
2848 
2849 	/* It should now be out of any other write domains, and we can update
2850 	 * the domain values for our changes.
2851 	 */
2852 	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2853 
2854 	/* If we're writing through the CPU, then the GPU read domains will
2855 	 * need to be invalidated at next use.
2856 	 */
2857 	if (write) {
2858 		obj->read_domains &= I915_GEM_DOMAIN_CPU;
2859 		obj->write_domain = I915_GEM_DOMAIN_CPU;
2860 	}
2861 
2862 	trace_i915_gem_object_change_domain(obj,
2863 					    old_read_domains,
2864 					    old_write_domain);
2865 
2866 	return 0;
2867 }
2868 
2869 /*
2870  * Set the next domain for the specified object. This
2871  * may not actually perform the necessary flushing/invalidating though,
2872  * as that may want to be batched with other set_domain operations.
2873  *
2874  * This is (we hope) the only really tricky part of gem. The goal
2875  * is fairly simple -- track which caches hold bits of the object
2876  * and make sure they remain coherent. A few concrete examples may
2877  * help to explain how it works. For shorthand, we use the notation
2878  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2879  * a pair of read and write domain masks.
2880  *
2881  * Case 1: the batch buffer
2882  *
2883  *	1. Allocated
2884  *	2. Written by CPU
2885  *	3. Mapped to GTT
2886  *	4. Read by GPU
2887  *	5. Unmapped from GTT
2888  *	6. Freed
2889  *
2890  *	Let's take these a step at a time
2891  *
2892  *	1. Allocated
2893  *		Pages allocated from the kernel may still have
2894  *		cache contents, so we set them to (CPU, CPU) always.
2895  *	2. Written by CPU (using pwrite)
2896  *		The pwrite function calls set_domain (CPU, CPU) and
2897  *		this function does nothing (as nothing changes)
2898  *	3. Mapped to GTT
2899  *		This function asserts that the object is not
2900  *		currently in any GPU-based read or write domains
2901  *	4. Read by GPU
2902  *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
2903  *		As write_domain is zero, this function adds in the
2904  *		current read domains (CPU+COMMAND, 0).
2905  *		flush_domains is set to CPU.
2906  *		invalidate_domains is set to COMMAND
2907  *		clflush is run to get data out of the CPU caches
2908  *		then i915_dev_set_domain calls i915_gem_flush to
2909  *		emit an MI_FLUSH and drm_agp_chipset_flush
2910  *	5. Unmapped from GTT
2911  *		i915_gem_object_unbind calls set_domain (CPU, CPU)
2912  *		flush_domains and invalidate_domains end up both zero
2913  *		so no flushing/invalidating happens
2914  *	6. Freed
2915  *		yay, done
2916  *
2917  * Case 2: The shared render buffer
2918  *
2919  *	1. Allocated
2920  *	2. Mapped to GTT
2921  *	3. Read/written by GPU
2922  *	4. set_domain to (CPU,CPU)
2923  *	5. Read/written by CPU
2924  *	6. Read/written by GPU
2925  *
2926  *	1. Allocated
2927  *		Same as last example, (CPU, CPU)
2928  *	2. Mapped to GTT
2929  *		Nothing changes (assertions find that it is not in the GPU)
2930  *	3. Read/written by GPU
2931  *		execbuffer calls set_domain (RENDER, RENDER)
2932  *		flush_domains gets CPU
2933  *		invalidate_domains gets GPU
2934  *		clflush (obj)
2935  *		MI_FLUSH and drm_agp_chipset_flush
2936  *	4. set_domain (CPU, CPU)
2937  *		flush_domains gets GPU
2938  *		invalidate_domains gets CPU
2939  *		wait_rendering (obj) to make sure all drawing is complete.
2940  *		This will include an MI_FLUSH to get the data from GPU
2941  *		to memory
2942  *		clflush (obj) to invalidate the CPU cache
2943  *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2944  *	5. Read/written by CPU
2945  *		cache lines are loaded and dirtied
2946  *	6. Read written by GPU
2947  *		Same as last GPU access
2948  *
2949  * Case 3: The constant buffer
2950  *
2951  *	1. Allocated
2952  *	2. Written by CPU
2953  *	3. Read by GPU
2954  *	4. Updated (written) by CPU again
2955  *	5. Read by GPU
2956  *
2957  *	1. Allocated
2958  *		(CPU, CPU)
2959  *	2. Written by CPU
2960  *		(CPU, CPU)
2961  *	3. Read by GPU
2962  *		(CPU+RENDER, 0)
2963  *		flush_domains = CPU
2964  *		invalidate_domains = RENDER
2965  *		clflush (obj)
2966  *		MI_FLUSH
2967  *		drm_agp_chipset_flush
2968  *	4. Updated (written) by CPU again
2969  *		(CPU, CPU)
2970  *		flush_domains = 0 (no previous write domain)
2971  *		invalidate_domains = 0 (no new read domains)
2972  *	5. Read by GPU
2973  *		(CPU+RENDER, 0)
2974  *		flush_domains = CPU
2975  *		invalidate_domains = RENDER
2976  *		clflush (obj)
2977  *		MI_FLUSH
2978  *		drm_agp_chipset_flush
2979  */
2980 static void
2981 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2982 {
2983 	struct drm_device		*dev = obj->dev;
2984 	struct drm_i915_gem_object	*obj_priv = obj->driver_private;
2985 	uint32_t			invalidate_domains = 0;
2986 	uint32_t			flush_domains = 0;
2987 	uint32_t			old_read_domains;
2988 
2989 	BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2990 	BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2991 
2992 	intel_mark_busy(dev, obj);
2993 
2994 #if WATCH_BUF
2995 	DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2996 		 __func__, obj,
2997 		 obj->read_domains, obj->pending_read_domains,
2998 		 obj->write_domain, obj->pending_write_domain);
2999 #endif
3000 	/*
3001 	 * If the object isn't moving to a new write domain,
3002 	 * let the object stay in multiple read domains
3003 	 */
3004 	if (obj->pending_write_domain == 0)
3005 		obj->pending_read_domains |= obj->read_domains;
3006 	else
3007 		obj_priv->dirty = 1;
3008 
3009 	/*
3010 	 * Flush the current write domain if
3011 	 * the new read domains don't match. Invalidate
3012 	 * any read domains which differ from the old
3013 	 * write domain
3014 	 */
3015 	if (obj->write_domain &&
3016 	    obj->write_domain != obj->pending_read_domains) {
3017 		flush_domains |= obj->write_domain;
3018 		invalidate_domains |=
3019 			obj->pending_read_domains & ~obj->write_domain;
3020 	}
3021 	/*
3022 	 * Invalidate any read caches which may have
3023 	 * stale data. That is, any new read domains.
3024 	 */
3025 	invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3026 	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
3027 #if WATCH_BUF
3028 		DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
3029 			 __func__, flush_domains, invalidate_domains);
3030 #endif
3031 		i915_gem_clflush_object(obj);
3032 	}
3033 
3034 	old_read_domains = obj->read_domains;
3035 
3036 	/* The actual obj->write_domain will be updated with
3037 	 * pending_write_domain after we emit the accumulated flush for all
3038 	 * of our domain changes in execbuffers (which clears objects'
3039 	 * write_domains).  So if we have a current write domain that we
3040 	 * aren't changing, set pending_write_domain to that.
3041 	 */
3042 	if (flush_domains == 0 && obj->pending_write_domain == 0)
3043 		obj->pending_write_domain = obj->write_domain;
3044 	obj->read_domains = obj->pending_read_domains;
3045 
3046 	dev->invalidate_domains |= invalidate_domains;
3047 	dev->flush_domains |= flush_domains;
3048 #if WATCH_BUF
3049 	DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3050 		 __func__,
3051 		 obj->read_domains, obj->write_domain,
3052 		 dev->invalidate_domains, dev->flush_domains);
3053 #endif
3054 
3055 	trace_i915_gem_object_change_domain(obj,
3056 					    old_read_domains,
3057 					    obj->write_domain);
3058 }
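
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * mirrors the flush/invalidate computation above for Case 1, step 4 of
 * the comment -- a batch buffer last written by the CPU being read
 * through the command domain.  The helper name is hypothetical.
 */
#if 0
static void i915_gem_domain_transition_example(void)
{
	uint32_t read_domains = I915_GEM_DOMAIN_CPU;
	uint32_t write_domain = I915_GEM_DOMAIN_CPU;
	uint32_t pending_read_domains = I915_GEM_DOMAIN_COMMAND;
	uint32_t pending_write_domain = 0;
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/* No new write domain, so the object keeps its old read domains. */
	if (pending_write_domain == 0)
		pending_read_domains |= read_domains;

	/* The CPU write domain differs from the new read set, so it must
	 * be flushed; any new read domain it doesn't cover is invalidated. */
	if (write_domain && write_domain != pending_read_domains) {
		flush_domains |= write_domain;
		invalidate_domains |= pending_read_domains & ~write_domain;
	}
	invalidate_domains |= pending_read_domains & ~read_domains;

	/* Matches the comment above: flush_domains gets CPU (clflush plus
	 * chipset flush), invalidate_domains gets COMMAND (MI_FLUSH). */
	BUG_ON(flush_domains != I915_GEM_DOMAIN_CPU);
	BUG_ON(invalidate_domains != I915_GEM_DOMAIN_COMMAND);
}
#endif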
3059 
3060 /**
3061  * Moves the object from a partially CPU read to a full one.
3062  *
3063  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3064  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3065  */
3066 static void
3067 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3068 {
3069 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
3070 
3071 	if (!obj_priv->page_cpu_valid)
3072 		return;
3073 
3074 	/* If we're partially in the CPU read domain, finish moving it in.
3075 	 */
3076 	if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3077 		int i;
3078 
3079 		for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3080 			if (obj_priv->page_cpu_valid[i])
3081 				continue;
3082 			drm_clflush_pages(obj_priv->pages + i, 1);
3083 		}
3084 	}
3085 
3086 	/* Free the page_cpu_valid mappings which are now stale, whether
3087 	 * or not we've got I915_GEM_DOMAIN_CPU.
3088 	 */
3089 	kfree(obj_priv->page_cpu_valid);
3090 	obj_priv->page_cpu_valid = NULL;
3091 }
3092 
3093 /**
3094  * Set the CPU read domain on a range of the object.
3095  *
3096  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3097  * not entirely valid.  The page_cpu_valid member of the object tracks which
3098  * pages have been flushed, and will be respected by
3099  * i915_gem_object_set_to_cpu_domain() if it is later called to get a valid mapping
3100  * of the whole object.
3101  *
3102  * This function returns when the move is complete, including waiting on
3103  * flushes to occur.
3104  */
3105 static int
3106 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3107 					  uint64_t offset, uint64_t size)
3108 {
3109 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
3110 	uint32_t old_read_domains;
3111 	int i, ret;
3112 
3113 	if (offset == 0 && size == obj->size)
3114 		return i915_gem_object_set_to_cpu_domain(obj, 0);
3115 
3116 	i915_gem_object_flush_gpu_write_domain(obj);
3117 	/* Wait on any GPU rendering and flushing to occur. */
3118 	ret = i915_gem_object_wait_rendering(obj);
3119 	if (ret != 0)
3120 		return ret;
3121 	i915_gem_object_flush_gtt_write_domain(obj);
3122 
3123 	/* If we're already fully in the CPU read domain, we're done. */
3124 	if (obj_priv->page_cpu_valid == NULL &&
3125 	    (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3126 		return 0;
3127 
3128 	/* Otherwise, create/clear the per-page CPU read domain flag if we're
3129 	 * newly adding I915_GEM_DOMAIN_CPU
3130 	 */
3131 	if (obj_priv->page_cpu_valid == NULL) {
3132 		obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3133 						   GFP_KERNEL);
3134 		if (obj_priv->page_cpu_valid == NULL)
3135 			return -ENOMEM;
3136 	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3137 		memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3138 
3139 	/* Flush the cache on any pages that are still invalid from the CPU's
3140 	 * perspective.
3141 	 */
3142 	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3143 	     i++) {
3144 		if (obj_priv->page_cpu_valid[i])
3145 			continue;
3146 
3147 		drm_clflush_pages(obj_priv->pages + i, 1);
3148 
3149 		obj_priv->page_cpu_valid[i] = 1;
3150 	}
3151 
3152 	/* It should now be out of any other write domains, and we can update
3153 	 * the domain values for our changes.
3154 	 */
3155 	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3156 
3157 	old_read_domains = obj->read_domains;
3158 	obj->read_domains |= I915_GEM_DOMAIN_CPU;
3159 
3160 	trace_i915_gem_object_change_domain(obj,
3161 					    old_read_domains,
3162 					    obj->write_domain);
3163 
3164 	return 0;
3165 }
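
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * shows which pages the loop above touches for one hypothetical range,
 * assuming 4KB pages.
 */
#if 0
static void i915_cpu_read_range_example(void)
{
	uint64_t offset = 0x800, size = 0x2000;
	uint64_t first = offset / PAGE_SIZE;
	uint64_t last = (offset + size - 1) / PAGE_SIZE;

	/* Bytes 0x800..0x27ff span pages 0, 1 and 2; each of those pages
	 * is clflushed (if not already valid) and marked page_cpu_valid. */
	BUG_ON(first != 0 || last != 2);
}
#endif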
3166 
3167 /**
3168  * Pin an object to the GTT and evaluate the relocations landing in it.
3169  */
3170 static int
3171 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3172 				 struct drm_file *file_priv,
3173 				 struct drm_i915_gem_exec_object *entry,
3174 				 struct drm_i915_gem_relocation_entry *relocs)
3175 {
3176 	struct drm_device *dev = obj->dev;
3177 	drm_i915_private_t *dev_priv = dev->dev_private;
3178 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
3179 	int i, ret;
3180 	void __iomem *reloc_page;
3181 
3182 	/* Choose the GTT offset for our buffer and put it there. */
3183 	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3184 	if (ret)
3185 		return ret;
3186 
3187 	entry->offset = obj_priv->gtt_offset;
3188 
3189 	/* Apply the relocations, using the GTT aperture to avoid cache
3190 	 * flushing requirements.
3191 	 */
3192 	for (i = 0; i < entry->relocation_count; i++) {
3193 		struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3194 		struct drm_gem_object *target_obj;
3195 		struct drm_i915_gem_object *target_obj_priv;
3196 		uint32_t reloc_val, reloc_offset;
3197 		uint32_t __iomem *reloc_entry;
3198 
3199 		target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3200 						   reloc->target_handle);
3201 		if (target_obj == NULL) {
3202 			i915_gem_object_unpin(obj);
3203 			return -EBADF;
3204 		}
3205 		target_obj_priv = target_obj->driver_private;
3206 
3207 #if WATCH_RELOC
3208 		DRM_INFO("%s: obj %p offset %08x target %d "
3209 			 "read %08x write %08x gtt %08x "
3210 			 "presumed %08x delta %08x\n",
3211 			 __func__,
3212 			 obj,
3213 			 (int) reloc->offset,
3214 			 (int) reloc->target_handle,
3215 			 (int) reloc->read_domains,
3216 			 (int) reloc->write_domain,
3217 			 (int) target_obj_priv->gtt_offset,
3218 			 (int) reloc->presumed_offset,
3219 			 reloc->delta);
3220 #endif
3221 
3222 		/* The target buffer should have appeared before us in the
3223 		 * exec_object list, so it should have a GTT space bound by now.
3224 		 */
3225 		if (target_obj_priv->gtt_space == NULL) {
3226 			DRM_ERROR("No GTT space found for object %d\n",
3227 				  reloc->target_handle);
3228 			drm_gem_object_unreference(target_obj);
3229 			i915_gem_object_unpin(obj);
3230 			return -EINVAL;
3231 		}
3232 
3233 		/* Validate that the target is in a valid r/w GPU domain */
3234 		if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3235 		    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3236 			DRM_ERROR("reloc with read/write CPU domains: "
3237 				  "obj %p target %d offset %d "
3238 				  "read %08x write %08x",
3239 				  obj, reloc->target_handle,
3240 				  (int) reloc->offset,
3241 				  reloc->read_domains,
3242 				  reloc->write_domain);
3243 			drm_gem_object_unreference(target_obj);
3244 			i915_gem_object_unpin(obj);
3245 			return -EINVAL;
3246 		}
3247 		if (reloc->write_domain && target_obj->pending_write_domain &&
3248 		    reloc->write_domain != target_obj->pending_write_domain) {
3249 			DRM_ERROR("Write domain conflict: "
3250 				  "obj %p target %d offset %d "
3251 				  "new %08x old %08x\n",
3252 				  obj, reloc->target_handle,
3253 				  (int) reloc->offset,
3254 				  reloc->write_domain,
3255 				  target_obj->pending_write_domain);
3256 			drm_gem_object_unreference(target_obj);
3257 			i915_gem_object_unpin(obj);
3258 			return -EINVAL;
3259 		}
3260 
3261 		target_obj->pending_read_domains |= reloc->read_domains;
3262 		target_obj->pending_write_domain |= reloc->write_domain;
3263 
3264 		/* If the relocation already has the right value in it, no
3265 		 * more work needs to be done.
3266 		 */
3267 		if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3268 			drm_gem_object_unreference(target_obj);
3269 			continue;
3270 		}
3271 
3272 		/* Check that the relocation address is valid... */
3273 		if (reloc->offset > obj->size - 4) {
3274 			DRM_ERROR("Relocation beyond object bounds: "
3275 				  "obj %p target %d offset %d size %d.\n",
3276 				  obj, reloc->target_handle,
3277 				  (int) reloc->offset, (int) obj->size);
3278 			drm_gem_object_unreference(target_obj);
3279 			i915_gem_object_unpin(obj);
3280 			return -EINVAL;
3281 		}
3282 		if (reloc->offset & 3) {
3283 			DRM_ERROR("Relocation not 4-byte aligned: "
3284 				  "obj %p target %d offset %d.\n",
3285 				  obj, reloc->target_handle,
3286 				  (int) reloc->offset);
3287 			drm_gem_object_unreference(target_obj);
3288 			i915_gem_object_unpin(obj);
3289 			return -EINVAL;
3290 		}
3291 
3292 		/* and points to somewhere within the target object. */
3293 		if (reloc->delta >= target_obj->size) {
3294 			DRM_ERROR("Relocation beyond target object bounds: "
3295 				  "obj %p target %d delta %d size %d.\n",
3296 				  obj, reloc->target_handle,
3297 				  (int) reloc->delta, (int) target_obj->size);
3298 			drm_gem_object_unreference(target_obj);
3299 			i915_gem_object_unpin(obj);
3300 			return -EINVAL;
3301 		}
3302 
3303 		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3304 		if (ret != 0) {
3305 			drm_gem_object_unreference(target_obj);
3306 			i915_gem_object_unpin(obj);
3307 			return -EINVAL;
3308 		}
3309 
3310 		/* Map the page containing the relocation we're going to
3311 		 * perform.
3312 		 */
3313 		reloc_offset = obj_priv->gtt_offset + reloc->offset;
3314 		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3315 						      (reloc_offset &
3316 						       ~(PAGE_SIZE - 1)));
3317 		reloc_entry = (uint32_t __iomem *)(reloc_page +
3318 						   (reloc_offset & (PAGE_SIZE - 1)));
3319 		reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3320 
3321 #if WATCH_BUF
3322 		DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3323 			  obj, (unsigned int) reloc->offset,
3324 			  readl(reloc_entry), reloc_val);
3325 #endif
3326 		writel(reloc_val, reloc_entry);
3327 		io_mapping_unmap_atomic(reloc_page);
3328 
3329 		/* The updated presumed offset for this entry will be
3330 		 * copied back out to the user.
3331 		 */
3332 		reloc->presumed_offset = target_obj_priv->gtt_offset;
3333 
3334 		drm_gem_object_unreference(target_obj);
3335 	}
3336 
3337 #if WATCH_BUF
3338 	if (0)
3339 		i915_gem_dump_object(obj, 128, __func__, ~0);
3340 #endif
3341 	return 0;
3342 }
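
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * works through the relocation arithmetic above for hypothetical
 * offsets, assuming 4KB pages.  All values are made up.
 */
#if 0
static void i915_gem_reloc_math_example(void)
{
	uint32_t obj_gtt_offset = 0x00100000;	 /* where obj is bound */
	uint32_t target_gtt_offset = 0x00400000; /* where the target is bound */
	uint32_t reloc_offset_in_obj = 0x1234;	 /* reloc->offset */
	uint32_t reloc_delta = 0x40;		 /* reloc->delta */

	uint32_t reloc_offset = obj_gtt_offset + reloc_offset_in_obj;
	uint32_t page_base = reloc_offset & ~(PAGE_SIZE - 1);
	uint32_t page_offset = reloc_offset & (PAGE_SIZE - 1);
	uint32_t reloc_val = target_gtt_offset + reloc_delta;

	/* The dword at GTT address 0x00101234 is reached through the
	 * atomic mapping of the page at 0x00101000 (offset 0x234 into
	 * that page) and rewritten to 0x00400040. */
	BUG_ON(page_base != 0x00101000);
	BUG_ON(page_offset != 0x234);
	BUG_ON(reloc_val != 0x00400040);
}
#endif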
3343 
3344 /** Dispatch a batchbuffer to the ring
3345  */
3346 static int
3347 i915_dispatch_gem_execbuffer(struct drm_device *dev,
3348 			      struct drm_i915_gem_execbuffer *exec,
3349 			      struct drm_clip_rect *cliprects,
3350 			      uint64_t exec_offset)
3351 {
3352 	drm_i915_private_t *dev_priv = dev->dev_private;
3353 	int nbox = exec->num_cliprects;
3354 	int i = 0, count;
3355 	uint32_t exec_start, exec_len;
3356 	RING_LOCALS;
3357 
3358 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3359 	exec_len = (uint32_t) exec->batch_len;
3360 
3361 	trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno + 1);
3362 
3363 	count = nbox ? nbox : 1;
3364 
3365 	for (i = 0; i < count; i++) {
3366 		if (i < nbox) {
3367 			int ret = i915_emit_box(dev, cliprects, i,
3368 						exec->DR1, exec->DR4);
3369 			if (ret)
3370 				return ret;
3371 		}
3372 
3373 		if (IS_I830(dev) || IS_845G(dev)) {
3374 			BEGIN_LP_RING(4);
3375 			OUT_RING(MI_BATCH_BUFFER);
3376 			OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3377 			OUT_RING(exec_start + exec_len - 4);
3378 			OUT_RING(0);
3379 			ADVANCE_LP_RING();
3380 		} else {
3381 			BEGIN_LP_RING(2);
3382 			if (IS_I965G(dev)) {
3383 				OUT_RING(MI_BATCH_BUFFER_START |
3384 					 (2 << 6) |
3385 					 MI_BATCH_NON_SECURE_I965);
3386 				OUT_RING(exec_start);
3387 			} else {
3388 				OUT_RING(MI_BATCH_BUFFER_START |
3389 					 (2 << 6));
3390 				OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3391 			}
3392 			ADVANCE_LP_RING();
3393 		}
3394 	}
3395 
3396 	/* XXX breadcrumb */
3397 	return 0;
3398 }
3399 
3400 /* Throttle our rendering by waiting until the ring has completed our requests
3401  * emitted over 20 msec ago.
3402  *
3403  * Note that if we were to use the current jiffies each time around the loop,
3404  * we wouldn't escape the function with any frames outstanding if the time to
3405  * render a frame was over 20ms.
3406  *
3407  * This should get us reasonable parallelism between CPU and GPU but also
3408  * relatively low latency when blocking on a particular request to finish.
3409  */
3410 static int
3411 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3412 {
3413 	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3414 	int ret = 0;
3415 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3416 
3417 	mutex_lock(&dev->struct_mutex);
3418 	while (!list_empty(&i915_file_priv->mm.request_list)) {
3419 		struct drm_i915_gem_request *request;
3420 
3421 		request = list_first_entry(&i915_file_priv->mm.request_list,
3422 					   struct drm_i915_gem_request,
3423 					   client_list);
3424 
3425 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3426 			break;
3427 
3428 		ret = i915_wait_request(dev, request->seqno);
3429 		if (ret != 0)
3430 			break;
3431 	}
3432 	mutex_unlock(&dev->struct_mutex);
3433 
3434 	return ret;
3435 }
3436 
3437 static int
3438 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
3439 			      uint32_t buffer_count,
3440 			      struct drm_i915_gem_relocation_entry **relocs)
3441 {
3442 	uint32_t reloc_count = 0, reloc_index = 0, i;
3443 	int ret;
3444 
3445 	*relocs = NULL;
3446 	for (i = 0; i < buffer_count; i++) {
3447 		if (reloc_count + exec_list[i].relocation_count < reloc_count)
3448 			return -EINVAL;
3449 		reloc_count += exec_list[i].relocation_count;
3450 	}
3451 
3452 	*relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3453 	if (*relocs == NULL)
3454 		return -ENOMEM;
3455 
3456 	for (i = 0; i < buffer_count; i++) {
3457 		struct drm_i915_gem_relocation_entry __user *user_relocs;
3458 
3459 		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3460 
3461 		ret = copy_from_user(&(*relocs)[reloc_index],
3462 				     user_relocs,
3463 				     exec_list[i].relocation_count *
3464 				     sizeof(**relocs));
3465 		if (ret != 0) {
3466 			drm_free_large(*relocs);
3467 			*relocs = NULL;
3468 			return -EFAULT;
3469 		}
3470 
3471 		reloc_index += exec_list[i].relocation_count;
3472 	}
3473 
3474 	return 0;
3475 }
3476 
3477 static int
3478 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
3479 			    uint32_t buffer_count,
3480 			    struct drm_i915_gem_relocation_entry *relocs)
3481 {
3482 	uint32_t reloc_count = 0, i;
3483 	int ret = 0;
3484 
3485 	for (i = 0; i < buffer_count; i++) {
3486 		struct drm_i915_gem_relocation_entry __user *user_relocs;
3487 		int unwritten;
3488 
3489 		user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3490 
3491 		unwritten = copy_to_user(user_relocs,
3492 					 &relocs[reloc_count],
3493 					 exec_list[i].relocation_count *
3494 					 sizeof(*relocs));
3495 
3496 		if (unwritten) {
3497 			ret = -EFAULT;
3498 			goto err;
3499 		}
3500 
3501 		reloc_count += exec_list[i].relocation_count;
3502 	}
3503 
3504 err:
3505 	drm_free_large(relocs);
3506 
3507 	return ret;
3508 }
3509 
3510 static int
3511 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer *exec,
3512 			   uint64_t exec_offset)
3513 {
3514 	uint32_t exec_start, exec_len;
3515 
3516 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3517 	exec_len = (uint32_t) exec->batch_len;
3518 
3519 	if ((exec_start | exec_len) & 0x7)
3520 		return -EINVAL;
3521 
3522 	if (!exec_start)
3523 		return -EINVAL;
3524 
3525 	return 0;
3526 }
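
/*
 * Illustrative sketch, not built into the driver (kept under "#if 0"):
 * shows the alignment rule enforced above with hypothetical batch
 * placements.
 */
#if 0
static void i915_gem_check_execbuffer_example(void)
{
	uint32_t exec_len = 0x100;

	/* Both the start address and the length must be 8-byte aligned
	 * (and the start must be non-zero). */
	BUG_ON((0x00010008 | exec_len) & 0x7);	  /* accepted */
	BUG_ON(!((0x00010004 | exec_len) & 0x7)); /* rejected: misaligned */
}
#endif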
3527 
3528 int
3529 i915_gem_execbuffer(struct drm_device *dev, void *data,
3530 		    struct drm_file *file_priv)
3531 {
3532 	drm_i915_private_t *dev_priv = dev->dev_private;
3533 	struct drm_i915_gem_execbuffer *args = data;
3534 	struct drm_i915_gem_exec_object *exec_list = NULL;
3535 	struct drm_gem_object **object_list = NULL;
3536 	struct drm_gem_object *batch_obj;
3537 	struct drm_i915_gem_object *obj_priv;
3538 	struct drm_clip_rect *cliprects = NULL;
3539 	struct drm_i915_gem_relocation_entry *relocs;
3540 	int ret, ret2, i, pinned = 0;
3541 	uint64_t exec_offset;
3542 	uint32_t seqno, flush_domains, reloc_index;
3543 	int pin_tries;
3544 
3545 #if WATCH_EXEC
3546 	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3547 		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3548 #endif
3549 
3550 	if (args->buffer_count < 1) {
3551 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3552 		return -EINVAL;
3553 	}
3554 	/* Copy in the exec list from userland */
3555 	exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
3556 	object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
3557 	if (exec_list == NULL || object_list == NULL) {
3558 		DRM_ERROR("Failed to allocate exec or object list "
3559 			  "for %d buffers\n",
3560 			  args->buffer_count);
3561 		ret = -ENOMEM;
3562 		goto pre_mutex_err;
3563 	}
3564 	ret = copy_from_user(exec_list,
3565 			     (struct drm_i915_gem_exec_object __user *)
3566 			     (uintptr_t) args->buffers_ptr,
3567 			     sizeof(*exec_list) * args->buffer_count);
3568 	if (ret != 0) {
3569 		DRM_ERROR("copy %d exec entries failed %d\n",
3570 			  args->buffer_count, ret);
3571 		goto pre_mutex_err;
3572 	}
3573 
3574 	if (args->num_cliprects != 0) {
3575 		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3576 				    GFP_KERNEL);
3577 		if (cliprects == NULL) {
			ret = -ENOMEM;
3578 			goto pre_mutex_err;
		}
3579 
3580 		ret = copy_from_user(cliprects,
3581 				     (struct drm_clip_rect __user *)
3582 				     (uintptr_t) args->cliprects_ptr,
3583 				     sizeof(*cliprects) * args->num_cliprects);
3584 		if (ret != 0) {
3585 			DRM_ERROR("copy %d cliprects failed: %d\n",
3586 				  args->num_cliprects, ret);
3587 			goto pre_mutex_err;
3588 		}
3589 	}
3590 
3591 	ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3592 					    &relocs);
3593 	if (ret != 0)
3594 		goto pre_mutex_err;
3595 
3596 	mutex_lock(&dev->struct_mutex);
3597 
3598 	i915_verify_inactive(dev, __FILE__, __LINE__);
3599 
3600 	if (atomic_read(&dev_priv->mm.wedged)) {
3601 		DRM_ERROR("Execbuf while wedged\n");
3602 		mutex_unlock(&dev->struct_mutex);
3603 		ret = -EIO;
		drm_free_large(relocs);
3604 		goto pre_mutex_err;
3605 	}
3606 
3607 	if (dev_priv->mm.suspended) {
3608 		DRM_ERROR("Execbuf while VT-switched.\n");
3609 		mutex_unlock(&dev->struct_mutex);
3610 		ret = -EBUSY;
		drm_free_large(relocs);
3611 		goto pre_mutex_err;
3612 	}
3613 
3614 	/* Look up object handles */
3615 	for (i = 0; i < args->buffer_count; i++) {
3616 		object_list[i] = drm_gem_object_lookup(dev, file_priv,
3617 						       exec_list[i].handle);
3618 		if (object_list[i] == NULL) {
3619 			DRM_ERROR("Invalid object handle %d at index %d\n",
3620 				   exec_list[i].handle, i);
3621 			ret = -EBADF;
3622 			goto err;
3623 		}
3624 
3625 		obj_priv = object_list[i]->driver_private;
3626 		if (obj_priv->in_execbuffer) {
3627 			DRM_ERROR("Object %p appears more than once in object list\n",
3628 				   object_list[i]);
3629 			ret = -EBADF;
3630 			goto err;
3631 		}
3632 		obj_priv->in_execbuffer = true;
3633 	}
3634 
3635 	/* Pin and relocate */
3636 	for (pin_tries = 0; ; pin_tries++) {
3637 		ret = 0;
3638 		reloc_index = 0;
3639 
3640 		for (i = 0; i < args->buffer_count; i++) {
3641 			object_list[i]->pending_read_domains = 0;
3642 			object_list[i]->pending_write_domain = 0;
3643 			ret = i915_gem_object_pin_and_relocate(object_list[i],
3644 							       file_priv,
3645 							       &exec_list[i],
3646 							       &relocs[reloc_index]);
3647 			if (ret)
3648 				break;
3649 			pinned = i + 1;
3650 			reloc_index += exec_list[i].relocation_count;
3651 		}
3652 		/* success */
3653 		if (ret == 0)
3654 			break;
3655 
3656 		/* error other than GTT full, or we've already tried again */
3657 		if (ret != -ENOSPC || pin_tries >= 1) {
3658 			if (ret != -ERESTARTSYS) {
3659 				unsigned long long total_size = 0;
3660 				for (i = 0; i < args->buffer_count; i++)
3661 					total_size += object_list[i]->size;
3662 				DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
3663 					  pinned+1, args->buffer_count,
3664 					  total_size, ret);
3665 				DRM_ERROR("%d objects [%d pinned], "
3666 					  "%d object bytes [%d pinned], "
3667 					  "%d/%d gtt bytes\n",
3668 					  atomic_read(&dev->object_count),
3669 					  atomic_read(&dev->pin_count),
3670 					  atomic_read(&dev->object_memory),
3671 					  atomic_read(&dev->pin_memory),
3672 					  atomic_read(&dev->gtt_memory),
3673 					  dev->gtt_total);
3674 			}
3675 			goto err;
3676 		}
3677 
3678 		/* unpin all of our buffers */
3679 		for (i = 0; i < pinned; i++)
3680 			i915_gem_object_unpin(object_list[i]);
3681 		pinned = 0;
3682 
3683 		/* evict everyone we can from the aperture */
3684 		ret = i915_gem_evict_everything(dev);
3685 		if (ret && ret != -ENOSPC)
3686 			goto err;
3687 	}
3688 
3689 	/* Set the pending read domains for the batch buffer to COMMAND */
3690 	batch_obj = object_list[args->buffer_count-1];
3691 	if (batch_obj->pending_write_domain) {
3692 		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3693 		ret = -EINVAL;
3694 		goto err;
3695 	}
3696 	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3697 
3698 	/* Sanity check the batch buffer, prior to moving objects */
3699 	exec_offset = exec_list[args->buffer_count - 1].offset;
3700 	ret = i915_gem_check_execbuffer(args, exec_offset);
3701 	if (ret != 0) {
3702 		DRM_ERROR("execbuf with invalid offset/length\n");
3703 		goto err;
3704 	}
3705 
3706 	i915_verify_inactive(dev, __FILE__, __LINE__);
3707 
3708 	/* Zero the global flush/invalidate flags. These
3709 	 * will be modified as new domains are computed
3710 	 * for each object
3711 	 */
3712 	dev->invalidate_domains = 0;
3713 	dev->flush_domains = 0;
3714 
3715 	for (i = 0; i < args->buffer_count; i++) {
3716 		struct drm_gem_object *obj = object_list[i];
3717 
3718 		/* Compute new gpu domains and update invalidate/flush */
3719 		i915_gem_object_set_to_gpu_domain(obj);
3720 	}
3721 
3722 	i915_verify_inactive(dev, __FILE__, __LINE__);
3723 
3724 	if (dev->invalidate_domains | dev->flush_domains) {
3725 #if WATCH_EXEC
3726 		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3727 			  __func__,
3728 			 dev->invalidate_domains,
3729 			 dev->flush_domains);
3730 #endif
3731 		i915_gem_flush(dev,
3732 			       dev->invalidate_domains,
3733 			       dev->flush_domains);
3734 		if (dev->flush_domains)
3735 			(void)i915_add_request(dev, file_priv,
3736 					       dev->flush_domains);
3737 	}
3738 
3739 	for (i = 0; i < args->buffer_count; i++) {
3740 		struct drm_gem_object *obj = object_list[i];
3741 		uint32_t old_write_domain = obj->write_domain;
3742 
3743 		obj->write_domain = obj->pending_write_domain;
3744 		trace_i915_gem_object_change_domain(obj,
3745 						    obj->read_domains,
3746 						    old_write_domain);
3747 	}
3748 
3749 	i915_verify_inactive(dev, __FILE__, __LINE__);
3750 
3751 #if WATCH_COHERENCY
3752 	for (i = 0; i < args->buffer_count; i++) {
3753 		i915_gem_object_check_coherency(object_list[i],
3754 						exec_list[i].handle);
3755 	}
3756 #endif
3757 
3758 #if WATCH_EXEC
3759 	i915_gem_dump_object(batch_obj,
3760 			      args->batch_len,
3761 			      __func__,
3762 			      ~0);
3763 #endif
3764 
3765 	/* Exec the batchbuffer */
3766 	ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
3767 	if (ret) {
3768 		DRM_ERROR("dispatch failed %d\n", ret);
3769 		goto err;
3770 	}
3771 
3772 	/*
3773 	 * Ensure that the commands in the batch buffer are
3774 	 * finished before the interrupt fires
3775 	 */
3776 	flush_domains = i915_retire_commands(dev);
3777 
3778 	i915_verify_inactive(dev, __FILE__, __LINE__);
3779 
3780 	/*
3781 	 * Get a seqno representing the execution of the current buffer,
3782 	 * which we can wait on.  We would like to mitigate these interrupts,
3783 	 * likely by only creating seqnos occasionally (so that we have
3784 	 * *some* interrupts representing completion of buffers that we can
3785 	 * wait on when trying to clear up gtt space).
3786 	 */
3787 	seqno = i915_add_request(dev, file_priv, flush_domains);
3788 	BUG_ON(seqno == 0);
3789 	for (i = 0; i < args->buffer_count; i++) {
3790 		struct drm_gem_object *obj = object_list[i];
3791 
3792 		i915_gem_object_move_to_active(obj, seqno);
3793 #if WATCH_LRU
3794 		DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3795 #endif
3796 	}
3797 #if WATCH_LRU
3798 	i915_dump_lru(dev, __func__);
3799 #endif
3800 
3801 	i915_verify_inactive(dev, __FILE__, __LINE__);
3802 
3803 err:
3804 	for (i = 0; i < pinned; i++)
3805 		i915_gem_object_unpin(object_list[i]);
3806 
3807 	for (i = 0; i < args->buffer_count; i++) {
3808 		if (object_list[i]) {
3809 			obj_priv = object_list[i]->driver_private;
3810 			obj_priv->in_execbuffer = false;
3811 		}
3812 		drm_gem_object_unreference(object_list[i]);
3813 	}
3814 
3815 	mutex_unlock(&dev->struct_mutex);
3816 
3817 	if (!ret) {
3818 		/* Copy the new buffer offsets back to the user's exec list. */
3819 		ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3820 				   (uintptr_t) args->buffers_ptr,
3821 				   exec_list,
3822 				   sizeof(*exec_list) * args->buffer_count);
3823 		if (ret) {
3824 			ret = -EFAULT;
3825 			DRM_ERROR("failed to copy %d exec entries "
3826 				  "back to user (%d)\n",
3827 				  args->buffer_count, ret);
3828 		}
3829 	}
3830 
3831 	/* Copy the updated relocations out regardless of current error
3832 	 * state.  Failure to update the relocs would mean that the next
3833 	 * time userland calls execbuf, it would do so with presumed offset
3834 	 * state that didn't match the actual object state.
3835 	 */
3836 	ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3837 					   relocs);
3838 	if (ret2 != 0) {
3839 		DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3840 
3841 		if (ret == 0)
3842 			ret = ret2;
3843 	}
3844 
3845 pre_mutex_err:
3846 	drm_free_large(object_list);
3847 	drm_free_large(exec_list);
3848 	kfree(cliprects);
3849 
3850 	return ret;
3851 }
3852 
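/*
 * Pin an object into the GTT, binding it first if it has no GTT space yet;
 * pre-965 chips additionally need a fence register before tiled surfaces can
 * be used.  The first pin updates the global pin accounting and removes the
 * object from the inactive list.
 */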
3853 int
3854 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3855 {
3856 	struct drm_device *dev = obj->dev;
3857 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
3858 	int ret;
3859 
3860 	i915_verify_inactive(dev, __FILE__, __LINE__);
3861 	if (obj_priv->gtt_space == NULL) {
3862 		ret = i915_gem_object_bind_to_gtt(obj, alignment);
3863 		if (ret)
3864 			return ret;
3865 	}
3866 	/*
3867 	 * Pre-965 chips need a fence register set up in order to
3868 	 * properly handle tiled surfaces.
3869 	 */
3870 	if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
3871 		ret = i915_gem_object_get_fence_reg(obj);
3872 		if (ret != 0) {
3873 			if (ret != -EBUSY && ret != -ERESTARTSYS)
3874 				DRM_ERROR("Failure to install fence: %d\n",
3875 					  ret);
3876 			return ret;
3877 		}
3878 	}
3879 	obj_priv->pin_count++;
3880 
3881 	/* If the object is not active and not pending a flush,
3882 	 * remove it from the inactive list
3883 	 */
3884 	if (obj_priv->pin_count == 1) {
3885 		atomic_inc(&dev->pin_count);
3886 		atomic_add(obj->size, &dev->pin_memory);
3887 		if (!obj_priv->active &&
3888 		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
3889 		    !list_empty(&obj_priv->list))
3890 			list_del_init(&obj_priv->list);
3891 	}
3892 	i915_verify_inactive(dev, __FILE__, __LINE__);
3893 
3894 	return 0;
3895 }
3896 
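/*
 * Drop one pin reference.  When the last reference goes away the object is
 * returned to the inactive list (if it is neither active nor awaiting a GPU
 * flush) and the global pin accounting is updated.
 */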
3897 void
3898 i915_gem_object_unpin(struct drm_gem_object *obj)
3899 {
3900 	struct drm_device *dev = obj->dev;
3901 	drm_i915_private_t *dev_priv = dev->dev_private;
3902 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
3903 
3904 	i915_verify_inactive(dev, __FILE__, __LINE__);
3905 	obj_priv->pin_count--;
3906 	BUG_ON(obj_priv->pin_count < 0);
3907 	BUG_ON(obj_priv->gtt_space == NULL);
3908 
3909 	/* If the object is no longer pinned, and is
3910 	 * neither active nor being flushed, then stick it on
3911 	 * the inactive list
3912 	 */
3913 	if (obj_priv->pin_count == 0) {
3914 		if (!obj_priv->active &&
3915 		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
3916 			list_move_tail(&obj_priv->list,
3917 				       &dev_priv->mm.inactive_list);
3918 		atomic_dec(&dev->pin_count);
3919 		atomic_sub(obj->size, &dev->pin_memory);
3920 	}
3921 	i915_verify_inactive(dev, __FILE__, __LINE__);
3922 }
3923 
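/*
 * I915_GEM_PIN ioctl: pin an object on behalf of user space (historically
 * the X server, which does not manage domains itself) and report its GTT
 * offset.  A buffer pinned through one file cannot be pinned or unpinned
 * through another, and purgeable buffers are refused.
 */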
3924 int
3925 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3926 		   struct drm_file *file_priv)
3927 {
3928 	struct drm_i915_gem_pin *args = data;
3929 	struct drm_gem_object *obj;
3930 	struct drm_i915_gem_object *obj_priv;
3931 	int ret;
3932 
3933 	mutex_lock(&dev->struct_mutex);
3934 
3935 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3936 	if (obj == NULL) {
3937 		DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
3938 			  args->handle);
3939 		mutex_unlock(&dev->struct_mutex);
3940 		return -EBADF;
3941 	}
3942 	obj_priv = obj->driver_private;
3943 
3944 	if (obj_priv->madv != I915_MADV_WILLNEED) {
3945 		DRM_ERROR("Attempting to pin a purgeable buffer\n");
3946 		drm_gem_object_unreference(obj);
3947 		mutex_unlock(&dev->struct_mutex);
3948 		return -EINVAL;
3949 	}
3950 
3951 	if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
3952 		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3953 			  args->handle);
3954 		drm_gem_object_unreference(obj);
3955 		mutex_unlock(&dev->struct_mutex);
3956 		return -EINVAL;
3957 	}
3958 
3959 	obj_priv->user_pin_count++;
3960 	obj_priv->pin_filp = file_priv;
3961 	if (obj_priv->user_pin_count == 1) {
3962 		ret = i915_gem_object_pin(obj, args->alignment);
3963 		if (ret != 0) {
			obj_priv->user_pin_count--;
			obj_priv->pin_filp = NULL;
3964 			drm_gem_object_unreference(obj);
3965 			mutex_unlock(&dev->struct_mutex);
3966 			return ret;
3967 		}
3968 	}
3969 
3970 	/* XXX - flush the CPU caches for pinned objects
3971 	 * as the X server doesn't manage domains yet
3972 	 */
3973 	i915_gem_object_flush_cpu_write_domain(obj);
3974 	args->offset = obj_priv->gtt_offset;
3975 	drm_gem_object_unreference(obj);
3976 	mutex_unlock(&dev->struct_mutex);
3977 
3978 	return 0;
3979 }
3980 
3981 int
3982 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3983 		     struct drm_file *file_priv)
3984 {
3985 	struct drm_i915_gem_pin *args = data;
3986 	struct drm_gem_object *obj;
3987 	struct drm_i915_gem_object *obj_priv;
3988 
3989 	mutex_lock(&dev->struct_mutex);
3990 
3991 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3992 	if (obj == NULL) {
3993 		DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
3994 			  args->handle);
3995 		mutex_unlock(&dev->struct_mutex);
3996 		return -EBADF;
3997 	}
3998 
3999 	obj_priv = obj->driver_private;
4000 	if (obj_priv->pin_filp != file_priv) {
4001 		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4002 			  args->handle);
4003 		drm_gem_object_unreference(obj);
4004 		mutex_unlock(&dev->struct_mutex);
4005 		return -EINVAL;
4006 	}
4007 	obj_priv->user_pin_count--;
4008 	if (obj_priv->user_pin_count == 0) {
4009 		obj_priv->pin_filp = NULL;
4010 		i915_gem_object_unpin(obj);
4011 	}
4012 
4013 	drm_gem_object_unreference(obj);
4014 	mutex_unlock(&dev->struct_mutex);
4015 	return 0;
4016 }
4017 
4018 int
4019 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4020 		    struct drm_file *file_priv)
4021 {
4022 	struct drm_i915_gem_busy *args = data;
4023 	struct drm_gem_object *obj;
4024 	struct drm_i915_gem_object *obj_priv;
4025 
4026 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4027 	if (obj == NULL) {
4028 		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4029 			  args->handle);
4030 		return -EBADF;
4031 	}
4032 
4033 	mutex_lock(&dev->struct_mutex);
4034 	/* Update the active list for the hardware's current position.
4035 	 * Otherwise this only updates on a delayed timer or when irqs are
4036 	 * actually unmasked, and our working set ends up being larger than
4037 	 * required.
4038 	 */
4039 	i915_gem_retire_requests(dev);
4040 
4041 	obj_priv = obj->driver_private;
4042 	/* Don't count being on the flushing list against the object being
4043 	 * done.  Otherwise, a buffer left on the flushing list but not getting
4044 	 * flushed (because nobody's flushing that domain) won't ever return
4045 	 * unbusy and get reused by libdrm's bo cache.  The other expected
4046 	 * consumer of this interface, OpenGL's occlusion queries, also specs
4047 	 * that the objects get unbusy "eventually" without any interference.
4048 	 */
4049 	args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
4050 
4051 	drm_gem_object_unreference(obj);
4052 	mutex_unlock(&dev->struct_mutex);
4053 	return 0;
4054 }
4055 
4056 int
4057 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4058 			struct drm_file *file_priv)
4059 {
4060 	return i915_gem_ring_throttle(dev, file_priv);
4061 }
4062 
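/*
 * I915_GEM_MADVISE ioctl: mark a buffer's backing storage as DONTNEED
 * (purgeable under memory pressure) or WILLNEED.  Pinned buffers are
 * rejected, already-purged buffers stay purged, and args->retained reports
 * whether the pages are still present.
 */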
4063 int
4064 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4065 		       struct drm_file *file_priv)
4066 {
4067 	struct drm_i915_gem_madvise *args = data;
4068 	struct drm_gem_object *obj;
4069 	struct drm_i915_gem_object *obj_priv;
4070 
4071 	switch (args->madv) {
4072 	case I915_MADV_DONTNEED:
4073 	case I915_MADV_WILLNEED:
4074 		break;
4075 	default:
4076 		return -EINVAL;
4077 	}
4078 
4079 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4080 	if (obj == NULL) {
4081 		DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4082 			  args->handle);
4083 		return -EBADF;
4084 	}
4085 
4086 	mutex_lock(&dev->struct_mutex);
4087 	obj_priv = obj->driver_private;
4088 
4089 	if (obj_priv->pin_count) {
4090 		drm_gem_object_unreference(obj);
4091 		mutex_unlock(&dev->struct_mutex);
4092 
4093 		DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4094 		return -EINVAL;
4095 	}
4096 
4097 	if (obj_priv->madv != __I915_MADV_PURGED)
4098 		obj_priv->madv = args->madv;
4099 
4100 	/* if the object is no longer bound, discard its backing storage */
4101 	if (i915_gem_object_is_purgeable(obj_priv) &&
4102 	    obj_priv->gtt_space == NULL)
4103 		i915_gem_object_truncate(obj);
4104 
4105 	args->retained = obj_priv->madv != __I915_MADV_PURGED;
4106 
4107 	drm_gem_object_unreference(obj);
4108 	mutex_unlock(&dev->struct_mutex);
4109 
4110 	return 0;
4111 }
4112 
4113 int i915_gem_init_object(struct drm_gem_object *obj)
4114 {
4115 	struct drm_i915_gem_object *obj_priv;
4116 
4117 	obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL);
4118 	if (obj_priv == NULL)
4119 		return -ENOMEM;
4120 
4121 	/*
4122 	 * We've just allocated pages from the kernel,
4123 	 * so they've just been written by the CPU with
4124 	 * zeros. They'll need to be clflushed before we
4125 	 * use them with the GPU.
4126 	 */
4127 	obj->write_domain = I915_GEM_DOMAIN_CPU;
4128 	obj->read_domains = I915_GEM_DOMAIN_CPU;
4129 
4130 	obj_priv->agp_type = AGP_USER_MEMORY;
4131 
4132 	obj->driver_private = obj_priv;
4133 	obj_priv->obj = obj;
4134 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
4135 	INIT_LIST_HEAD(&obj_priv->list);
4136 	INIT_LIST_HEAD(&obj_priv->fence_list);
4137 	obj_priv->madv = I915_MADV_WILLNEED;
4138 
4139 	trace_i915_gem_object_create(obj);
4140 
4141 	return 0;
4142 }
4143 
4144 void i915_gem_free_object(struct drm_gem_object *obj)
4145 {
4146 	struct drm_device *dev = obj->dev;
4147 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
4148 
4149 	trace_i915_gem_object_destroy(obj);
4150 
4151 	while (obj_priv->pin_count > 0)
4152 		i915_gem_object_unpin(obj);
4153 
4154 	if (obj_priv->phys_obj)
4155 		i915_gem_detach_phys_object(dev, obj);
4156 
4157 	i915_gem_object_unbind(obj);
4158 
4159 	if (obj_priv->mmap_offset)
4160 		i915_gem_free_mmap_offset(obj);
4161 
4162 	kfree(obj_priv->page_cpu_valid);
4163 	kfree(obj_priv->bit_17);
4164 	kfree(obj->driver_private);
4165 }
4166 
4167 /** Unbinds all inactive objects. */
4168 static int
4169 i915_gem_evict_from_inactive_list(struct drm_device *dev)
4170 {
4171 	drm_i915_private_t *dev_priv = dev->dev_private;
4172 
4173 	while (!list_empty(&dev_priv->mm.inactive_list)) {
4174 		struct drm_gem_object *obj;
4175 		int ret;
4176 
4177 		obj = list_first_entry(&dev_priv->mm.inactive_list,
4178 				       struct drm_i915_gem_object,
4179 				       list)->obj;
4180 
4181 		ret = i915_gem_object_unbind(obj);
4182 		if (ret != 0) {
4183 			DRM_ERROR("Error unbinding object: %d\n", ret);
4184 			return ret;
4185 		}
4186 	}
4187 
4188 	return 0;
4189 }
4190 
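/*
 * Quiesce the GPU: mark the device suspended, flush all GPU write domains,
 * poll until the resulting seqno has passed (declaring the hardware wedged
 * if it never does), move everything onto the inactive list, evict it from
 * the GTT and tear down the ring buffer.  Called on VT switch and last
 * close.
 */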
4191 int
4192 i915_gem_idle(struct drm_device *dev)
4193 {
4194 	drm_i915_private_t *dev_priv = dev->dev_private;
4195 	uint32_t seqno, cur_seqno, last_seqno;
4196 	int stuck, ret;
4197 
4198 	mutex_lock(&dev->struct_mutex);
4199 
4200 	if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
4201 		mutex_unlock(&dev->struct_mutex);
4202 		return 0;
4203 	}
4204 
4205 	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
4206 	 * We need to replace this with a semaphore, or something.
4207 	 */
4208 	dev_priv->mm.suspended = 1;
4209 	del_timer(&dev_priv->hangcheck_timer);
4210 
4211 	/* Cancel the retire work handler, wait for it to finish if running
4212 	 */
4213 	mutex_unlock(&dev->struct_mutex);
4214 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4215 	mutex_lock(&dev->struct_mutex);
4216 
4217 	i915_kernel_lost_context(dev);
4218 
4219 	/* Flush the GPU along with all non-CPU write domains
4220 	 */
4221 	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
4222 	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
4223 
4224 	if (seqno == 0) {
4225 		mutex_unlock(&dev->struct_mutex);
4226 		return -ENOMEM;
4227 	}
4228 
4229 	dev_priv->mm.waiting_gem_seqno = seqno;
4230 	last_seqno = 0;
4231 	stuck = 0;
4232 	for (;;) {
4233 		cur_seqno = i915_get_gem_seqno(dev);
4234 		if (i915_seqno_passed(cur_seqno, seqno))
4235 			break;
4236 		if (last_seqno == cur_seqno) {
4237 			if (stuck++ > 100) {
4238 				DRM_ERROR("hardware wedged\n");
4239 				atomic_set(&dev_priv->mm.wedged, 1);
4240 				DRM_WAKEUP(&dev_priv->irq_queue);
4241 				break;
4242 			}
4243 		}
4244 		msleep(10);
4245 		last_seqno = cur_seqno;
4246 	}
4247 	dev_priv->mm.waiting_gem_seqno = 0;
4248 
4249 	i915_gem_retire_requests(dev);
4250 
4251 	spin_lock(&dev_priv->mm.active_list_lock);
4252 	if (!atomic_read(&dev_priv->mm.wedged)) {
4253 		/* Active and flushing should now be empty as we've
4254 		 * waited for a sequence higher than any pending execbuffer
4255 		 */
4256 		WARN_ON(!list_empty(&dev_priv->mm.active_list));
4257 		WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
4258 		/* Request should now be empty as we've also waited
4259 		 * for the last request in the list
4260 		 */
4261 		WARN_ON(!list_empty(&dev_priv->mm.request_list));
4262 	}
4263 
4264 	/* Empty the active and flushing lists to inactive.  If there's
4265 	 * anything left at this point, it means that we're wedged and
4266 	 * nothing good's going to happen by leaving them there.  So strip
4267 	 * the GPU domains and just stuff them onto inactive.
4268 	 */
4269 	while (!list_empty(&dev_priv->mm.active_list)) {
4270 		struct drm_gem_object *obj;
4271 		uint32_t old_write_domain;
4272 
4273 		obj = list_first_entry(&dev_priv->mm.active_list,
4274 				       struct drm_i915_gem_object,
4275 				       list)->obj;
4276 		old_write_domain = obj->write_domain;
4277 		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4278 		i915_gem_object_move_to_inactive(obj);
4279 
4280 		trace_i915_gem_object_change_domain(obj,
4281 						    obj->read_domains,
4282 						    old_write_domain);
4283 	}
4284 	spin_unlock(&dev_priv->mm.active_list_lock);
4285 
4286 	while (!list_empty(&dev_priv->mm.flushing_list)) {
4287 		struct drm_gem_object *obj;
4288 		uint32_t old_write_domain;
4289 
4290 		obj = list_first_entry(&dev_priv->mm.flushing_list,
4291 				       struct drm_i915_gem_object,
4292 				       list)->obj;
4293 		old_write_domain = obj->write_domain;
4294 		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4295 		i915_gem_object_move_to_inactive(obj);
4296 
4297 		trace_i915_gem_object_change_domain(obj,
4298 						    obj->read_domains,
4299 						    old_write_domain);
4300 	}
4301 
4302 
4303 	/* Move all inactive buffers out of the GTT. */
4304 	ret = i915_gem_evict_from_inactive_list(dev);
4305 	WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
4306 	if (ret) {
4307 		mutex_unlock(&dev->struct_mutex);
4308 		return ret;
4309 	}
4310 
4311 	i915_gem_cleanup_ringbuffer(dev);
4312 	mutex_unlock(&dev->struct_mutex);
4313 
4314 	return 0;
4315 }
4316 
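/*
 * Allocate and pin a page for the hardware status page on chipsets that
 * read it from graphics memory (I915_NEED_GFX_HWS) rather than from a
 * physical address set up at driver load, then point HWS_PGA at it.
 */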
4317 static int
4318 i915_gem_init_hws(struct drm_device *dev)
4319 {
4320 	drm_i915_private_t *dev_priv = dev->dev_private;
4321 	struct drm_gem_object *obj;
4322 	struct drm_i915_gem_object *obj_priv;
4323 	int ret;
4324 
4325 	/* If we need a physical address for the status page, it's already
4326 	 * initialized at driver load time.
4327 	 */
4328 	if (!I915_NEED_GFX_HWS(dev))
4329 		return 0;
4330 
4331 	obj = drm_gem_object_alloc(dev, 4096);
4332 	if (obj == NULL) {
4333 		DRM_ERROR("Failed to allocate status page\n");
4334 		return -ENOMEM;
4335 	}
4336 	obj_priv = obj->driver_private;
4337 	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4338 
4339 	ret = i915_gem_object_pin(obj, 4096);
4340 	if (ret != 0) {
4341 		drm_gem_object_unreference(obj);
4342 		return ret;
4343 	}
4344 
4345 	dev_priv->status_gfx_addr = obj_priv->gtt_offset;
4346 
4347 	dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
4348 	if (dev_priv->hw_status_page == NULL) {
4349 		DRM_ERROR("Failed to map status page.\n");
4350 		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
4351 		i915_gem_object_unpin(obj);
4352 		drm_gem_object_unreference(obj);
4353 		return -EINVAL;
4354 	}
4355 	dev_priv->hws_obj = obj;
4356 	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
4357 	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
4358 	I915_READ(HWS_PGA); /* posting read */
4359 	DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
4360 
4361 	return 0;
4362 }
4363 
4364 static void
4365 i915_gem_cleanup_hws(struct drm_device *dev)
4366 {
4367 	drm_i915_private_t *dev_priv = dev->dev_private;
4368 	struct drm_gem_object *obj;
4369 	struct drm_i915_gem_object *obj_priv;
4370 
4371 	if (dev_priv->hws_obj == NULL)
4372 		return;
4373 
4374 	obj = dev_priv->hws_obj;
4375 	obj_priv = obj->driver_private;
4376 
4377 	kunmap(obj_priv->pages[0]);
4378 	i915_gem_object_unpin(obj);
4379 	drm_gem_object_unreference(obj);
4380 	dev_priv->hws_obj = NULL;
4381 
4382 	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
4383 	dev_priv->hw_status_page = NULL;
4384 
4385 	/* Write high address into HWS_PGA when disabling. */
4386 	I915_WRITE(HWS_PGA, 0x1ffff000);
4387 }
4388 
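/*
 * Allocate, pin and map a 128KB object for the ring buffer, then program
 * PRB0_START/HEAD/TAIL/CTL to start the ring.  G45 parts fail to reset the
 * head to zero, so it is forced to zero and re-checked before the ring is
 * declared usable.
 */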
4389 int
4390 i915_gem_init_ringbuffer(struct drm_device *dev)
4391 {
4392 	drm_i915_private_t *dev_priv = dev->dev_private;
4393 	struct drm_gem_object *obj;
4394 	struct drm_i915_gem_object *obj_priv;
4395 	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
4396 	int ret;
4397 	u32 head;
4398 
4399 	ret = i915_gem_init_hws(dev);
4400 	if (ret != 0)
4401 		return ret;
4402 
4403 	obj = drm_gem_object_alloc(dev, 128 * 1024);
4404 	if (obj == NULL) {
4405 		DRM_ERROR("Failed to allocate ringbuffer\n");
4406 		i915_gem_cleanup_hws(dev);
4407 		return -ENOMEM;
4408 	}
4409 	obj_priv = obj->driver_private;
4410 
4411 	ret = i915_gem_object_pin(obj, 4096);
4412 	if (ret != 0) {
4413 		drm_gem_object_unreference(obj);
4414 		i915_gem_cleanup_hws(dev);
4415 		return ret;
4416 	}
4417 
4418 	/* Set up the kernel mapping for the ring. */
4419 	ring->Size = obj->size;
4420 
4421 	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
4422 	ring->map.size = obj->size;
4423 	ring->map.type = 0;
4424 	ring->map.flags = 0;
4425 	ring->map.mtrr = 0;
4426 
4427 	drm_core_ioremap_wc(&ring->map, dev);
4428 	if (ring->map.handle == NULL) {
4429 		DRM_ERROR("Failed to map ringbuffer.\n");
4430 		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
4431 		i915_gem_object_unpin(obj);
4432 		drm_gem_object_unreference(obj);
4433 		i915_gem_cleanup_hws(dev);
4434 		return -EINVAL;
4435 	}
4436 	ring->ring_obj = obj;
4437 	ring->virtual_start = ring->map.handle;
4438 
4439 	/* Stop the ring if it's running. */
4440 	I915_WRITE(PRB0_CTL, 0);
4441 	I915_WRITE(PRB0_TAIL, 0);
4442 	I915_WRITE(PRB0_HEAD, 0);
4443 
4444 	/* Initialize the ring. */
4445 	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
4446 	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4447 
4448 	/* G45 ring initialization fails to reset head to zero */
4449 	if (head != 0) {
4450 		DRM_ERROR("Ring head not reset to zero "
4451 			  "ctl %08x head %08x tail %08x start %08x\n",
4452 			  I915_READ(PRB0_CTL),
4453 			  I915_READ(PRB0_HEAD),
4454 			  I915_READ(PRB0_TAIL),
4455 			  I915_READ(PRB0_START));
4456 		I915_WRITE(PRB0_HEAD, 0);
4457 
4458 		DRM_ERROR("Ring head forced to zero "
4459 			  "ctl %08x head %08x tail %08x start %08x\n",
4460 			  I915_READ(PRB0_CTL),
4461 			  I915_READ(PRB0_HEAD),
4462 			  I915_READ(PRB0_TAIL),
4463 			  I915_READ(PRB0_START));
4464 	}
4465 
4466 	I915_WRITE(PRB0_CTL,
4467 		   ((obj->size - 4096) & RING_NR_PAGES) |
4468 		   RING_NO_REPORT |
4469 		   RING_VALID);
4470 
4471 	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4472 
4473 	/* If the head is still not zero, the ring is dead */
4474 	if (head != 0) {
4475 		DRM_ERROR("Ring initialization failed "
4476 			  "ctl %08x head %08x tail %08x start %08x\n",
4477 			  I915_READ(PRB0_CTL),
4478 			  I915_READ(PRB0_HEAD),
4479 			  I915_READ(PRB0_TAIL),
4480 			  I915_READ(PRB0_START));
4481 		return -EIO;
4482 	}
4483 
4484 	/* Update our cache of the ring state */
4485 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
4486 		i915_kernel_lost_context(dev);
4487 	else {
4488 		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4489 		ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
4490 		ring->space = ring->head - (ring->tail + 8);
4491 		if (ring->space < 0)
4492 			ring->space += ring->Size;
4493 	}
4494 
4495 	return 0;
4496 }
4497 
4498 void
4499 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4500 {
4501 	drm_i915_private_t *dev_priv = dev->dev_private;
4502 
4503 	if (dev_priv->ring.ring_obj == NULL)
4504 		return;
4505 
4506 	drm_core_ioremapfree(&dev_priv->ring.map, dev);
4507 
4508 	i915_gem_object_unpin(dev_priv->ring.ring_obj);
4509 	drm_gem_object_unreference(dev_priv->ring.ring_obj);
4510 	dev_priv->ring.ring_obj = NULL;
4511 	memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
4512 
4513 	i915_gem_cleanup_hws(dev);
4514 }
4515 
4516 int
4517 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4518 		       struct drm_file *file_priv)
4519 {
4520 	drm_i915_private_t *dev_priv = dev->dev_private;
4521 	int ret;
4522 
4523 	if (drm_core_check_feature(dev, DRIVER_MODESET))
4524 		return 0;
4525 
4526 	if (atomic_read(&dev_priv->mm.wedged)) {
4527 		DRM_ERROR("Reenabling wedged hardware, good luck\n");
4528 		atomic_set(&dev_priv->mm.wedged, 0);
4529 	}
4530 
4531 	mutex_lock(&dev->struct_mutex);
4532 	dev_priv->mm.suspended = 0;
4533 
4534 	ret = i915_gem_init_ringbuffer(dev);
4535 	if (ret != 0) {
4536 		mutex_unlock(&dev->struct_mutex);
4537 		return ret;
4538 	}
4539 
4540 	spin_lock(&dev_priv->mm.active_list_lock);
4541 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
4542 	spin_unlock(&dev_priv->mm.active_list_lock);
4543 
4544 	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4545 	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4546 	BUG_ON(!list_empty(&dev_priv->mm.request_list));
4547 	mutex_unlock(&dev->struct_mutex);
4548 
4549 	drm_irq_install(dev);
4550 
4551 	return 0;
4552 }
4553 
4554 int
4555 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4556 		       struct drm_file *file_priv)
4557 {
4558 	if (drm_core_check_feature(dev, DRIVER_MODESET))
4559 		return 0;
4560 
4561 	drm_irq_uninstall(dev);
4562 	return i915_gem_idle(dev);
4563 }
4564 
4565 void
4566 i915_gem_lastclose(struct drm_device *dev)
4567 {
4568 	int ret;
4569 
4570 	if (drm_core_check_feature(dev, DRIVER_MODESET))
4571 		return;
4572 
4573 	ret = i915_gem_idle(dev);
4574 	if (ret)
4575 		DRM_ERROR("failed to idle hardware: %d\n", ret);
4576 }
4577 
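/*
 * One-time GEM setup at driver load: initialize the memory-manager lists and
 * the retire work handler, add the device to the global shrinker list,
 * reserve the first three fence registers for old X servers and clear the
 * hardware fence registers.
 */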
4578 void
4579 i915_gem_load(struct drm_device *dev)
4580 {
4581 	int i;
4582 	drm_i915_private_t *dev_priv = dev->dev_private;
4583 
4584 	spin_lock_init(&dev_priv->mm.active_list_lock);
4585 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
4586 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4587 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4588 	INIT_LIST_HEAD(&dev_priv->mm.request_list);
4589 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4590 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4591 			  i915_gem_retire_work_handler);
4592 	dev_priv->mm.next_gem_seqno = 1;
4593 
4594 	spin_lock(&shrink_list_lock);
4595 	list_add(&dev_priv->mm.shrink_list, &shrink_list);
4596 	spin_unlock(&shrink_list_lock);
4597 
4598 	/* Old X drivers will take 0-2 for front, back, depth buffers */
4599 	dev_priv->fence_reg_start = 3;
4600 
4601 	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4602 		dev_priv->num_fence_regs = 16;
4603 	else
4604 		dev_priv->num_fence_regs = 8;
4605 
4606 	/* Initialize fence registers to zero */
4607 	if (IS_I965G(dev)) {
4608 		for (i = 0; i < 16; i++)
4609 			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4610 	} else {
4611 		for (i = 0; i < 8; i++)
4612 			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4613 		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4614 			for (i = 0; i < 8; i++)
4615 				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4616 	}
4617 
4618 	i915_gem_detect_bit_6_swizzle(dev);
4619 }
4620 
4621 /*
4622  * Create a physically contiguous memory object for this object
4623  * e.g. for cursor + overlay regs
4624  */
4625 int i915_gem_init_phys_object(struct drm_device *dev,
4626 			      int id, int size)
4627 {
4628 	drm_i915_private_t *dev_priv = dev->dev_private;
4629 	struct drm_i915_gem_phys_object *phys_obj;
4630 	int ret;
4631 
4632 	if (dev_priv->mm.phys_objs[id - 1] || !size)
4633 		return 0;
4634 
4635 	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4636 	if (!phys_obj)
4637 		return -ENOMEM;
4638 
4639 	phys_obj->id = id;
4640 
4641 	phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
4642 	if (!phys_obj->handle) {
4643 		ret = -ENOMEM;
4644 		goto kfree_obj;
4645 	}
4646 #ifdef CONFIG_X86
4647 	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4648 #endif
4649 
4650 	dev_priv->mm.phys_objs[id - 1] = phys_obj;
4651 
4652 	return 0;
4653 kfree_obj:
4654 	kfree(phys_obj);
4655 	return ret;
4656 }
4657 
4658 void i915_gem_free_phys_object(struct drm_device *dev, int id)
4659 {
4660 	drm_i915_private_t *dev_priv = dev->dev_private;
4661 	struct drm_i915_gem_phys_object *phys_obj;
4662 
4663 	if (!dev_priv->mm.phys_objs[id - 1])
4664 		return;
4665 
4666 	phys_obj = dev_priv->mm.phys_objs[id - 1];
4667 	if (phys_obj->cur_obj) {
4668 		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4669 	}
4670 
4671 #ifdef CONFIG_X86
4672 	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4673 #endif
4674 	drm_pci_free(dev, phys_obj->handle);
4675 	kfree(phys_obj);
4676 	dev_priv->mm.phys_objs[id - 1] = NULL;
4677 }
4678 
4679 void i915_gem_free_all_phys_object(struct drm_device *dev)
4680 {
4681 	int i;
4682 
4683 	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4684 		i915_gem_free_phys_object(dev, i);
4685 }
4686 
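/*
 * Copy the contents of a physically contiguous backing object back into the
 * object's regular pages and drop the association with the phys object.
 */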
4687 void i915_gem_detach_phys_object(struct drm_device *dev,
4688 				 struct drm_gem_object *obj)
4689 {
4690 	struct drm_i915_gem_object *obj_priv;
4691 	int i;
4692 	int ret;
4693 	int page_count;
4694 
4695 	obj_priv = obj->driver_private;
4696 	if (!obj_priv->phys_obj)
4697 		return;
4698 
4699 	ret = i915_gem_object_get_pages(obj);
4700 	if (ret)
4701 		goto out;
4702 
4703 	page_count = obj->size / PAGE_SIZE;
4704 
4705 	for (i = 0; i < page_count; i++) {
4706 		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4707 		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4708 
4709 		memcpy(dst, src, PAGE_SIZE);
4710 		kunmap_atomic(dst, KM_USER0);
4711 	}
4712 	drm_clflush_pages(obj_priv->pages, page_count);
4713 	drm_agp_chipset_flush(dev);
4714 
4715 	i915_gem_object_put_pages(obj);
4716 out:
4717 	obj_priv->phys_obj->cur_obj = NULL;
4718 	obj_priv->phys_obj = NULL;
4719 }
4720 
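/*
 * Back an object with one of the physically contiguous phys objects (e.g.
 * cursor or overlay registers), creating the phys object on first use and
 * copying the object's current page contents into it.
 */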
4721 int
4722 i915_gem_attach_phys_object(struct drm_device *dev,
4723 			    struct drm_gem_object *obj, int id)
4724 {
4725 	drm_i915_private_t *dev_priv = dev->dev_private;
4726 	struct drm_i915_gem_object *obj_priv;
4727 	int ret = 0;
4728 	int page_count;
4729 	int i;
4730 
4731 	if (id > I915_MAX_PHYS_OBJECT)
4732 		return -EINVAL;
4733 
4734 	obj_priv = obj->driver_private;
4735 
4736 	if (obj_priv->phys_obj) {
4737 		if (obj_priv->phys_obj->id == id)
4738 			return 0;
4739 		i915_gem_detach_phys_object(dev, obj);
4740 	}
4741 
4742 
4743 	/* create a new object */
4744 	if (!dev_priv->mm.phys_objs[id - 1]) {
4745 		ret = i915_gem_init_phys_object(dev, id,
4746 						obj->size);
4747 		if (ret) {
4748 			DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4749 			goto out;
4750 		}
4751 	}
4752 
4753 	/* bind to the object */
4754 	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4755 	obj_priv->phys_obj->cur_obj = obj;
4756 
4757 	ret = i915_gem_object_get_pages(obj);
4758 	if (ret) {
4759 		DRM_ERROR("failed to get page list\n");
4760 		goto out;
4761 	}
4762 
4763 	page_count = obj->size / PAGE_SIZE;
4764 
4765 	for (i = 0; i < page_count; i++) {
4766 		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4767 		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4768 
4769 		memcpy(dst, src, PAGE_SIZE);
4770 		kunmap_atomic(src, KM_USER0);
4771 	}
4772 
4773 	i915_gem_object_put_pages(obj);
4774 
4775 	return 0;
4776 out:
4777 	return ret;
4778 }
4779 
4780 static int
4781 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4782 		     struct drm_i915_gem_pwrite *args,
4783 		     struct drm_file *file_priv)
4784 {
4785 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
4786 	void *obj_addr;
4787 	int ret;
4788 	char __user *user_data;
4789 
4790 	user_data = (char __user *) (uintptr_t) args->data_ptr;
4791 	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4792 
4793 	DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
4794 	ret = copy_from_user(obj_addr, user_data, args->size);
4795 	if (ret)
4796 		return -EFAULT;
4797 
4798 	drm_agp_chipset_flush(dev);
4799 	return 0;
4800 }
4801 
4802 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4803 {
4804 	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4805 
4806 	/* Clean up our request list when the client is going away, so that
4807 	 * later retire_requests won't dereference our soon-to-be-gone
4808 	 * file_priv.
4809 	 */
4810 	mutex_lock(&dev->struct_mutex);
4811 	while (!list_empty(&i915_file_priv->mm.request_list))
4812 		list_del_init(i915_file_priv->mm.request_list.next);
4813 	mutex_unlock(&dev->struct_mutex);
4814 }
4815 
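/*
 * Memory shrinker callback.  A nr_to_scan of zero only reports an estimate
 * of freeable objects, scaled by sysctl_vfs_cache_pressure.  Otherwise each
 * registered device is walked twice: first unbinding purgeable inactive
 * objects, then any remaining inactive objects.  Returns -1 if every
 * struct_mutex was contended and nothing could be freed without risking
 * deadlock.
 */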
4816 static int
4817 i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
4818 {
4819 	drm_i915_private_t *dev_priv, *next_dev;
4820 	struct drm_i915_gem_object *obj_priv, *next_obj;
4821 	int cnt = 0;
4822 	int would_deadlock = 1;
4823 
4824 	/* "fast-path" to count number of available objects */
4825 	if (nr_to_scan == 0) {
4826 		spin_lock(&shrink_list_lock);
4827 		list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4828 			struct drm_device *dev = dev_priv->dev;
4829 
4830 			if (mutex_trylock(&dev->struct_mutex)) {
4831 				list_for_each_entry(obj_priv,
4832 						    &dev_priv->mm.inactive_list,
4833 						    list)
4834 					cnt++;
4835 				mutex_unlock(&dev->struct_mutex);
4836 			}
4837 		}
4838 		spin_unlock(&shrink_list_lock);
4839 
4840 		return (cnt / 100) * sysctl_vfs_cache_pressure;
4841 	}
4842 
4843 	spin_lock(&shrink_list_lock);
4844 
4845 	/* first scan for clean buffers */
4846 	list_for_each_entry_safe(dev_priv, next_dev,
4847 				 &shrink_list, mm.shrink_list) {
4848 		struct drm_device *dev = dev_priv->dev;
4849 
4850 		if (!mutex_trylock(&dev->struct_mutex))
4851 			continue;
4852 
4853 		spin_unlock(&shrink_list_lock);
4854 
4855 		i915_gem_retire_requests(dev);
4856 
4857 		list_for_each_entry_safe(obj_priv, next_obj,
4858 					 &dev_priv->mm.inactive_list,
4859 					 list) {
4860 			if (i915_gem_object_is_purgeable(obj_priv)) {
4861 				i915_gem_object_unbind(obj_priv->obj);
4862 				if (--nr_to_scan <= 0)
4863 					break;
4864 			}
4865 		}
4866 
4867 		spin_lock(&shrink_list_lock);
4868 		mutex_unlock(&dev->struct_mutex);
4869 
4870 		would_deadlock = 0;
4871 
4872 		if (nr_to_scan <= 0)
4873 			break;
4874 	}
4875 
4876 	/* second pass, evict/count anything still on the inactive list */
4877 	list_for_each_entry_safe(dev_priv, next_dev,
4878 				 &shrink_list, mm.shrink_list) {
4879 		struct drm_device *dev = dev_priv->dev;
4880 
4881 		if (!mutex_trylock(&dev->struct_mutex))
4882 			continue;
4883 
4884 		spin_unlock(&shrink_list_lock);
4885 
4886 		list_for_each_entry_safe(obj_priv, next_obj,
4887 					 &dev_priv->mm.inactive_list,
4888 					 list) {
4889 			if (nr_to_scan > 0) {
4890 				i915_gem_object_unbind(obj_priv->obj);
4891 				nr_to_scan--;
4892 			} else
4893 				cnt++;
4894 		}
4895 
4896 		spin_lock(&shrink_list_lock);
4897 		mutex_unlock(&dev->struct_mutex);
4898 
4899 		would_deadlock = 0;
4900 	}
4901 
4902 	spin_unlock(&shrink_list_lock);
4903 
4904 	if (would_deadlock)
4905 		return -1;
4906 	else if (cnt > 0)
4907 		return (cnt / 100) * sysctl_vfs_cache_pressure;
4908 	else
4909 		return 0;
4910 }
4911 
4912 static struct shrinker shrinker = {
4913 	.shrink = i915_gem_shrink,
4914 	.seeks = DEFAULT_SEEKS,
4915 };
4916 
4917 __init void
4918 i915_gem_shrinker_init(void)
4919 {
4920 	register_shrinker(&shrinker);
4921 }
4922 
4923 __exit void
4924 i915_gem_shrinker_exit(void)
4925 {
4926 	unregister_shrinker(&shrinker);
4927 }
4928