xref: /linux/drivers/gpu/drm/i915/i915_gem.c (revision e08a1d97d33e2ac05cd368b955f9fdc2823f15fd)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_gem_dmabuf.h"
33 #include "i915_vgpu.h"
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 #include "intel_frontbuffer.h"
37 #include "intel_mocs.h"
38 #include <linux/reservation.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/slab.h>
41 #include <linux/swap.h>
42 #include <linux/pci.h>
43 #include <linux/dma-buf.h>
44 
45 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
46 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
47 
48 static bool cpu_cache_is_coherent(struct drm_device *dev,
49 				  enum i915_cache_level level)
50 {
51 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
52 }
53 
54 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
55 {
56 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
57 		return false;
58 
59 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
60 		return true;
61 
62 	return obj->pin_display;
63 }
64 
65 static int
66 insert_mappable_node(struct drm_i915_private *i915,
67                      struct drm_mm_node *node, u32 size)
68 {
69 	memset(node, 0, sizeof(*node));
70 	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
71 						   size, 0, 0, 0,
72 						   i915->ggtt.mappable_end,
73 						   DRM_MM_SEARCH_DEFAULT,
74 						   DRM_MM_CREATE_DEFAULT);
75 }
76 
77 static void
78 remove_mappable_node(struct drm_mm_node *node)
79 {
80 	drm_mm_remove_node(node);
81 }
82 
83 /* some bookkeeping */
84 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
85 				  u64 size)
86 {
87 	spin_lock(&dev_priv->mm.object_stat_lock);
88 	dev_priv->mm.object_count++;
89 	dev_priv->mm.object_memory += size;
90 	spin_unlock(&dev_priv->mm.object_stat_lock);
91 }
92 
93 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
94 				     u64 size)
95 {
96 	spin_lock(&dev_priv->mm.object_stat_lock);
97 	dev_priv->mm.object_count--;
98 	dev_priv->mm.object_memory -= size;
99 	spin_unlock(&dev_priv->mm.object_stat_lock);
100 }
101 
102 static int
103 i915_gem_wait_for_error(struct i915_gpu_error *error)
104 {
105 	int ret;
106 
107 	if (!i915_reset_in_progress(error))
108 		return 0;
109 
110 	/*
111 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
112 	 * userspace. If it takes that long something really bad is going on and
113 	 * we should simply try to bail out and fail as gracefully as possible.
114 	 */
115 	ret = wait_event_interruptible_timeout(error->reset_queue,
116 					       !i915_reset_in_progress(error),
117 					       10*HZ);
118 	if (ret == 0) {
119 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
120 		return -EIO;
121 	} else if (ret < 0) {
122 		return ret;
123 	} else {
124 		return 0;
125 	}
126 }
127 
128 int i915_mutex_lock_interruptible(struct drm_device *dev)
129 {
130 	struct drm_i915_private *dev_priv = to_i915(dev);
131 	int ret;
132 
133 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
134 	if (ret)
135 		return ret;
136 
137 	ret = mutex_lock_interruptible(&dev->struct_mutex);
138 	if (ret)
139 		return ret;
140 
141 	return 0;
142 }
143 
144 int
145 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
146 			    struct drm_file *file)
147 {
148 	struct drm_i915_private *dev_priv = to_i915(dev);
149 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
150 	struct drm_i915_gem_get_aperture *args = data;
151 	struct i915_vma *vma;
152 	size_t pinned;
153 
154 	pinned = 0;
155 	mutex_lock(&dev->struct_mutex);
156 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
157 		if (i915_vma_is_pinned(vma))
158 			pinned += vma->node.size;
159 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
160 		if (i915_vma_is_pinned(vma))
161 			pinned += vma->node.size;
162 	mutex_unlock(&dev->struct_mutex);
163 
164 	args->aper_size = ggtt->base.total;
165 	args->aper_available_size = args->aper_size - pinned;
166 
167 	return 0;
168 }
169 
170 static int
171 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
172 {
173 	struct address_space *mapping = obj->base.filp->f_mapping;
174 	char *vaddr = obj->phys_handle->vaddr;
175 	struct sg_table *st;
176 	struct scatterlist *sg;
177 	int i;
178 
179 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
180 		return -EINVAL;
181 
182 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
183 		struct page *page;
184 		char *src;
185 
186 		page = shmem_read_mapping_page(mapping, i);
187 		if (IS_ERR(page))
188 			return PTR_ERR(page);
189 
190 		src = kmap_atomic(page);
191 		memcpy(vaddr, src, PAGE_SIZE);
192 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
193 		kunmap_atomic(src);
194 
195 		put_page(page);
196 		vaddr += PAGE_SIZE;
197 	}
198 
199 	i915_gem_chipset_flush(to_i915(obj->base.dev));
200 
201 	st = kmalloc(sizeof(*st), GFP_KERNEL);
202 	if (st == NULL)
203 		return -ENOMEM;
204 
205 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
206 		kfree(st);
207 		return -ENOMEM;
208 	}
209 
210 	sg = st->sgl;
211 	sg->offset = 0;
212 	sg->length = obj->base.size;
213 
214 	sg_dma_address(sg) = obj->phys_handle->busaddr;
215 	sg_dma_len(sg) = obj->base.size;
216 
217 	obj->pages = st;
218 	return 0;
219 }
220 
221 static void
222 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
223 {
224 	int ret;
225 
226 	BUG_ON(obj->madv == __I915_MADV_PURGED);
227 
228 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
229 	if (WARN_ON(ret)) {
230 		/* In the event of a disaster, abandon all caches and
231 		 * hope for the best.
232 		 */
233 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
234 	}
235 
236 	if (obj->madv == I915_MADV_DONTNEED)
237 		obj->dirty = 0;
238 
239 	if (obj->dirty) {
240 		struct address_space *mapping = obj->base.filp->f_mapping;
241 		char *vaddr = obj->phys_handle->vaddr;
242 		int i;
243 
244 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
245 			struct page *page;
246 			char *dst;
247 
248 			page = shmem_read_mapping_page(mapping, i);
249 			if (IS_ERR(page))
250 				continue;
251 
252 			dst = kmap_atomic(page);
253 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
254 			memcpy(dst, vaddr, PAGE_SIZE);
255 			kunmap_atomic(dst);
256 
257 			set_page_dirty(page);
258 			if (obj->madv == I915_MADV_WILLNEED)
259 				mark_page_accessed(page);
260 			put_page(page);
261 			vaddr += PAGE_SIZE;
262 		}
263 		obj->dirty = 0;
264 	}
265 
266 	sg_free_table(obj->pages);
267 	kfree(obj->pages);
268 }
269 
270 static void
271 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
272 {
273 	drm_pci_free(obj->base.dev, obj->phys_handle);
274 }
275 
276 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
277 	.get_pages = i915_gem_object_get_pages_phys,
278 	.put_pages = i915_gem_object_put_pages_phys,
279 	.release = i915_gem_object_release_phys,
280 };
281 
282 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
283 {
284 	struct i915_vma *vma;
285 	LIST_HEAD(still_in_list);
286 	int ret;
287 
288 	lockdep_assert_held(&obj->base.dev->struct_mutex);
289 
290 	/* Closed vma are removed from the obj->vma_list - but they may
291 	 * still have an active binding on the object. To remove those we
292 	 * must wait for all rendering to complete to the object (as unbinding
293 	 * must anyway), and retire the requests.
294 	 */
295 	ret = i915_gem_object_wait_rendering(obj, false);
296 	if (ret)
297 		return ret;
298 
299 	i915_gem_retire_requests(to_i915(obj->base.dev));
300 
301 	while ((vma = list_first_entry_or_null(&obj->vma_list,
302 					       struct i915_vma,
303 					       obj_link))) {
304 		list_move_tail(&vma->obj_link, &still_in_list);
305 		ret = i915_vma_unbind(vma);
306 		if (ret)
307 			break;
308 	}
309 	list_splice(&still_in_list, &obj->vma_list);
310 
311 	return ret;
312 }
313 
314 /**
315  * Ensures that all rendering to the object has completed and the object is
316  * safe to unbind from the GTT or access from the CPU.
317  * @obj: i915 gem object
318  * @readonly: waiting for just read access or read-write access
319  */
320 int
321 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
322 			       bool readonly)
323 {
324 	struct reservation_object *resv;
325 	struct i915_gem_active *active;
326 	unsigned long active_mask;
327 	int idx;
328 
329 	lockdep_assert_held(&obj->base.dev->struct_mutex);
330 
331 	if (!readonly) {
332 		active = obj->last_read;
333 		active_mask = i915_gem_object_get_active(obj);
334 	} else {
335 		active_mask = 1;
336 		active = &obj->last_write;
337 	}
338 
339 	for_each_active(active_mask, idx) {
340 		int ret;
341 
342 		ret = i915_gem_active_wait(&active[idx],
343 					   &obj->base.dev->struct_mutex);
344 		if (ret)
345 			return ret;
346 	}
347 
348 	resv = i915_gem_object_get_dmabuf_resv(obj);
349 	if (resv) {
350 		long err;
351 
352 		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
353 							  MAX_SCHEDULE_TIMEOUT);
354 		if (err < 0)
355 			return err;
356 	}
357 
358 	return 0;
359 }
360 
361 /* A nonblocking variant of the above wait. Must be called prior to
362  * acquiring the mutex for the object, as the object state may change
363  * during this call. A reference must be held by the caller for the object.
364  */
365 static __must_check int
366 __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
367 			struct intel_rps_client *rps,
368 			bool readonly)
369 {
370 	struct i915_gem_active *active;
371 	unsigned long active_mask;
372 	int idx;
373 
374 	active_mask = __I915_BO_ACTIVE(obj);
375 	if (!active_mask)
376 		return 0;
377 
378 	if (!readonly) {
379 		active = obj->last_read;
380 	} else {
381 		active_mask = 1;
382 		active = &obj->last_write;
383 	}
384 
385 	for_each_active(active_mask, idx) {
386 		int ret;
387 
388 		ret = i915_gem_active_wait_unlocked(&active[idx],
389 						    I915_WAIT_INTERRUPTIBLE,
390 						    NULL, rps);
391 		if (ret)
392 			return ret;
393 	}
394 
395 	return 0;
396 }
397 
398 static struct intel_rps_client *to_rps_client(struct drm_file *file)
399 {
400 	struct drm_i915_file_private *fpriv = file->driver_priv;
401 
402 	return &fpriv->rps;
403 }
404 
405 int
406 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
407 			    int align)
408 {
409 	drm_dma_handle_t *phys;
410 	int ret;
411 
412 	if (obj->phys_handle) {
413 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
414 			return -EBUSY;
415 
416 		return 0;
417 	}
418 
419 	if (obj->madv != I915_MADV_WILLNEED)
420 		return -EFAULT;
421 
422 	if (obj->base.filp == NULL)
423 		return -EINVAL;
424 
425 	ret = i915_gem_object_unbind(obj);
426 	if (ret)
427 		return ret;
428 
429 	ret = i915_gem_object_put_pages(obj);
430 	if (ret)
431 		return ret;
432 
433 	/* create a new object */
434 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
435 	if (!phys)
436 		return -ENOMEM;
437 
438 	obj->phys_handle = phys;
439 	obj->ops = &i915_gem_phys_ops;
440 
441 	return i915_gem_object_get_pages(obj);
442 }
443 
444 static int
445 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
446 		     struct drm_i915_gem_pwrite *args,
447 		     struct drm_file *file_priv)
448 {
449 	struct drm_device *dev = obj->base.dev;
450 	void *vaddr = obj->phys_handle->vaddr + args->offset;
451 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
452 	int ret = 0;
453 
454 	/* We manually control the domain here and pretend that it
455 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
456 	 */
457 	ret = i915_gem_object_wait_rendering(obj, false);
458 	if (ret)
459 		return ret;
460 
461 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
462 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
463 		unsigned long unwritten;
464 
465 		/* The physical object once assigned is fixed for the lifetime
466 		 * of the obj, so we can safely drop the lock and continue
467 		 * to access vaddr.
468 		 */
469 		mutex_unlock(&dev->struct_mutex);
470 		unwritten = copy_from_user(vaddr, user_data, args->size);
471 		mutex_lock(&dev->struct_mutex);
472 		if (unwritten) {
473 			ret = -EFAULT;
474 			goto out;
475 		}
476 	}
477 
478 	drm_clflush_virt_range(vaddr, args->size);
479 	i915_gem_chipset_flush(to_i915(dev));
480 
481 out:
482 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
483 	return ret;
484 }
485 
486 void *i915_gem_object_alloc(struct drm_device *dev)
487 {
488 	struct drm_i915_private *dev_priv = to_i915(dev);
489 	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
490 }
491 
492 void i915_gem_object_free(struct drm_i915_gem_object *obj)
493 {
494 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
495 	kmem_cache_free(dev_priv->objects, obj);
496 }
497 
498 static int
499 i915_gem_create(struct drm_file *file,
500 		struct drm_device *dev,
501 		uint64_t size,
502 		uint32_t *handle_p)
503 {
504 	struct drm_i915_gem_object *obj;
505 	int ret;
506 	u32 handle;
507 
508 	size = roundup(size, PAGE_SIZE);
509 	if (size == 0)
510 		return -EINVAL;
511 
512 	/* Allocate the new object */
513 	obj = i915_gem_object_create(dev, size);
514 	if (IS_ERR(obj))
515 		return PTR_ERR(obj);
516 
517 	ret = drm_gem_handle_create(file, &obj->base, &handle);
518 	/* drop reference from allocate - handle holds it now */
519 	i915_gem_object_put_unlocked(obj);
520 	if (ret)
521 		return ret;
522 
523 	*handle_p = handle;
524 	return 0;
525 }
526 
527 int
528 i915_gem_dumb_create(struct drm_file *file,
529 		     struct drm_device *dev,
530 		     struct drm_mode_create_dumb *args)
531 {
532 	/* have to work out size/pitch and return them */
533 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
534 	args->size = args->pitch * args->height;
535 	return i915_gem_create(file, dev,
536 			       args->size, &args->handle);
537 }
538 
539 /**
540  * Creates a new mm object and returns a handle to it.
541  * @dev: drm device pointer
542  * @data: ioctl data blob
543  * @file: drm file pointer
544  */
545 int
546 i915_gem_create_ioctl(struct drm_device *dev, void *data,
547 		      struct drm_file *file)
548 {
549 	struct drm_i915_gem_create *args = data;
550 
551 	return i915_gem_create(file, dev,
552 			       args->size, &args->handle);
553 }
554 
555 static inline int
556 __copy_to_user_swizzled(char __user *cpu_vaddr,
557 			const char *gpu_vaddr, int gpu_offset,
558 			int length)
559 {
560 	int ret, cpu_offset = 0;
561 
562 	while (length > 0) {
563 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
564 		int this_length = min(cacheline_end - gpu_offset, length);
565 		int swizzled_gpu_offset = gpu_offset ^ 64;
566 
567 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
568 				     gpu_vaddr + swizzled_gpu_offset,
569 				     this_length);
570 		if (ret)
571 			return ret + length;
572 
573 		cpu_offset += this_length;
574 		gpu_offset += this_length;
575 		length -= this_length;
576 	}
577 
578 	return 0;
579 }
580 
581 static inline int
582 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
583 			  const char __user *cpu_vaddr,
584 			  int length)
585 {
586 	int ret, cpu_offset = 0;
587 
588 	while (length > 0) {
589 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
590 		int this_length = min(cacheline_end - gpu_offset, length);
591 		int swizzled_gpu_offset = gpu_offset ^ 64;
592 
593 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
594 				       cpu_vaddr + cpu_offset,
595 				       this_length);
596 		if (ret)
597 			return ret + length;
598 
599 		cpu_offset += this_length;
600 		gpu_offset += this_length;
601 		length -= this_length;
602 	}
603 
604 	return 0;
605 }
606 
607 /*
608  * Pins the specified object's pages and synchronizes the object with
609  * GPU accesses. Sets needs_clflush to non-zero if the caller should
610  * flush the object from the CPU cache.
611  */
612 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
613 				    unsigned int *needs_clflush)
614 {
615 	int ret;
616 
617 	*needs_clflush = 0;
618 
619 	if (!i915_gem_object_has_struct_page(obj))
620 		return -ENODEV;
621 
622 	ret = i915_gem_object_wait_rendering(obj, true);
623 	if (ret)
624 		return ret;
625 
626 	ret = i915_gem_object_get_pages(obj);
627 	if (ret)
628 		return ret;
629 
630 	i915_gem_object_pin_pages(obj);
631 
632 	i915_gem_object_flush_gtt_write_domain(obj);
633 
634 	/* If we're not in the cpu read domain, set ourself into the gtt
635 	 * read domain and manually flush cachelines (if required). This
636 	 * optimizes for the case when the gpu will dirty the data
637 	 * anyway again before the next pread happens.
638 	 */
639 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
640 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
641 							obj->cache_level);
642 
643 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
644 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
645 		if (ret)
646 			goto err_unpin;
647 
648 		*needs_clflush = 0;
649 	}
650 
651 	/* return with the pages pinned */
652 	return 0;
653 
654 err_unpin:
655 	i915_gem_object_unpin_pages(obj);
656 	return ret;
657 }
658 
659 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
660 				     unsigned int *needs_clflush)
661 {
662 	int ret;
663 
664 	*needs_clflush = 0;
665 	if (!i915_gem_object_has_struct_page(obj))
666 		return -ENODEV;
667 
668 	ret = i915_gem_object_wait_rendering(obj, false);
669 	if (ret)
670 		return ret;
671 
672 	ret = i915_gem_object_get_pages(obj);
673 	if (ret)
674 		return ret;
675 
676 	i915_gem_object_pin_pages(obj);
677 
678 	i915_gem_object_flush_gtt_write_domain(obj);
679 
680 	/* If we're not in the cpu write domain, set ourself into the
681 	 * gtt write domain and manually flush cachelines (as required).
682 	 * This optimizes for the case when the gpu will use the data
683 	 * right away and we therefore have to clflush anyway.
684 	 */
685 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
686 		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
687 
688 	/* Same trick applies to invalidate partially written cachelines read
689 	 * before writing.
690 	 */
691 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
692 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
693 							 obj->cache_level);
694 
695 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
696 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
697 		if (ret)
698 			goto err_unpin;
699 
700 		*needs_clflush = 0;
701 	}
702 
703 	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
704 		obj->cache_dirty = true;
705 
706 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
707 	obj->dirty = 1;
708 	/* return with the pages pinned */
709 	return 0;
710 
711 err_unpin:
712 	i915_gem_object_unpin_pages(obj);
713 	return ret;
714 }
715 
716 /* Per-page copy function for the shmem pread fastpath.
717  * Flushes invalid cachelines before reading the target if
718  * needs_clflush is set. */
719 static int
720 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
721 		 char __user *user_data,
722 		 bool page_do_bit17_swizzling, bool needs_clflush)
723 {
724 	char *vaddr;
725 	int ret;
726 
727 	if (unlikely(page_do_bit17_swizzling))
728 		return -EINVAL;
729 
730 	vaddr = kmap_atomic(page);
731 	if (needs_clflush)
732 		drm_clflush_virt_range(vaddr + shmem_page_offset,
733 				       page_length);
734 	ret = __copy_to_user_inatomic(user_data,
735 				      vaddr + shmem_page_offset,
736 				      page_length);
737 	kunmap_atomic(vaddr);
738 
739 	return ret ? -EFAULT : 0;
740 }
741 
742 static void
743 shmem_clflush_swizzled_range(char *addr, unsigned long length,
744 			     bool swizzled)
745 {
746 	if (unlikely(swizzled)) {
747 		unsigned long start = (unsigned long) addr;
748 		unsigned long end = (unsigned long) addr + length;
749 
750 		/* For swizzling simply ensure that we always flush both
751 		 * channels. Lame, but simple and it works. Swizzled
752 		 * pwrite/pread is far from a hotpath - current userspace
753 		 * doesn't use it at all. */
754 		start = round_down(start, 128);
755 		end = round_up(end, 128);
756 
757 		drm_clflush_virt_range((void *)start, end - start);
758 	} else {
759 		drm_clflush_virt_range(addr, length);
760 	}
761 
762 }
763 
764 /* Only difference to the fast-path function is that this can handle bit17
765  * and uses non-atomic copy and kmap functions. */
766 static int
767 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
768 		 char __user *user_data,
769 		 bool page_do_bit17_swizzling, bool needs_clflush)
770 {
771 	char *vaddr;
772 	int ret;
773 
774 	vaddr = kmap(page);
775 	if (needs_clflush)
776 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
777 					     page_length,
778 					     page_do_bit17_swizzling);
779 
780 	if (page_do_bit17_swizzling)
781 		ret = __copy_to_user_swizzled(user_data,
782 					      vaddr, shmem_page_offset,
783 					      page_length);
784 	else
785 		ret = __copy_to_user(user_data,
786 				     vaddr + shmem_page_offset,
787 				     page_length);
788 	kunmap(page);
789 
790 	return ret ? - EFAULT : 0;
791 }
792 
793 static inline unsigned long
794 slow_user_access(struct io_mapping *mapping,
795 		 uint64_t page_base, int page_offset,
796 		 char __user *user_data,
797 		 unsigned long length, bool pwrite)
798 {
799 	void __iomem *ioaddr;
800 	void *vaddr;
801 	uint64_t unwritten;
802 
803 	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
804 	/* We can use the cpu mem copy function because this is X86. */
805 	vaddr = (void __force *)ioaddr + page_offset;
806 	if (pwrite)
807 		unwritten = __copy_from_user(vaddr, user_data, length);
808 	else
809 		unwritten = __copy_to_user(user_data, vaddr, length);
810 
811 	io_mapping_unmap(ioaddr);
812 	return unwritten;
813 }
814 
815 static int
816 i915_gem_gtt_pread(struct drm_device *dev,
817 		   struct drm_i915_gem_object *obj, uint64_t size,
818 		   uint64_t data_offset, uint64_t data_ptr)
819 {
820 	struct drm_i915_private *dev_priv = to_i915(dev);
821 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
822 	struct i915_vma *vma;
823 	struct drm_mm_node node;
824 	char __user *user_data;
825 	uint64_t remain;
826 	uint64_t offset;
827 	int ret;
828 
829 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
830 	if (!IS_ERR(vma)) {
831 		node.start = i915_ggtt_offset(vma);
832 		node.allocated = false;
833 		ret = i915_vma_put_fence(vma);
834 		if (ret) {
835 			i915_vma_unpin(vma);
836 			vma = ERR_PTR(ret);
837 		}
838 	}
839 	if (IS_ERR(vma)) {
840 		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
841 		if (ret)
842 			goto out;
843 
844 		ret = i915_gem_object_get_pages(obj);
845 		if (ret) {
846 			remove_mappable_node(&node);
847 			goto out;
848 		}
849 
850 		i915_gem_object_pin_pages(obj);
851 	}
852 
853 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
854 	if (ret)
855 		goto out_unpin;
856 
857 	user_data = u64_to_user_ptr(data_ptr);
858 	remain = size;
859 	offset = data_offset;
860 
861 	mutex_unlock(&dev->struct_mutex);
862 	if (likely(!i915.prefault_disable)) {
863 		ret = fault_in_pages_writeable(user_data, remain);
864 		if (ret) {
865 			mutex_lock(&dev->struct_mutex);
866 			goto out_unpin;
867 		}
868 	}
869 
870 	while (remain > 0) {
871 		/* Operation in this page
872 		 *
873 		 * page_base = page offset within aperture
874 		 * page_offset = offset within page
875 		 * page_length = bytes to copy for this page
876 		 */
877 		u32 page_base = node.start;
878 		unsigned page_offset = offset_in_page(offset);
879 		unsigned page_length = PAGE_SIZE - page_offset;
880 		page_length = remain < page_length ? remain : page_length;
881 		if (node.allocated) {
882 			wmb();
883 			ggtt->base.insert_page(&ggtt->base,
884 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
885 					       node.start,
886 					       I915_CACHE_NONE, 0);
887 			wmb();
888 		} else {
889 			page_base += offset & PAGE_MASK;
890 		}
891 		/* This is a slow read/write as it tries to read from
892 		 * and write to user memory which may result into page
893 		 * faults, and so we cannot perform this under struct_mutex.
894 		 */
895 		if (slow_user_access(&ggtt->mappable, page_base,
896 				     page_offset, user_data,
897 				     page_length, false)) {
898 			ret = -EFAULT;
899 			break;
900 		}
901 
902 		remain -= page_length;
903 		user_data += page_length;
904 		offset += page_length;
905 	}
906 
907 	mutex_lock(&dev->struct_mutex);
908 	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
909 		/* The user has modified the object whilst we tried
910 		 * reading from it, and we now have no idea what domain
911 		 * the pages should be in. As we have just been touching
912 		 * them directly, flush everything back to the GTT
913 		 * domain.
914 		 */
915 		ret = i915_gem_object_set_to_gtt_domain(obj, false);
916 	}
917 
918 out_unpin:
919 	if (node.allocated) {
920 		wmb();
921 		ggtt->base.clear_range(&ggtt->base,
922 				       node.start, node.size);
923 		i915_gem_object_unpin_pages(obj);
924 		remove_mappable_node(&node);
925 	} else {
926 		i915_vma_unpin(vma);
927 	}
928 out:
929 	return ret;
930 }
931 
932 static int
933 i915_gem_shmem_pread(struct drm_device *dev,
934 		     struct drm_i915_gem_object *obj,
935 		     struct drm_i915_gem_pread *args,
936 		     struct drm_file *file)
937 {
938 	char __user *user_data;
939 	ssize_t remain;
940 	loff_t offset;
941 	int shmem_page_offset, page_length, ret = 0;
942 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
943 	int prefaulted = 0;
944 	int needs_clflush = 0;
945 	struct sg_page_iter sg_iter;
946 
947 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
948 	if (ret)
949 		return ret;
950 
951 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
952 	user_data = u64_to_user_ptr(args->data_ptr);
953 	offset = args->offset;
954 	remain = args->size;
955 
956 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
957 			 offset >> PAGE_SHIFT) {
958 		struct page *page = sg_page_iter_page(&sg_iter);
959 
960 		if (remain <= 0)
961 			break;
962 
963 		/* Operation in this page
964 		 *
965 		 * shmem_page_offset = offset within page in shmem file
966 		 * page_length = bytes to copy for this page
967 		 */
968 		shmem_page_offset = offset_in_page(offset);
969 		page_length = remain;
970 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
971 			page_length = PAGE_SIZE - shmem_page_offset;
972 
973 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
974 			(page_to_phys(page) & (1 << 17)) != 0;
975 
976 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
977 				       user_data, page_do_bit17_swizzling,
978 				       needs_clflush);
979 		if (ret == 0)
980 			goto next_page;
981 
982 		mutex_unlock(&dev->struct_mutex);
983 
984 		if (likely(!i915.prefault_disable) && !prefaulted) {
985 			ret = fault_in_pages_writeable(user_data, remain);
986 			/* Userspace is tricking us, but we've already clobbered
987 			 * its pages with the prefault and promised to write the
988 			 * data up to the first fault. Hence ignore any errors
989 			 * and just continue. */
990 			(void)ret;
991 			prefaulted = 1;
992 		}
993 
994 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
995 				       user_data, page_do_bit17_swizzling,
996 				       needs_clflush);
997 
998 		mutex_lock(&dev->struct_mutex);
999 
1000 		if (ret)
1001 			goto out;
1002 
1003 next_page:
1004 		remain -= page_length;
1005 		user_data += page_length;
1006 		offset += page_length;
1007 	}
1008 
1009 out:
1010 	i915_gem_obj_finish_shmem_access(obj);
1011 
1012 	return ret;
1013 }
1014 
1015 /**
1016  * Reads data from the object referenced by handle.
1017  * @dev: drm device pointer
1018  * @data: ioctl data blob
1019  * @file: drm file pointer
1020  *
1021  * On error, the contents of *data are undefined.
1022  */
1023 int
1024 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1025 		     struct drm_file *file)
1026 {
1027 	struct drm_i915_gem_pread *args = data;
1028 	struct drm_i915_gem_object *obj;
1029 	int ret = 0;
1030 
1031 	if (args->size == 0)
1032 		return 0;
1033 
1034 	if (!access_ok(VERIFY_WRITE,
1035 		       u64_to_user_ptr(args->data_ptr),
1036 		       args->size))
1037 		return -EFAULT;
1038 
1039 	obj = i915_gem_object_lookup(file, args->handle);
1040 	if (!obj)
1041 		return -ENOENT;
1042 
1043 	/* Bounds check source.  */
1044 	if (args->offset > obj->base.size ||
1045 	    args->size > obj->base.size - args->offset) {
1046 		ret = -EINVAL;
1047 		goto err;
1048 	}
1049 
1050 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1051 
1052 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
1053 	if (ret)
1054 		goto err;
1055 
1056 	ret = i915_mutex_lock_interruptible(dev);
1057 	if (ret)
1058 		goto err;
1059 
1060 	ret = i915_gem_shmem_pread(dev, obj, args, file);
1061 
1062 	/* pread for non shmem backed objects */
1063 	if (ret == -EFAULT || ret == -ENODEV) {
1064 		intel_runtime_pm_get(to_i915(dev));
1065 		ret = i915_gem_gtt_pread(dev, obj, args->size,
1066 					args->offset, args->data_ptr);
1067 		intel_runtime_pm_put(to_i915(dev));
1068 	}
1069 
1070 	i915_gem_object_put(obj);
1071 	mutex_unlock(&dev->struct_mutex);
1072 
1073 	return ret;
1074 
1075 err:
1076 	i915_gem_object_put_unlocked(obj);
1077 	return ret;
1078 }
1079 
1080 /* This is the fast write path which cannot handle
1081  * page faults in the source data
1082  */
1083 
1084 static inline int
1085 fast_user_write(struct io_mapping *mapping,
1086 		loff_t page_base, int page_offset,
1087 		char __user *user_data,
1088 		int length)
1089 {
1090 	void __iomem *vaddr_atomic;
1091 	void *vaddr;
1092 	unsigned long unwritten;
1093 
1094 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
1095 	/* We can use the cpu mem copy function because this is X86. */
1096 	vaddr = (void __force*)vaddr_atomic + page_offset;
1097 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
1098 						      user_data, length);
1099 	io_mapping_unmap_atomic(vaddr_atomic);
1100 	return unwritten;
1101 }
1102 
1103 /**
1104  * This is the fast pwrite path, where we copy the data directly from the
1105  * user into the GTT, uncached.
1106  * @i915: i915 device private data
1107  * @obj: i915 gem object
1108  * @args: pwrite arguments structure
1109  * @file: drm file pointer
1110  */
1111 static int
1112 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1113 			 struct drm_i915_gem_object *obj,
1114 			 struct drm_i915_gem_pwrite *args,
1115 			 struct drm_file *file)
1116 {
1117 	struct i915_ggtt *ggtt = &i915->ggtt;
1118 	struct drm_device *dev = obj->base.dev;
1119 	struct i915_vma *vma;
1120 	struct drm_mm_node node;
1121 	uint64_t remain, offset;
1122 	char __user *user_data;
1123 	int ret;
1124 	bool hit_slow_path = false;
1125 
1126 	if (i915_gem_object_is_tiled(obj))
1127 		return -EFAULT;
1128 
1129 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1130 				       PIN_MAPPABLE | PIN_NONBLOCK);
1131 	if (!IS_ERR(vma)) {
1132 		node.start = i915_ggtt_offset(vma);
1133 		node.allocated = false;
1134 		ret = i915_vma_put_fence(vma);
1135 		if (ret) {
1136 			i915_vma_unpin(vma);
1137 			vma = ERR_PTR(ret);
1138 		}
1139 	}
1140 	if (IS_ERR(vma)) {
1141 		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
1142 		if (ret)
1143 			goto out;
1144 
1145 		ret = i915_gem_object_get_pages(obj);
1146 		if (ret) {
1147 			remove_mappable_node(&node);
1148 			goto out;
1149 		}
1150 
1151 		i915_gem_object_pin_pages(obj);
1152 	}
1153 
1154 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1155 	if (ret)
1156 		goto out_unpin;
1157 
1158 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1159 	obj->dirty = true;
1160 
1161 	user_data = u64_to_user_ptr(args->data_ptr);
1162 	offset = args->offset;
1163 	remain = args->size;
1164 	while (remain) {
1165 		/* Operation in this page
1166 		 *
1167 		 * page_base = page offset within aperture
1168 		 * page_offset = offset within page
1169 		 * page_length = bytes to copy for this page
1170 		 */
1171 		u32 page_base = node.start;
1172 		unsigned page_offset = offset_in_page(offset);
1173 		unsigned page_length = PAGE_SIZE - page_offset;
1174 		page_length = remain < page_length ? remain : page_length;
1175 		if (node.allocated) {
1176 			wmb(); /* flush the write before we modify the GGTT */
1177 			ggtt->base.insert_page(&ggtt->base,
1178 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1179 					       node.start, I915_CACHE_NONE, 0);
1180 			wmb(); /* flush modifications to the GGTT (insert_page) */
1181 		} else {
1182 			page_base += offset & PAGE_MASK;
1183 		}
1184 		/* If we get a fault while copying data, then (presumably) our
1185 		 * source page isn't available.  Return the error and we'll
1186 		 * retry in the slow path.
1187 		 * If the object is non-shmem backed, we retry again with the
1188 		 * path that handles page fault.
1189 		 */
1190 		if (fast_user_write(&ggtt->mappable, page_base,
1191 				    page_offset, user_data, page_length)) {
1192 			hit_slow_path = true;
1193 			mutex_unlock(&dev->struct_mutex);
1194 			if (slow_user_access(&ggtt->mappable,
1195 					     page_base,
1196 					     page_offset, user_data,
1197 					     page_length, true)) {
1198 				ret = -EFAULT;
1199 				mutex_lock(&dev->struct_mutex);
1200 				goto out_flush;
1201 			}
1202 
1203 			mutex_lock(&dev->struct_mutex);
1204 		}
1205 
1206 		remain -= page_length;
1207 		user_data += page_length;
1208 		offset += page_length;
1209 	}
1210 
1211 out_flush:
1212 	if (hit_slow_path) {
1213 		if (ret == 0 &&
1214 		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1215 			/* The user has modified the object whilst we tried
1216 			 * reading from it, and we now have no idea what domain
1217 			 * the pages should be in. As we have just been touching
1218 			 * them directly, flush everything back to the GTT
1219 			 * domain.
1220 			 */
1221 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
1222 		}
1223 	}
1224 
1225 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1226 out_unpin:
1227 	if (node.allocated) {
1228 		wmb();
1229 		ggtt->base.clear_range(&ggtt->base,
1230 				       node.start, node.size);
1231 		i915_gem_object_unpin_pages(obj);
1232 		remove_mappable_node(&node);
1233 	} else {
1234 		i915_vma_unpin(vma);
1235 	}
1236 out:
1237 	return ret;
1238 }
1239 
1240 /* Per-page copy function for the shmem pwrite fastpath.
1241  * Flushes invalid cachelines before writing to the target if
1242  * needs_clflush_before is set and flushes out any written cachelines after
1243  * writing if needs_clflush is set. */
1244 static int
1245 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1246 		  char __user *user_data,
1247 		  bool page_do_bit17_swizzling,
1248 		  bool needs_clflush_before,
1249 		  bool needs_clflush_after)
1250 {
1251 	char *vaddr;
1252 	int ret;
1253 
1254 	if (unlikely(page_do_bit17_swizzling))
1255 		return -EINVAL;
1256 
1257 	vaddr = kmap_atomic(page);
1258 	if (needs_clflush_before)
1259 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1260 				       page_length);
1261 	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1262 					user_data, page_length);
1263 	if (needs_clflush_after)
1264 		drm_clflush_virt_range(vaddr + shmem_page_offset,
1265 				       page_length);
1266 	kunmap_atomic(vaddr);
1267 
1268 	return ret ? -EFAULT : 0;
1269 }
1270 
1271 /* Only difference to the fast-path function is that this can handle bit17
1272  * and uses non-atomic copy and kmap functions. */
1273 static int
1274 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1275 		  char __user *user_data,
1276 		  bool page_do_bit17_swizzling,
1277 		  bool needs_clflush_before,
1278 		  bool needs_clflush_after)
1279 {
1280 	char *vaddr;
1281 	int ret;
1282 
1283 	vaddr = kmap(page);
1284 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1285 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1286 					     page_length,
1287 					     page_do_bit17_swizzling);
1288 	if (page_do_bit17_swizzling)
1289 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
1290 						user_data,
1291 						page_length);
1292 	else
1293 		ret = __copy_from_user(vaddr + shmem_page_offset,
1294 				       user_data,
1295 				       page_length);
1296 	if (needs_clflush_after)
1297 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1298 					     page_length,
1299 					     page_do_bit17_swizzling);
1300 	kunmap(page);
1301 
1302 	return ret ? -EFAULT : 0;
1303 }
1304 
1305 static int
1306 i915_gem_shmem_pwrite(struct drm_device *dev,
1307 		      struct drm_i915_gem_object *obj,
1308 		      struct drm_i915_gem_pwrite *args,
1309 		      struct drm_file *file)
1310 {
1311 	ssize_t remain;
1312 	loff_t offset;
1313 	char __user *user_data;
1314 	int shmem_page_offset, page_length, ret = 0;
1315 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
1316 	int hit_slowpath = 0;
1317 	unsigned int needs_clflush;
1318 	struct sg_page_iter sg_iter;
1319 
1320 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1321 	if (ret)
1322 		return ret;
1323 
1324 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
1325 	user_data = u64_to_user_ptr(args->data_ptr);
1326 	offset = args->offset;
1327 	remain = args->size;
1328 
1329 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1330 			 offset >> PAGE_SHIFT) {
1331 		struct page *page = sg_page_iter_page(&sg_iter);
1332 		int partial_cacheline_write;
1333 
1334 		if (remain <= 0)
1335 			break;
1336 
1337 		/* Operation in this page
1338 		 *
1339 		 * shmem_page_offset = offset within page in shmem file
1340 		 * page_length = bytes to copy for this page
1341 		 */
1342 		shmem_page_offset = offset_in_page(offset);
1343 
1344 		page_length = remain;
1345 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
1346 			page_length = PAGE_SIZE - shmem_page_offset;
1347 
1348 		/* If we don't overwrite a cacheline completely we need to be
1349 		 * careful to have up-to-date data by first clflushing. Don't
1350 		 * overcomplicate things and flush the entire patch. */
1351 		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
1352 			((shmem_page_offset | page_length)
1353 				& (boot_cpu_data.x86_clflush_size - 1));
1354 
1355 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1356 			(page_to_phys(page) & (1 << 17)) != 0;
1357 
1358 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1359 					user_data, page_do_bit17_swizzling,
1360 					partial_cacheline_write,
1361 					needs_clflush & CLFLUSH_AFTER);
1362 		if (ret == 0)
1363 			goto next_page;
1364 
1365 		hit_slowpath = 1;
1366 		mutex_unlock(&dev->struct_mutex);
1367 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1368 					user_data, page_do_bit17_swizzling,
1369 					partial_cacheline_write,
1370 					needs_clflush & CLFLUSH_AFTER);
1371 
1372 		mutex_lock(&dev->struct_mutex);
1373 
1374 		if (ret)
1375 			goto out;
1376 
1377 next_page:
1378 		remain -= page_length;
1379 		user_data += page_length;
1380 		offset += page_length;
1381 	}
1382 
1383 out:
1384 	i915_gem_obj_finish_shmem_access(obj);
1385 
1386 	if (hit_slowpath) {
1387 		/*
1388 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
1389 		 * cachelines in-line while writing and the object moved
1390 		 * out of the cpu write domain while we've dropped the lock.
1391 		 */
1392 		if (!(needs_clflush & CLFLUSH_AFTER) &&
1393 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1394 			if (i915_gem_clflush_object(obj, obj->pin_display))
1395 				needs_clflush |= CLFLUSH_AFTER;
1396 		}
1397 	}
1398 
1399 	if (needs_clflush & CLFLUSH_AFTER)
1400 		i915_gem_chipset_flush(to_i915(dev));
1401 
1402 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1403 	return ret;
1404 }
1405 
1406 /**
1407  * Writes data to the object referenced by handle.
1408  * @dev: drm device
1409  * @data: ioctl data blob
1410  * @file: drm file
1411  *
1412  * On error, the contents of the buffer that were to be modified are undefined.
1413  */
1414 int
1415 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1416 		      struct drm_file *file)
1417 {
1418 	struct drm_i915_private *dev_priv = to_i915(dev);
1419 	struct drm_i915_gem_pwrite *args = data;
1420 	struct drm_i915_gem_object *obj;
1421 	int ret;
1422 
1423 	if (args->size == 0)
1424 		return 0;
1425 
1426 	if (!access_ok(VERIFY_READ,
1427 		       u64_to_user_ptr(args->data_ptr),
1428 		       args->size))
1429 		return -EFAULT;
1430 
1431 	if (likely(!i915.prefault_disable)) {
1432 		ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr),
1433 						   args->size);
1434 		if (ret)
1435 			return -EFAULT;
1436 	}
1437 
1438 	obj = i915_gem_object_lookup(file, args->handle);
1439 	if (!obj)
1440 		return -ENOENT;
1441 
1442 	/* Bounds check destination. */
1443 	if (args->offset > obj->base.size ||
1444 	    args->size > obj->base.size - args->offset) {
1445 		ret = -EINVAL;
1446 		goto err;
1447 	}
1448 
1449 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1450 
1451 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
1452 	if (ret)
1453 		goto err;
1454 
1455 	intel_runtime_pm_get(dev_priv);
1456 
1457 	ret = i915_mutex_lock_interruptible(dev);
1458 	if (ret)
1459 		goto err_rpm;
1460 
1461 	ret = -EFAULT;
1462 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1463 	 * it would end up going through the fenced access, and we'll get
1464 	 * different detiling behavior between reading and writing.
1465 	 * pread/pwrite currently are reading and writing from the CPU
1466 	 * perspective, requiring manual detiling by the client.
1467 	 */
1468 	if (!i915_gem_object_has_struct_page(obj) ||
1469 	    cpu_write_needs_clflush(obj)) {
1470 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
1471 		/* Note that the gtt paths might fail with non-page-backed user
1472 		 * pointers (e.g. gtt mappings when moving data between
1473 		 * textures). Fallback to the shmem path in that case. */
1474 	}
1475 
1476 	if (ret == -EFAULT || ret == -ENOSPC) {
1477 		if (obj->phys_handle)
1478 			ret = i915_gem_phys_pwrite(obj, args, file);
1479 		else
1480 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1481 	}
1482 
1483 	i915_gem_object_put(obj);
1484 	mutex_unlock(&dev->struct_mutex);
1485 	intel_runtime_pm_put(dev_priv);
1486 
1487 	return ret;
1488 
1489 err_rpm:
1490 	intel_runtime_pm_put(dev_priv);
1491 err:
1492 	i915_gem_object_put_unlocked(obj);
1493 	return ret;
1494 }
1495 
1496 static inline enum fb_op_origin
1497 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1498 {
1499 	return (domain == I915_GEM_DOMAIN_GTT ?
1500 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1501 }
1502 
1503 /**
1504  * Called when user space prepares to use an object with the CPU, either
1505  * through the mmap ioctl's mapping or a GTT mapping.
1506  * @dev: drm device
1507  * @data: ioctl data blob
1508  * @file: drm file
1509  */
1510 int
1511 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1512 			  struct drm_file *file)
1513 {
1514 	struct drm_i915_gem_set_domain *args = data;
1515 	struct drm_i915_gem_object *obj;
1516 	uint32_t read_domains = args->read_domains;
1517 	uint32_t write_domain = args->write_domain;
1518 	int ret;
1519 
1520 	/* Only handle setting domains to types used by the CPU. */
1521 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1522 		return -EINVAL;
1523 
1524 	/* Having something in the write domain implies it's in the read
1525 	 * domain, and only that read domain.  Enforce that in the request.
1526 	 */
1527 	if (write_domain != 0 && read_domains != write_domain)
1528 		return -EINVAL;
1529 
1530 	obj = i915_gem_object_lookup(file, args->handle);
1531 	if (!obj)
1532 		return -ENOENT;
1533 
1534 	/* Try to flush the object off the GPU without holding the lock.
1535 	 * We will repeat the flush holding the lock in the normal manner
1536 	 * to catch cases where we are gazumped.
1537 	 */
1538 	ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
1539 	if (ret)
1540 		goto err;
1541 
1542 	ret = i915_mutex_lock_interruptible(dev);
1543 	if (ret)
1544 		goto err;
1545 
1546 	if (read_domains & I915_GEM_DOMAIN_GTT)
1547 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1548 	else
1549 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1550 
1551 	if (write_domain != 0)
1552 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1553 
1554 	i915_gem_object_put(obj);
1555 	mutex_unlock(&dev->struct_mutex);
1556 	return ret;
1557 
1558 err:
1559 	i915_gem_object_put_unlocked(obj);
1560 	return ret;
1561 }
1562 
1563 /**
1564  * Called when user space has done writes to this buffer
1565  * @dev: drm device
1566  * @data: ioctl data blob
1567  * @file: drm file
1568  */
1569 int
1570 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1571 			 struct drm_file *file)
1572 {
1573 	struct drm_i915_gem_sw_finish *args = data;
1574 	struct drm_i915_gem_object *obj;
1575 	int err = 0;
1576 
1577 	obj = i915_gem_object_lookup(file, args->handle);
1578 	if (!obj)
1579 		return -ENOENT;
1580 
1581 	/* Pinned buffers may be scanout, so flush the cache */
1582 	if (READ_ONCE(obj->pin_display)) {
1583 		err = i915_mutex_lock_interruptible(dev);
1584 		if (!err) {
1585 			i915_gem_object_flush_cpu_write_domain(obj);
1586 			mutex_unlock(&dev->struct_mutex);
1587 		}
1588 	}
1589 
1590 	i915_gem_object_put_unlocked(obj);
1591 	return err;
1592 }
1593 
1594 /**
1595  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1596  *			 it is mapped to.
1597  * @dev: drm device
1598  * @data: ioctl data blob
1599  * @file: drm file
1600  *
1601  * While the mapping holds a reference on the contents of the object, it doesn't
1602  * imply a ref on the object itself.
1603  *
1604  * IMPORTANT:
1605  *
1606  * DRM driver writers who look a this function as an example for how to do GEM
1607  * mmap support, please don't implement mmap support like here. The modern way
1608  * to implement DRM mmap support is with an mmap offset ioctl (like
1609  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1610  * That way debug tooling like valgrind will understand what's going on, hiding
1611  * the mmap call in a driver private ioctl will break that. The i915 driver only
1612  * does cpu mmaps this way because we didn't know better.
1613  */
1614 int
1615 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1616 		    struct drm_file *file)
1617 {
1618 	struct drm_i915_gem_mmap *args = data;
1619 	struct drm_i915_gem_object *obj;
1620 	unsigned long addr;
1621 
1622 	if (args->flags & ~(I915_MMAP_WC))
1623 		return -EINVAL;
1624 
1625 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1626 		return -ENODEV;
1627 
1628 	obj = i915_gem_object_lookup(file, args->handle);
1629 	if (!obj)
1630 		return -ENOENT;
1631 
1632 	/* prime objects have no backing filp to GEM mmap
1633 	 * pages from.
1634 	 */
1635 	if (!obj->base.filp) {
1636 		i915_gem_object_put_unlocked(obj);
1637 		return -EINVAL;
1638 	}
1639 
1640 	addr = vm_mmap(obj->base.filp, 0, args->size,
1641 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1642 		       args->offset);
1643 	if (args->flags & I915_MMAP_WC) {
1644 		struct mm_struct *mm = current->mm;
1645 		struct vm_area_struct *vma;
1646 
1647 		if (down_write_killable(&mm->mmap_sem)) {
1648 			i915_gem_object_put_unlocked(obj);
1649 			return -EINTR;
1650 		}
1651 		vma = find_vma(mm, addr);
1652 		if (vma)
1653 			vma->vm_page_prot =
1654 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1655 		else
1656 			addr = -ENOMEM;
1657 		up_write(&mm->mmap_sem);
1658 
1659 		/* This may race, but that's ok, it only gets set */
1660 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1661 	}
1662 	i915_gem_object_put_unlocked(obj);
1663 	if (IS_ERR((void *)addr))
1664 		return addr;
1665 
1666 	args->addr_ptr = (uint64_t) addr;
1667 
1668 	return 0;
1669 }
1670 
1671 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1672 {
1673 	u64 size;
1674 
1675 	size = i915_gem_object_get_stride(obj);
1676 	size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1677 
1678 	return size >> PAGE_SHIFT;
1679 }
1680 
1681 /**
1682  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1683  *
1684  * A history of the GTT mmap interface:
1685  *
1686  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1687  *     aligned and suitable for fencing, and still fit into the available
1688  *     mappable space left by the pinned display objects. A classic problem
1689  *     we called the page-fault-of-doom where we would ping-pong between
1690  *     two objects that could not fit inside the GTT and so the memcpy
1691  *     would page one object in at the expense of the other between every
1692  *     single byte.
1693  *
1694  * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
1695  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1696  *     object is too large for the available space (or simply too large
1697  *     for the mappable aperture!), a view is created instead and faulted
1698  *     into userspace. (This view is aligned and sized appropriately for
1699  *     fenced access.)
1700  *
1701  * Restrictions:
1702  *
1703  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1704  *    hangs on some architectures, corruption on others. An attempt to service
1705  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1706  *
1707  *  * the object must be able to fit into RAM (physical memory, though no
1708  *    limited to the mappable aperture).
1709  *
1710  *
1711  * Caveats:
1712  *
1713  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1714  *    all data to system memory. Subsequent access will not be synchronized.
1715  *
1716  *  * all mappings are revoked on runtime device suspend.
1717  *
1718  *  * there are only 8, 16 or 32 fence registers to share between all users
1719  *    (older machines require fence register for display and blitter access
1720  *    as well). Contention of the fence registers will cause the previous users
1721  *    to be unmapped and any new access will generate new page faults.
1722  *
1723  *  * running out of memory while servicing a fault may generate a SIGBUS,
1724  *    rather than the expected SIGSEGV.
1725  */
1726 int i915_gem_mmap_gtt_version(void)
1727 {
1728 	return 1;
1729 }
1730 
1731 /**
1732  * i915_gem_fault - fault a page into the GTT
1733  * @area: CPU VMA in question
1734  * @vmf: fault info
1735  *
1736  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1737  * from userspace.  The fault handler takes care of binding the object to
1738  * the GTT (if needed), allocating and programming a fence register (again,
1739  * only if needed based on whether the old reg is still valid or the object
1740  * is tiled) and inserting a new PTE into the faulting process.
1741  *
1742  * Note that the faulting process may involve evicting existing objects
1743  * from the GTT and/or fence registers to make room.  So performance may
1744  * suffer if the GTT working set is large or there are few fence registers
1745  * left.
1746  *
1747  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1748  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1749  */
1750 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1751 {
1752 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1753 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1754 	struct drm_device *dev = obj->base.dev;
1755 	struct drm_i915_private *dev_priv = to_i915(dev);
1756 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1757 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1758 	struct i915_vma *vma;
1759 	pgoff_t page_offset;
1760 	unsigned int flags;
1761 	int ret;
1762 
1763 	/* We don't use vmf->pgoff since that has the fake offset */
1764 	page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
1765 		PAGE_SHIFT;
1766 
1767 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1768 
1769 	/* Try to flush the object off the GPU first without holding the lock.
1770 	 * Upon acquiring the lock, we will perform our sanity checks and then
1771 	 * repeat the flush holding the lock in the normal manner to catch cases
1772 	 * where we are gazumped.
1773 	 */
1774 	ret = __unsafe_wait_rendering(obj, NULL, !write);
1775 	if (ret)
1776 		goto err;
1777 
1778 	intel_runtime_pm_get(dev_priv);
1779 
1780 	ret = i915_mutex_lock_interruptible(dev);
1781 	if (ret)
1782 		goto err_rpm;
1783 
1784 	/* Access to snoopable pages through the GTT is incoherent. */
1785 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1786 		ret = -EFAULT;
1787 		goto err_unlock;
1788 	}
1789 
1790 	/* If the object is smaller than a couple of partial vma, it is
1791 	 * not worth only creating a single partial vma - we may as well
1792 	 * clear enough space for the full object.
1793 	 */
1794 	flags = PIN_MAPPABLE;
1795 	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1796 		flags |= PIN_NONBLOCK | PIN_NONFAULT;
1797 
1798 	/* Now pin it into the GTT as needed */
1799 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1800 	if (IS_ERR(vma)) {
1801 		struct i915_ggtt_view view;
1802 		unsigned int chunk_size;
1803 
1804 		/* Use a partial view if it is bigger than available space */
1805 		chunk_size = MIN_CHUNK_PAGES;
1806 		if (i915_gem_object_is_tiled(obj))
1807 			chunk_size = max(chunk_size, tile_row_pages(obj));
1808 
1809 		memset(&view, 0, sizeof(view));
1810 		view.type = I915_GGTT_VIEW_PARTIAL;
1811 		view.params.partial.offset = rounddown(page_offset, chunk_size);
1812 		view.params.partial.size =
1813 			min_t(unsigned int, chunk_size,
1814 			      vma_pages(area) - view.params.partial.offset);
1815 
1816 		/* If the partial covers the entire object, just create a
1817 		 * normal VMA.
1818 		 */
1819 		if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1820 			view.type = I915_GGTT_VIEW_NORMAL;
1821 
1822 		/* Userspace is now writing through an untracked VMA, abandon
1823 		 * all hope that the hardware is able to track future writes.
1824 		 */
1825 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1826 
1827 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1828 	}
1829 	if (IS_ERR(vma)) {
1830 		ret = PTR_ERR(vma);
1831 		goto err_unlock;
1832 	}
1833 
1834 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1835 	if (ret)
1836 		goto err_unpin;
1837 
1838 	ret = i915_vma_get_fence(vma);
1839 	if (ret)
1840 		goto err_unpin;
1841 
1842 	/* Finally, remap it using the new GTT offset */
1843 	ret = remap_io_mapping(area,
1844 			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1845 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1846 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
1847 			       &ggtt->mappable);
1848 	if (ret)
1849 		goto err_unpin;
1850 
1851 	obj->fault_mappable = true;
1852 err_unpin:
1853 	__i915_vma_unpin(vma);
1854 err_unlock:
1855 	mutex_unlock(&dev->struct_mutex);
1856 err_rpm:
1857 	intel_runtime_pm_put(dev_priv);
1858 err:
1859 	switch (ret) {
1860 	case -EIO:
1861 		/*
1862 		 * We eat errors when the gpu is terminally wedged to avoid
1863 		 * userspace unduly crashing (gl has no provisions for mmaps to
1864 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1865 		 * and so needs to be reported.
1866 		 */
1867 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1868 			ret = VM_FAULT_SIGBUS;
1869 			break;
1870 		}
1871 	case -EAGAIN:
1872 		/*
1873 		 * EAGAIN means the gpu is hung and we'll wait for the error
1874 		 * handler to reset everything when re-faulting in
1875 		 * i915_mutex_lock_interruptible.
1876 		 */
1877 	case 0:
1878 	case -ERESTARTSYS:
1879 	case -EINTR:
1880 	case -EBUSY:
1881 		/*
1882 		 * EBUSY is ok: this just means that another thread
1883 		 * already did the job.
1884 		 */
1885 		ret = VM_FAULT_NOPAGE;
1886 		break;
1887 	case -ENOMEM:
1888 		ret = VM_FAULT_OOM;
1889 		break;
1890 	case -ENOSPC:
1891 	case -EFAULT:
1892 		ret = VM_FAULT_SIGBUS;
1893 		break;
1894 	default:
1895 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1896 		ret = VM_FAULT_SIGBUS;
1897 		break;
1898 	}
1899 	return ret;
1900 }
1901 
1902 /**
1903  * i915_gem_release_mmap - remove physical page mappings
1904  * @obj: obj in question
1905  *
1906  * Preserve the reservation of the mmapping with the DRM core code, but
1907  * relinquish ownership of the pages back to the system.
1908  *
1909  * It is vital that we remove the page mapping if we have mapped a tiled
1910  * object through the GTT and then lose the fence register due to
1911  * resource pressure. Similarly if the object has been moved out of the
1912  * aperture, than pages mapped into userspace must be revoked. Removing the
1913  * mapping will then trigger a page fault on the next user access, allowing
1914  * fixup by i915_gem_fault().
1915  */
1916 void
1917 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1918 {
1919 	/* Serialisation between user GTT access and our code depends upon
1920 	 * revoking the CPU's PTE whilst the mutex is held. The next user
1921 	 * pagefault then has to wait until we release the mutex.
1922 	 */
1923 	lockdep_assert_held(&obj->base.dev->struct_mutex);
1924 
1925 	if (!obj->fault_mappable)
1926 		return;
1927 
1928 	drm_vma_node_unmap(&obj->base.vma_node,
1929 			   obj->base.dev->anon_inode->i_mapping);
1930 
1931 	/* Ensure that the CPU's PTE are revoked and there are not outstanding
1932 	 * memory transactions from userspace before we return. The TLB
1933 	 * flushing implied above by changing the PTE above *should* be
1934 	 * sufficient, an extra barrier here just provides us with a bit
1935 	 * of paranoid documentation about our requirement to serialise
1936 	 * memory writes before touching registers / GSM.
1937 	 */
1938 	wmb();
1939 
1940 	obj->fault_mappable = false;
1941 }
1942 
1943 void
1944 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1945 {
1946 	struct drm_i915_gem_object *obj;
1947 
1948 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1949 		i915_gem_release_mmap(obj);
1950 }
1951 
1952 /**
1953  * i915_gem_get_ggtt_size - return required global GTT size for an object
1954  * @dev_priv: i915 device
1955  * @size: object size
1956  * @tiling_mode: tiling mode
1957  *
1958  * Return the required global GTT size for an object, taking into account
1959  * potential fence register mapping.
1960  */
1961 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1962 			   u64 size, int tiling_mode)
1963 {
1964 	u64 ggtt_size;
1965 
1966 	GEM_BUG_ON(size == 0);
1967 
1968 	if (INTEL_GEN(dev_priv) >= 4 ||
1969 	    tiling_mode == I915_TILING_NONE)
1970 		return size;
1971 
1972 	/* Previous chips need a power-of-two fence region when tiling */
1973 	if (IS_GEN3(dev_priv))
1974 		ggtt_size = 1024*1024;
1975 	else
1976 		ggtt_size = 512*1024;
1977 
1978 	while (ggtt_size < size)
1979 		ggtt_size <<= 1;
1980 
1981 	return ggtt_size;
1982 }
1983 
1984 /**
1985  * i915_gem_get_ggtt_alignment - return required global GTT alignment
1986  * @dev_priv: i915 device
1987  * @size: object size
1988  * @tiling_mode: tiling mode
1989  * @fenced: is fenced alignment required or not
1990  *
1991  * Return the required global GTT alignment for an object, taking into account
1992  * potential fence register mapping.
1993  */
1994 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
1995 				int tiling_mode, bool fenced)
1996 {
1997 	GEM_BUG_ON(size == 0);
1998 
1999 	/*
2000 	 * Minimum alignment is 4k (GTT page size), but might be greater
2001 	 * if a fence register is needed for the object.
2002 	 */
2003 	if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2004 	    tiling_mode == I915_TILING_NONE)
2005 		return 4096;
2006 
2007 	/*
2008 	 * Previous chips need to be aligned to the size of the smallest
2009 	 * fence register that can contain the object.
2010 	 */
2011 	return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2012 }
2013 
2014 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2015 {
2016 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2017 	int err;
2018 
2019 	err = drm_gem_create_mmap_offset(&obj->base);
2020 	if (!err)
2021 		return 0;
2022 
2023 	/* We can idle the GPU locklessly to flush stale objects, but in order
2024 	 * to claim that space for ourselves, we need to take the big
2025 	 * struct_mutex to free the requests+objects and allocate our slot.
2026 	 */
2027 	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2028 	if (err)
2029 		return err;
2030 
2031 	err = i915_mutex_lock_interruptible(&dev_priv->drm);
2032 	if (!err) {
2033 		i915_gem_retire_requests(dev_priv);
2034 		err = drm_gem_create_mmap_offset(&obj->base);
2035 		mutex_unlock(&dev_priv->drm.struct_mutex);
2036 	}
2037 
2038 	return err;
2039 }
2040 
2041 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2042 {
2043 	drm_gem_free_mmap_offset(&obj->base);
2044 }
2045 
2046 int
2047 i915_gem_mmap_gtt(struct drm_file *file,
2048 		  struct drm_device *dev,
2049 		  uint32_t handle,
2050 		  uint64_t *offset)
2051 {
2052 	struct drm_i915_gem_object *obj;
2053 	int ret;
2054 
2055 	obj = i915_gem_object_lookup(file, handle);
2056 	if (!obj)
2057 		return -ENOENT;
2058 
2059 	ret = i915_gem_object_create_mmap_offset(obj);
2060 	if (ret == 0)
2061 		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2062 
2063 	i915_gem_object_put_unlocked(obj);
2064 	return ret;
2065 }
2066 
2067 /**
2068  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2069  * @dev: DRM device
2070  * @data: GTT mapping ioctl data
2071  * @file: GEM object info
2072  *
2073  * Simply returns the fake offset to userspace so it can mmap it.
2074  * The mmap call will end up in drm_gem_mmap(), which will set things
2075  * up so we can get faults in the handler above.
2076  *
2077  * The fault handler will take care of binding the object into the GTT
2078  * (since it may have been evicted to make room for something), allocating
2079  * a fence register, and mapping the appropriate aperture address into
2080  * userspace.
2081  */
2082 int
2083 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2084 			struct drm_file *file)
2085 {
2086 	struct drm_i915_gem_mmap_gtt *args = data;
2087 
2088 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2089 }
2090 
2091 /* Immediately discard the backing storage */
2092 static void
2093 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2094 {
2095 	i915_gem_object_free_mmap_offset(obj);
2096 
2097 	if (obj->base.filp == NULL)
2098 		return;
2099 
2100 	/* Our goal here is to return as much of the memory as
2101 	 * is possible back to the system as we are called from OOM.
2102 	 * To do this we must instruct the shmfs to drop all of its
2103 	 * backing pages, *now*.
2104 	 */
2105 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2106 	obj->madv = __I915_MADV_PURGED;
2107 }
2108 
2109 /* Try to discard unwanted pages */
2110 static void
2111 i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2112 {
2113 	struct address_space *mapping;
2114 
2115 	switch (obj->madv) {
2116 	case I915_MADV_DONTNEED:
2117 		i915_gem_object_truncate(obj);
2118 	case __I915_MADV_PURGED:
2119 		return;
2120 	}
2121 
2122 	if (obj->base.filp == NULL)
2123 		return;
2124 
2125 	mapping = obj->base.filp->f_mapping,
2126 	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2127 }
2128 
2129 static void
2130 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
2131 {
2132 	struct sgt_iter sgt_iter;
2133 	struct page *page;
2134 	int ret;
2135 
2136 	BUG_ON(obj->madv == __I915_MADV_PURGED);
2137 
2138 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
2139 	if (WARN_ON(ret)) {
2140 		/* In the event of a disaster, abandon all caches and
2141 		 * hope for the best.
2142 		 */
2143 		i915_gem_clflush_object(obj, true);
2144 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2145 	}
2146 
2147 	i915_gem_gtt_finish_object(obj);
2148 
2149 	if (i915_gem_object_needs_bit17_swizzle(obj))
2150 		i915_gem_object_save_bit_17_swizzle(obj);
2151 
2152 	if (obj->madv == I915_MADV_DONTNEED)
2153 		obj->dirty = 0;
2154 
2155 	for_each_sgt_page(page, sgt_iter, obj->pages) {
2156 		if (obj->dirty)
2157 			set_page_dirty(page);
2158 
2159 		if (obj->madv == I915_MADV_WILLNEED)
2160 			mark_page_accessed(page);
2161 
2162 		put_page(page);
2163 	}
2164 	obj->dirty = 0;
2165 
2166 	sg_free_table(obj->pages);
2167 	kfree(obj->pages);
2168 }
2169 
2170 int
2171 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2172 {
2173 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2174 
2175 	if (obj->pages == NULL)
2176 		return 0;
2177 
2178 	if (obj->pages_pin_count)
2179 		return -EBUSY;
2180 
2181 	GEM_BUG_ON(obj->bind_count);
2182 
2183 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2184 	 * array, hence protect them from being reaped by removing them from gtt
2185 	 * lists early. */
2186 	list_del(&obj->global_list);
2187 
2188 	if (obj->mapping) {
2189 		void *ptr;
2190 
2191 		ptr = ptr_mask_bits(obj->mapping);
2192 		if (is_vmalloc_addr(ptr))
2193 			vunmap(ptr);
2194 		else
2195 			kunmap(kmap_to_page(ptr));
2196 
2197 		obj->mapping = NULL;
2198 	}
2199 
2200 	ops->put_pages(obj);
2201 	obj->pages = NULL;
2202 
2203 	i915_gem_object_invalidate(obj);
2204 
2205 	return 0;
2206 }
2207 
2208 static unsigned int swiotlb_max_size(void)
2209 {
2210 #if IS_ENABLED(CONFIG_SWIOTLB)
2211 	return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2212 #else
2213 	return 0;
2214 #endif
2215 }
2216 
2217 static int
2218 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2219 {
2220 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2221 	int page_count, i;
2222 	struct address_space *mapping;
2223 	struct sg_table *st;
2224 	struct scatterlist *sg;
2225 	struct sgt_iter sgt_iter;
2226 	struct page *page;
2227 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2228 	unsigned int max_segment;
2229 	int ret;
2230 	gfp_t gfp;
2231 
2232 	/* Assert that the object is not currently in any GPU domain. As it
2233 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2234 	 * a GPU cache
2235 	 */
2236 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2237 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2238 
2239 	max_segment = swiotlb_max_size();
2240 	if (!max_segment)
2241 		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
2242 
2243 	st = kmalloc(sizeof(*st), GFP_KERNEL);
2244 	if (st == NULL)
2245 		return -ENOMEM;
2246 
2247 	page_count = obj->base.size / PAGE_SIZE;
2248 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2249 		kfree(st);
2250 		return -ENOMEM;
2251 	}
2252 
2253 	/* Get the list of pages out of our struct file.  They'll be pinned
2254 	 * at this point until we release them.
2255 	 *
2256 	 * Fail silently without starting the shrinker
2257 	 */
2258 	mapping = obj->base.filp->f_mapping;
2259 	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2260 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
2261 	sg = st->sgl;
2262 	st->nents = 0;
2263 	for (i = 0; i < page_count; i++) {
2264 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2265 		if (IS_ERR(page)) {
2266 			i915_gem_shrink(dev_priv,
2267 					page_count,
2268 					I915_SHRINK_BOUND |
2269 					I915_SHRINK_UNBOUND |
2270 					I915_SHRINK_PURGEABLE);
2271 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2272 		}
2273 		if (IS_ERR(page)) {
2274 			/* We've tried hard to allocate the memory by reaping
2275 			 * our own buffer, now let the real VM do its job and
2276 			 * go down in flames if truly OOM.
2277 			 */
2278 			page = shmem_read_mapping_page(mapping, i);
2279 			if (IS_ERR(page)) {
2280 				ret = PTR_ERR(page);
2281 				goto err_pages;
2282 			}
2283 		}
2284 		if (!i ||
2285 		    sg->length >= max_segment ||
2286 		    page_to_pfn(page) != last_pfn + 1) {
2287 			if (i)
2288 				sg = sg_next(sg);
2289 			st->nents++;
2290 			sg_set_page(sg, page, PAGE_SIZE, 0);
2291 		} else {
2292 			sg->length += PAGE_SIZE;
2293 		}
2294 		last_pfn = page_to_pfn(page);
2295 
2296 		/* Check that the i965g/gm workaround works. */
2297 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2298 	}
2299 	if (sg) /* loop terminated early; short sg table */
2300 		sg_mark_end(sg);
2301 	obj->pages = st;
2302 
2303 	ret = i915_gem_gtt_prepare_object(obj);
2304 	if (ret)
2305 		goto err_pages;
2306 
2307 	if (i915_gem_object_needs_bit17_swizzle(obj))
2308 		i915_gem_object_do_bit_17_swizzle(obj);
2309 
2310 	if (i915_gem_object_is_tiled(obj) &&
2311 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2312 		i915_gem_object_pin_pages(obj);
2313 
2314 	return 0;
2315 
2316 err_pages:
2317 	sg_mark_end(sg);
2318 	for_each_sgt_page(page, sgt_iter, st)
2319 		put_page(page);
2320 	sg_free_table(st);
2321 	kfree(st);
2322 
2323 	/* shmemfs first checks if there is enough memory to allocate the page
2324 	 * and reports ENOSPC should there be insufficient, along with the usual
2325 	 * ENOMEM for a genuine allocation failure.
2326 	 *
2327 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2328 	 * space and so want to translate the error from shmemfs back to our
2329 	 * usual understanding of ENOMEM.
2330 	 */
2331 	if (ret == -ENOSPC)
2332 		ret = -ENOMEM;
2333 
2334 	return ret;
2335 }
2336 
2337 /* Ensure that the associated pages are gathered from the backing storage
2338  * and pinned into our object. i915_gem_object_get_pages() may be called
2339  * multiple times before they are released by a single call to
2340  * i915_gem_object_put_pages() - once the pages are no longer referenced
2341  * either as a result of memory pressure (reaping pages under the shrinker)
2342  * or as the object is itself released.
2343  */
2344 int
2345 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2346 {
2347 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2348 	const struct drm_i915_gem_object_ops *ops = obj->ops;
2349 	int ret;
2350 
2351 	if (obj->pages)
2352 		return 0;
2353 
2354 	if (obj->madv != I915_MADV_WILLNEED) {
2355 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2356 		return -EFAULT;
2357 	}
2358 
2359 	BUG_ON(obj->pages_pin_count);
2360 
2361 	ret = ops->get_pages(obj);
2362 	if (ret)
2363 		return ret;
2364 
2365 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2366 
2367 	obj->get_page.sg = obj->pages->sgl;
2368 	obj->get_page.last = 0;
2369 
2370 	return 0;
2371 }
2372 
2373 /* The 'mapping' part of i915_gem_object_pin_map() below */
2374 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2375 				 enum i915_map_type type)
2376 {
2377 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2378 	struct sg_table *sgt = obj->pages;
2379 	struct sgt_iter sgt_iter;
2380 	struct page *page;
2381 	struct page *stack_pages[32];
2382 	struct page **pages = stack_pages;
2383 	unsigned long i = 0;
2384 	pgprot_t pgprot;
2385 	void *addr;
2386 
2387 	/* A single page can always be kmapped */
2388 	if (n_pages == 1 && type == I915_MAP_WB)
2389 		return kmap(sg_page(sgt->sgl));
2390 
2391 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2392 		/* Too big for stack -- allocate temporary array instead */
2393 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2394 		if (!pages)
2395 			return NULL;
2396 	}
2397 
2398 	for_each_sgt_page(page, sgt_iter, sgt)
2399 		pages[i++] = page;
2400 
2401 	/* Check that we have the expected number of pages */
2402 	GEM_BUG_ON(i != n_pages);
2403 
2404 	switch (type) {
2405 	case I915_MAP_WB:
2406 		pgprot = PAGE_KERNEL;
2407 		break;
2408 	case I915_MAP_WC:
2409 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2410 		break;
2411 	}
2412 	addr = vmap(pages, n_pages, 0, pgprot);
2413 
2414 	if (pages != stack_pages)
2415 		drm_free_large(pages);
2416 
2417 	return addr;
2418 }
2419 
2420 /* get, pin, and map the pages of the object into kernel space */
2421 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2422 			      enum i915_map_type type)
2423 {
2424 	enum i915_map_type has_type;
2425 	bool pinned;
2426 	void *ptr;
2427 	int ret;
2428 
2429 	lockdep_assert_held(&obj->base.dev->struct_mutex);
2430 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2431 
2432 	ret = i915_gem_object_get_pages(obj);
2433 	if (ret)
2434 		return ERR_PTR(ret);
2435 
2436 	i915_gem_object_pin_pages(obj);
2437 	pinned = obj->pages_pin_count > 1;
2438 
2439 	ptr = ptr_unpack_bits(obj->mapping, has_type);
2440 	if (ptr && has_type != type) {
2441 		if (pinned) {
2442 			ret = -EBUSY;
2443 			goto err;
2444 		}
2445 
2446 		if (is_vmalloc_addr(ptr))
2447 			vunmap(ptr);
2448 		else
2449 			kunmap(kmap_to_page(ptr));
2450 
2451 		ptr = obj->mapping = NULL;
2452 	}
2453 
2454 	if (!ptr) {
2455 		ptr = i915_gem_object_map(obj, type);
2456 		if (!ptr) {
2457 			ret = -ENOMEM;
2458 			goto err;
2459 		}
2460 
2461 		obj->mapping = ptr_pack_bits(ptr, type);
2462 	}
2463 
2464 	return ptr;
2465 
2466 err:
2467 	i915_gem_object_unpin_pages(obj);
2468 	return ERR_PTR(ret);
2469 }
2470 
2471 static void
2472 i915_gem_object_retire__write(struct i915_gem_active *active,
2473 			      struct drm_i915_gem_request *request)
2474 {
2475 	struct drm_i915_gem_object *obj =
2476 		container_of(active, struct drm_i915_gem_object, last_write);
2477 
2478 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
2479 }
2480 
2481 static void
2482 i915_gem_object_retire__read(struct i915_gem_active *active,
2483 			     struct drm_i915_gem_request *request)
2484 {
2485 	int idx = request->engine->id;
2486 	struct drm_i915_gem_object *obj =
2487 		container_of(active, struct drm_i915_gem_object, last_read[idx]);
2488 
2489 	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
2490 
2491 	i915_gem_object_clear_active(obj, idx);
2492 	if (i915_gem_object_is_active(obj))
2493 		return;
2494 
2495 	/* Bump our place on the bound list to keep it roughly in LRU order
2496 	 * so that we don't steal from recently used but inactive objects
2497 	 * (unless we are forced to ofc!)
2498 	 */
2499 	if (obj->bind_count)
2500 		list_move_tail(&obj->global_list,
2501 			       &request->i915->mm.bound_list);
2502 
2503 	i915_gem_object_put(obj);
2504 }
2505 
2506 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2507 {
2508 	unsigned long elapsed;
2509 
2510 	if (ctx->hang_stats.banned)
2511 		return true;
2512 
2513 	elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2514 	if (ctx->hang_stats.ban_period_seconds &&
2515 	    elapsed <= ctx->hang_stats.ban_period_seconds) {
2516 		DRM_DEBUG("context hanging too fast, banning!\n");
2517 		return true;
2518 	}
2519 
2520 	return false;
2521 }
2522 
2523 static void i915_set_reset_status(struct i915_gem_context *ctx,
2524 				  const bool guilty)
2525 {
2526 	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2527 
2528 	if (guilty) {
2529 		hs->banned = i915_context_is_banned(ctx);
2530 		hs->batch_active++;
2531 		hs->guilty_ts = get_seconds();
2532 	} else {
2533 		hs->batch_pending++;
2534 	}
2535 }
2536 
2537 struct drm_i915_gem_request *
2538 i915_gem_find_active_request(struct intel_engine_cs *engine)
2539 {
2540 	struct drm_i915_gem_request *request;
2541 
2542 	/* We are called by the error capture and reset at a random
2543 	 * point in time. In particular, note that neither is crucially
2544 	 * ordered with an interrupt. After a hang, the GPU is dead and we
2545 	 * assume that no more writes can happen (we waited long enough for
2546 	 * all writes that were in transaction to be flushed) - adding an
2547 	 * extra delay for a recent interrupt is pointless. Hence, we do
2548 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2549 	 */
2550 	list_for_each_entry(request, &engine->request_list, link) {
2551 		if (i915_gem_request_completed(request))
2552 			continue;
2553 
2554 		if (!i915_sw_fence_done(&request->submit))
2555 			break;
2556 
2557 		return request;
2558 	}
2559 
2560 	return NULL;
2561 }
2562 
2563 static void reset_request(struct drm_i915_gem_request *request)
2564 {
2565 	void *vaddr = request->ring->vaddr;
2566 	u32 head;
2567 
2568 	/* As this request likely depends on state from the lost
2569 	 * context, clear out all the user operations leaving the
2570 	 * breadcrumb at the end (so we get the fence notifications).
2571 	 */
2572 	head = request->head;
2573 	if (request->postfix < head) {
2574 		memset(vaddr + head, 0, request->ring->size - head);
2575 		head = 0;
2576 	}
2577 	memset(vaddr + head, 0, request->postfix - head);
2578 }
2579 
2580 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2581 {
2582 	struct drm_i915_gem_request *request;
2583 	struct i915_gem_context *incomplete_ctx;
2584 	bool ring_hung;
2585 
2586 	if (engine->irq_seqno_barrier)
2587 		engine->irq_seqno_barrier(engine);
2588 
2589 	request = i915_gem_find_active_request(engine);
2590 	if (!request)
2591 		return;
2592 
2593 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2594 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2595 		ring_hung = false;
2596 
2597 	i915_set_reset_status(request->ctx, ring_hung);
2598 	if (!ring_hung)
2599 		return;
2600 
2601 	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2602 			 engine->name, request->fence.seqno);
2603 
2604 	/* Setup the CS to resume from the breadcrumb of the hung request */
2605 	engine->reset_hw(engine, request);
2606 
2607 	/* Users of the default context do not rely on logical state
2608 	 * preserved between batches. They have to emit full state on
2609 	 * every batch and so it is safe to execute queued requests following
2610 	 * the hang.
2611 	 *
2612 	 * Other contexts preserve state, now corrupt. We want to skip all
2613 	 * queued requests that reference the corrupt context.
2614 	 */
2615 	incomplete_ctx = request->ctx;
2616 	if (i915_gem_context_is_default(incomplete_ctx))
2617 		return;
2618 
2619 	list_for_each_entry_continue(request, &engine->request_list, link)
2620 		if (request->ctx == incomplete_ctx)
2621 			reset_request(request);
2622 }
2623 
2624 void i915_gem_reset(struct drm_i915_private *dev_priv)
2625 {
2626 	struct intel_engine_cs *engine;
2627 	enum intel_engine_id id;
2628 
2629 	i915_gem_retire_requests(dev_priv);
2630 
2631 	for_each_engine(engine, dev_priv, id)
2632 		i915_gem_reset_engine(engine);
2633 
2634 	i915_gem_restore_fences(&dev_priv->drm);
2635 
2636 	if (dev_priv->gt.awake) {
2637 		intel_sanitize_gt_powersave(dev_priv);
2638 		intel_enable_gt_powersave(dev_priv);
2639 		if (INTEL_GEN(dev_priv) >= 6)
2640 			gen6_rps_busy(dev_priv);
2641 	}
2642 }
2643 
2644 static void nop_submit_request(struct drm_i915_gem_request *request)
2645 {
2646 }
2647 
2648 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2649 {
2650 	engine->submit_request = nop_submit_request;
2651 
2652 	/* Mark all pending requests as complete so that any concurrent
2653 	 * (lockless) lookup doesn't try and wait upon the request as we
2654 	 * reset it.
2655 	 */
2656 	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
2657 
2658 	/*
2659 	 * Clear the execlists queue up before freeing the requests, as those
2660 	 * are the ones that keep the context and ringbuffer backing objects
2661 	 * pinned in place.
2662 	 */
2663 
2664 	if (i915.enable_execlists) {
2665 		spin_lock(&engine->execlist_lock);
2666 		INIT_LIST_HEAD(&engine->execlist_queue);
2667 		i915_gem_request_put(engine->execlist_port[0].request);
2668 		i915_gem_request_put(engine->execlist_port[1].request);
2669 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2670 		spin_unlock(&engine->execlist_lock);
2671 	}
2672 
2673 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
2674 }
2675 
2676 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2677 {
2678 	struct intel_engine_cs *engine;
2679 	enum intel_engine_id id;
2680 
2681 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2682 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2683 
2684 	i915_gem_context_lost(dev_priv);
2685 	for_each_engine(engine, dev_priv, id)
2686 		i915_gem_cleanup_engine(engine);
2687 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2688 
2689 	i915_gem_retire_requests(dev_priv);
2690 }
2691 
2692 static void
2693 i915_gem_retire_work_handler(struct work_struct *work)
2694 {
2695 	struct drm_i915_private *dev_priv =
2696 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
2697 	struct drm_device *dev = &dev_priv->drm;
2698 
2699 	/* Come back later if the device is busy... */
2700 	if (mutex_trylock(&dev->struct_mutex)) {
2701 		i915_gem_retire_requests(dev_priv);
2702 		mutex_unlock(&dev->struct_mutex);
2703 	}
2704 
2705 	/* Keep the retire handler running until we are finally idle.
2706 	 * We do not need to do this test under locking as in the worst-case
2707 	 * we queue the retire worker once too often.
2708 	 */
2709 	if (READ_ONCE(dev_priv->gt.awake)) {
2710 		i915_queue_hangcheck(dev_priv);
2711 		queue_delayed_work(dev_priv->wq,
2712 				   &dev_priv->gt.retire_work,
2713 				   round_jiffies_up_relative(HZ));
2714 	}
2715 }
2716 
2717 static void
2718 i915_gem_idle_work_handler(struct work_struct *work)
2719 {
2720 	struct drm_i915_private *dev_priv =
2721 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
2722 	struct drm_device *dev = &dev_priv->drm;
2723 	struct intel_engine_cs *engine;
2724 	enum intel_engine_id id;
2725 	bool rearm_hangcheck;
2726 
2727 	if (!READ_ONCE(dev_priv->gt.awake))
2728 		return;
2729 
2730 	if (READ_ONCE(dev_priv->gt.active_engines))
2731 		return;
2732 
2733 	rearm_hangcheck =
2734 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2735 
2736 	if (!mutex_trylock(&dev->struct_mutex)) {
2737 		/* Currently busy, come back later */
2738 		mod_delayed_work(dev_priv->wq,
2739 				 &dev_priv->gt.idle_work,
2740 				 msecs_to_jiffies(50));
2741 		goto out_rearm;
2742 	}
2743 
2744 	if (dev_priv->gt.active_engines)
2745 		goto out_unlock;
2746 
2747 	for_each_engine(engine, dev_priv, id)
2748 		i915_gem_batch_pool_fini(&engine->batch_pool);
2749 
2750 	GEM_BUG_ON(!dev_priv->gt.awake);
2751 	dev_priv->gt.awake = false;
2752 	rearm_hangcheck = false;
2753 
2754 	if (INTEL_GEN(dev_priv) >= 6)
2755 		gen6_rps_idle(dev_priv);
2756 	intel_runtime_pm_put(dev_priv);
2757 out_unlock:
2758 	mutex_unlock(&dev->struct_mutex);
2759 
2760 out_rearm:
2761 	if (rearm_hangcheck) {
2762 		GEM_BUG_ON(!dev_priv->gt.awake);
2763 		i915_queue_hangcheck(dev_priv);
2764 	}
2765 }
2766 
2767 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2768 {
2769 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
2770 	struct drm_i915_file_private *fpriv = file->driver_priv;
2771 	struct i915_vma *vma, *vn;
2772 
2773 	mutex_lock(&obj->base.dev->struct_mutex);
2774 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2775 		if (vma->vm->file == fpriv)
2776 			i915_vma_close(vma);
2777 	mutex_unlock(&obj->base.dev->struct_mutex);
2778 }
2779 
2780 /**
2781  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2782  * @dev: drm device pointer
2783  * @data: ioctl data blob
2784  * @file: drm file pointer
2785  *
2786  * Returns 0 if successful, else an error is returned with the remaining time in
2787  * the timeout parameter.
2788  *  -ETIME: object is still busy after timeout
2789  *  -ERESTARTSYS: signal interrupted the wait
2790  *  -ENONENT: object doesn't exist
2791  * Also possible, but rare:
2792  *  -EAGAIN: GPU wedged
2793  *  -ENOMEM: damn
2794  *  -ENODEV: Internal IRQ fail
2795  *  -E?: The add request failed
2796  *
2797  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2798  * non-zero timeout parameter the wait ioctl will wait for the given number of
2799  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2800  * without holding struct_mutex the object may become re-busied before this
2801  * function completes. A similar but shorter * race condition exists in the busy
2802  * ioctl
2803  */
2804 int
2805 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2806 {
2807 	struct drm_i915_gem_wait *args = data;
2808 	struct intel_rps_client *rps = to_rps_client(file);
2809 	struct drm_i915_gem_object *obj;
2810 	unsigned long active;
2811 	int idx, ret = 0;
2812 
2813 	if (args->flags != 0)
2814 		return -EINVAL;
2815 
2816 	obj = i915_gem_object_lookup(file, args->bo_handle);
2817 	if (!obj)
2818 		return -ENOENT;
2819 
2820 	active = __I915_BO_ACTIVE(obj);
2821 	for_each_active(active, idx) {
2822 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
2823 		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
2824 						    I915_WAIT_INTERRUPTIBLE,
2825 						    timeout, rps);
2826 		if (ret)
2827 			break;
2828 	}
2829 
2830 	i915_gem_object_put_unlocked(obj);
2831 	return ret;
2832 }
2833 
2834 static void __i915_vma_iounmap(struct i915_vma *vma)
2835 {
2836 	GEM_BUG_ON(i915_vma_is_pinned(vma));
2837 
2838 	if (vma->iomap == NULL)
2839 		return;
2840 
2841 	io_mapping_unmap(vma->iomap);
2842 	vma->iomap = NULL;
2843 }
2844 
2845 int i915_vma_unbind(struct i915_vma *vma)
2846 {
2847 	struct drm_i915_gem_object *obj = vma->obj;
2848 	unsigned long active;
2849 	int ret;
2850 
2851 	/* First wait upon any activity as retiring the request may
2852 	 * have side-effects such as unpinning or even unbinding this vma.
2853 	 */
2854 	active = i915_vma_get_active(vma);
2855 	if (active) {
2856 		int idx;
2857 
2858 		/* When a closed VMA is retired, it is unbound - eek.
2859 		 * In order to prevent it from being recursively closed,
2860 		 * take a pin on the vma so that the second unbind is
2861 		 * aborted.
2862 		 */
2863 		__i915_vma_pin(vma);
2864 
2865 		for_each_active(active, idx) {
2866 			ret = i915_gem_active_retire(&vma->last_read[idx],
2867 						   &vma->vm->dev->struct_mutex);
2868 			if (ret)
2869 				break;
2870 		}
2871 
2872 		__i915_vma_unpin(vma);
2873 		if (ret)
2874 			return ret;
2875 
2876 		GEM_BUG_ON(i915_vma_is_active(vma));
2877 	}
2878 
2879 	if (i915_vma_is_pinned(vma))
2880 		return -EBUSY;
2881 
2882 	if (!drm_mm_node_allocated(&vma->node))
2883 		goto destroy;
2884 
2885 	GEM_BUG_ON(obj->bind_count == 0);
2886 	GEM_BUG_ON(!obj->pages);
2887 
2888 	if (i915_vma_is_map_and_fenceable(vma)) {
2889 		/* release the fence reg _after_ flushing */
2890 		ret = i915_vma_put_fence(vma);
2891 		if (ret)
2892 			return ret;
2893 
2894 		/* Force a pagefault for domain tracking on next user access */
2895 		i915_gem_release_mmap(obj);
2896 
2897 		__i915_vma_iounmap(vma);
2898 		vma->flags &= ~I915_VMA_CAN_FENCE;
2899 	}
2900 
2901 	if (likely(!vma->vm->closed)) {
2902 		trace_i915_vma_unbind(vma);
2903 		vma->vm->unbind_vma(vma);
2904 	}
2905 	vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
2906 
2907 	drm_mm_remove_node(&vma->node);
2908 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2909 
2910 	if (vma->pages != obj->pages) {
2911 		GEM_BUG_ON(!vma->pages);
2912 		sg_free_table(vma->pages);
2913 		kfree(vma->pages);
2914 	}
2915 	vma->pages = NULL;
2916 
2917 	/* Since the unbound list is global, only move to that list if
2918 	 * no more VMAs exist. */
2919 	if (--obj->bind_count == 0)
2920 		list_move_tail(&obj->global_list,
2921 			       &to_i915(obj->base.dev)->mm.unbound_list);
2922 
2923 	/* And finally now the object is completely decoupled from this vma,
2924 	 * we can drop its hold on the backing storage and allow it to be
2925 	 * reaped by the shrinker.
2926 	 */
2927 	i915_gem_object_unpin_pages(obj);
2928 
2929 destroy:
2930 	if (unlikely(i915_vma_is_closed(vma)))
2931 		i915_vma_destroy(vma);
2932 
2933 	return 0;
2934 }
2935 
2936 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
2937 			   unsigned int flags)
2938 {
2939 	struct intel_engine_cs *engine;
2940 	enum intel_engine_id id;
2941 	int ret;
2942 
2943 	for_each_engine(engine, dev_priv, id) {
2944 		if (engine->last_context == NULL)
2945 			continue;
2946 
2947 		ret = intel_engine_idle(engine, flags);
2948 		if (ret)
2949 			return ret;
2950 	}
2951 
2952 	return 0;
2953 }
2954 
2955 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
2956 				     unsigned long cache_level)
2957 {
2958 	struct drm_mm_node *gtt_space = &vma->node;
2959 	struct drm_mm_node *other;
2960 
2961 	/*
2962 	 * On some machines we have to be careful when putting differing types
2963 	 * of snoopable memory together to avoid the prefetcher crossing memory
2964 	 * domains and dying. During vm initialisation, we decide whether or not
2965 	 * these constraints apply and set the drm_mm.color_adjust
2966 	 * appropriately.
2967 	 */
2968 	if (vma->vm->mm.color_adjust == NULL)
2969 		return true;
2970 
2971 	if (!drm_mm_node_allocated(gtt_space))
2972 		return true;
2973 
2974 	if (list_empty(&gtt_space->node_list))
2975 		return true;
2976 
2977 	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2978 	if (other->allocated && !other->hole_follows && other->color != cache_level)
2979 		return false;
2980 
2981 	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2982 	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2983 		return false;
2984 
2985 	return true;
2986 }
2987 
2988 /**
2989  * i915_vma_insert - finds a slot for the vma in its address space
2990  * @vma: the vma
2991  * @size: requested size in bytes (can be larger than the VMA)
2992  * @alignment: required alignment
2993  * @flags: mask of PIN_* flags to use
2994  *
2995  * First we try to allocate some free space that meets the requirements for
2996  * the VMA. Failiing that, if the flags permit, it will evict an old VMA,
2997  * preferrably the oldest idle entry to make room for the new VMA.
2998  *
2999  * Returns:
3000  * 0 on success, negative error code otherwise.
3001  */
3002 static int
3003 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3004 {
3005 	struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
3006 	struct drm_i915_gem_object *obj = vma->obj;
3007 	u64 start, end;
3008 	int ret;
3009 
3010 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
3011 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
3012 
3013 	size = max(size, vma->size);
3014 	if (flags & PIN_MAPPABLE)
3015 		size = i915_gem_get_ggtt_size(dev_priv, size,
3016 					      i915_gem_object_get_tiling(obj));
3017 
3018 	alignment = max(max(alignment, vma->display_alignment),
3019 			i915_gem_get_ggtt_alignment(dev_priv, size,
3020 						    i915_gem_object_get_tiling(obj),
3021 						    flags & PIN_MAPPABLE));
3022 
3023 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3024 
3025 	end = vma->vm->total;
3026 	if (flags & PIN_MAPPABLE)
3027 		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3028 	if (flags & PIN_ZONE_4G)
3029 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3030 
3031 	/* If binding the object/GGTT view requires more space than the entire
3032 	 * aperture has, reject it early before evicting everything in a vain
3033 	 * attempt to find space.
3034 	 */
3035 	if (size > end) {
3036 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3037 			  size, obj->base.size,
3038 			  flags & PIN_MAPPABLE ? "mappable" : "total",
3039 			  end);
3040 		return -E2BIG;
3041 	}
3042 
3043 	ret = i915_gem_object_get_pages(obj);
3044 	if (ret)
3045 		return ret;
3046 
3047 	i915_gem_object_pin_pages(obj);
3048 
3049 	if (flags & PIN_OFFSET_FIXED) {
3050 		u64 offset = flags & PIN_OFFSET_MASK;
3051 		if (offset & (alignment - 1) || offset > end - size) {
3052 			ret = -EINVAL;
3053 			goto err_unpin;
3054 		}
3055 
3056 		vma->node.start = offset;
3057 		vma->node.size = size;
3058 		vma->node.color = obj->cache_level;
3059 		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3060 		if (ret) {
3061 			ret = i915_gem_evict_for_vma(vma);
3062 			if (ret == 0)
3063 				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3064 			if (ret)
3065 				goto err_unpin;
3066 		}
3067 	} else {
3068 		u32 search_flag, alloc_flag;
3069 
3070 		if (flags & PIN_HIGH) {
3071 			search_flag = DRM_MM_SEARCH_BELOW;
3072 			alloc_flag = DRM_MM_CREATE_TOP;
3073 		} else {
3074 			search_flag = DRM_MM_SEARCH_DEFAULT;
3075 			alloc_flag = DRM_MM_CREATE_DEFAULT;
3076 		}
3077 
3078 		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3079 		 * so we know that we always have a minimum alignment of 4096.
3080 		 * The drm_mm range manager is optimised to return results
3081 		 * with zero alignment, so where possible use the optimal
3082 		 * path.
3083 		 */
3084 		if (alignment <= 4096)
3085 			alignment = 0;
3086 
3087 search_free:
3088 		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3089 							  &vma->node,
3090 							  size, alignment,
3091 							  obj->cache_level,
3092 							  start, end,
3093 							  search_flag,
3094 							  alloc_flag);
3095 		if (ret) {
3096 			ret = i915_gem_evict_something(vma->vm, size, alignment,
3097 						       obj->cache_level,
3098 						       start, end,
3099 						       flags);
3100 			if (ret == 0)
3101 				goto search_free;
3102 
3103 			goto err_unpin;
3104 		}
3105 
3106 		GEM_BUG_ON(vma->node.start < start);
3107 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
3108 	}
3109 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3110 
3111 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3112 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3113 	obj->bind_count++;
3114 
3115 	return 0;
3116 
3117 err_unpin:
3118 	i915_gem_object_unpin_pages(obj);
3119 	return ret;
3120 }
3121 
3122 bool
3123 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3124 			bool force)
3125 {
3126 	/* If we don't have a page list set up, then we're not pinned
3127 	 * to GPU, and we can ignore the cache flush because it'll happen
3128 	 * again at bind time.
3129 	 */
3130 	if (obj->pages == NULL)
3131 		return false;
3132 
3133 	/*
3134 	 * Stolen memory is always coherent with the GPU as it is explicitly
3135 	 * marked as wc by the system, or the system is cache-coherent.
3136 	 */
3137 	if (obj->stolen || obj->phys_handle)
3138 		return false;
3139 
3140 	/* If the GPU is snooping the contents of the CPU cache,
3141 	 * we do not need to manually clear the CPU cache lines.  However,
3142 	 * the caches are only snooped when the render cache is
3143 	 * flushed/invalidated.  As we always have to emit invalidations
3144 	 * and flushes when moving into and out of the RENDER domain, correct
3145 	 * snooping behaviour occurs naturally as the result of our domain
3146 	 * tracking.
3147 	 */
3148 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3149 		obj->cache_dirty = true;
3150 		return false;
3151 	}
3152 
3153 	trace_i915_gem_object_clflush(obj);
3154 	drm_clflush_sg(obj->pages);
3155 	obj->cache_dirty = false;
3156 
3157 	return true;
3158 }
3159 
3160 /** Flushes the GTT write domain for the object if it's dirty. */
3161 static void
3162 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3163 {
3164 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3165 
3166 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3167 		return;
3168 
3169 	/* No actual flushing is required for the GTT write domain.  Writes
3170 	 * to it "immediately" go to main memory as far as we know, so there's
3171 	 * no chipset flush.  It also doesn't land in render cache.
3172 	 *
3173 	 * However, we do have to enforce the order so that all writes through
3174 	 * the GTT land before any writes to the device, such as updates to
3175 	 * the GATT itself.
3176 	 *
3177 	 * We also have to wait a bit for the writes to land from the GTT.
3178 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3179 	 * timing. This issue has only been observed when switching quickly
3180 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
3181 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3182 	 * system agents we cannot reproduce this behaviour).
3183 	 */
3184 	wmb();
3185 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3186 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3187 
3188 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3189 
3190 	obj->base.write_domain = 0;
3191 	trace_i915_gem_object_change_domain(obj,
3192 					    obj->base.read_domains,
3193 					    I915_GEM_DOMAIN_GTT);
3194 }
3195 
3196 /** Flushes the CPU write domain for the object if it's dirty. */
3197 static void
3198 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3199 {
3200 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3201 		return;
3202 
3203 	if (i915_gem_clflush_object(obj, obj->pin_display))
3204 		i915_gem_chipset_flush(to_i915(obj->base.dev));
3205 
3206 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3207 
3208 	obj->base.write_domain = 0;
3209 	trace_i915_gem_object_change_domain(obj,
3210 					    obj->base.read_domains,
3211 					    I915_GEM_DOMAIN_CPU);
3212 }
3213 
3214 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
3215 {
3216 	struct i915_vma *vma;
3217 
3218 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3219 		if (!i915_vma_is_ggtt(vma))
3220 			continue;
3221 
3222 		if (i915_vma_is_active(vma))
3223 			continue;
3224 
3225 		if (!drm_mm_node_allocated(&vma->node))
3226 			continue;
3227 
3228 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3229 	}
3230 }
3231 
3232 /**
3233  * Moves a single object to the GTT read, and possibly write domain.
3234  * @obj: object to act on
3235  * @write: ask for write access or read only
3236  *
3237  * This function returns when the move is complete, including waiting on
3238  * flushes to occur.
3239  */
3240 int
3241 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3242 {
3243 	uint32_t old_write_domain, old_read_domains;
3244 	int ret;
3245 
3246 	ret = i915_gem_object_wait_rendering(obj, !write);
3247 	if (ret)
3248 		return ret;
3249 
3250 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3251 		return 0;
3252 
3253 	/* Flush and acquire obj->pages so that we are coherent through
3254 	 * direct access in memory with previous cached writes through
3255 	 * shmemfs and that our cache domain tracking remains valid.
3256 	 * For example, if the obj->filp was moved to swap without us
3257 	 * being notified and releasing the pages, we would mistakenly
3258 	 * continue to assume that the obj remained out of the CPU cached
3259 	 * domain.
3260 	 */
3261 	ret = i915_gem_object_get_pages(obj);
3262 	if (ret)
3263 		return ret;
3264 
3265 	i915_gem_object_flush_cpu_write_domain(obj);
3266 
3267 	/* Serialise direct access to this object with the barriers for
3268 	 * coherent writes from the GPU, by effectively invalidating the
3269 	 * GTT domain upon first access.
3270 	 */
3271 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3272 		mb();
3273 
3274 	old_write_domain = obj->base.write_domain;
3275 	old_read_domains = obj->base.read_domains;
3276 
3277 	/* It should now be out of any other write domains, and we can update
3278 	 * the domain values for our changes.
3279 	 */
3280 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3281 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3282 	if (write) {
3283 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3284 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3285 		obj->dirty = 1;
3286 	}
3287 
3288 	trace_i915_gem_object_change_domain(obj,
3289 					    old_read_domains,
3290 					    old_write_domain);
3291 
3292 	/* And bump the LRU for this access */
3293 	i915_gem_object_bump_inactive_ggtt(obj);
3294 
3295 	return 0;
3296 }
3297 
3298 /**
3299  * Changes the cache-level of an object across all VMA.
3300  * @obj: object to act on
3301  * @cache_level: new cache level to set for the object
3302  *
3303  * After this function returns, the object will be in the new cache-level
3304  * across all GTT and the contents of the backing storage will be coherent,
3305  * with respect to the new cache-level. In order to keep the backing storage
3306  * coherent for all users, we only allow a single cache level to be set
3307  * globally on the object and prevent it from being changed whilst the
3308  * hardware is reading from the object. That is if the object is currently
3309  * on the scanout it will be set to uncached (or equivalent display
3310  * cache coherency) and all non-MOCS GPU access will also be uncached so
3311  * that all direct access to the scanout remains coherent.
3312  */
3313 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3314 				    enum i915_cache_level cache_level)
3315 {
3316 	struct i915_vma *vma;
3317 	int ret = 0;
3318 
3319 	if (obj->cache_level == cache_level)
3320 		goto out;
3321 
3322 	/* Inspect the list of currently bound VMA and unbind any that would
3323 	 * be invalid given the new cache-level. This is principally to
3324 	 * catch the issue of the CS prefetch crossing page boundaries and
3325 	 * reading an invalid PTE on older architectures.
3326 	 */
3327 restart:
3328 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3329 		if (!drm_mm_node_allocated(&vma->node))
3330 			continue;
3331 
3332 		if (i915_vma_is_pinned(vma)) {
3333 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3334 			return -EBUSY;
3335 		}
3336 
3337 		if (i915_gem_valid_gtt_space(vma, cache_level))
3338 			continue;
3339 
3340 		ret = i915_vma_unbind(vma);
3341 		if (ret)
3342 			return ret;
3343 
3344 		/* As unbinding may affect other elements in the
3345 		 * obj->vma_list (due to side-effects from retiring
3346 		 * an active vma), play safe and restart the iterator.
3347 		 */
3348 		goto restart;
3349 	}
3350 
3351 	/* We can reuse the existing drm_mm nodes but need to change the
3352 	 * cache-level on the PTE. We could simply unbind them all and
3353 	 * rebind with the correct cache-level on next use. However since
3354 	 * we already have a valid slot, dma mapping, pages etc, we may as
3355 	 * rewrite the PTE in the belief that doing so tramples upon less
3356 	 * state and so involves less work.
3357 	 */
3358 	if (obj->bind_count) {
3359 		/* Before we change the PTE, the GPU must not be accessing it.
3360 		 * If we wait upon the object, we know that all the bound
3361 		 * VMA are no longer active.
3362 		 */
3363 		ret = i915_gem_object_wait_rendering(obj, false);
3364 		if (ret)
3365 			return ret;
3366 
3367 		if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
3368 			/* Access to snoopable pages through the GTT is
3369 			 * incoherent and on some machines causes a hard
3370 			 * lockup. Relinquish the CPU mmaping to force
3371 			 * userspace to refault in the pages and we can
3372 			 * then double check if the GTT mapping is still
3373 			 * valid for that pointer access.
3374 			 */
3375 			i915_gem_release_mmap(obj);
3376 
3377 			/* As we no longer need a fence for GTT access,
3378 			 * we can relinquish it now (and so prevent having
3379 			 * to steal a fence from someone else on the next
3380 			 * fence request). Note GPU activity would have
3381 			 * dropped the fence as all snoopable access is
3382 			 * supposed to be linear.
3383 			 */
3384 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
3385 				ret = i915_vma_put_fence(vma);
3386 				if (ret)
3387 					return ret;
3388 			}
3389 		} else {
3390 			/* We either have incoherent backing store and
3391 			 * so no GTT access or the architecture is fully
3392 			 * coherent. In such cases, existing GTT mmaps
3393 			 * ignore the cache bit in the PTE and we can
3394 			 * rewrite it without confusing the GPU or having
3395 			 * to force userspace to fault back in its mmaps.
3396 			 */
3397 		}
3398 
3399 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3400 			if (!drm_mm_node_allocated(&vma->node))
3401 				continue;
3402 
3403 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3404 			if (ret)
3405 				return ret;
3406 		}
3407 	}
3408 
3409 	list_for_each_entry(vma, &obj->vma_list, obj_link)
3410 		vma->node.color = cache_level;
3411 	obj->cache_level = cache_level;
3412 
3413 out:
3414 	/* Flush the dirty CPU caches to the backing storage so that the
3415 	 * object is now coherent at its new cache level (with respect
3416 	 * to the access domain).
3417 	 */
3418 	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
3419 		if (i915_gem_clflush_object(obj, true))
3420 			i915_gem_chipset_flush(to_i915(obj->base.dev));
3421 	}
3422 
3423 	return 0;
3424 }
3425 
3426 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3427 			       struct drm_file *file)
3428 {
3429 	struct drm_i915_gem_caching *args = data;
3430 	struct drm_i915_gem_object *obj;
3431 
3432 	obj = i915_gem_object_lookup(file, args->handle);
3433 	if (!obj)
3434 		return -ENOENT;
3435 
3436 	switch (obj->cache_level) {
3437 	case I915_CACHE_LLC:
3438 	case I915_CACHE_L3_LLC:
3439 		args->caching = I915_CACHING_CACHED;
3440 		break;
3441 
3442 	case I915_CACHE_WT:
3443 		args->caching = I915_CACHING_DISPLAY;
3444 		break;
3445 
3446 	default:
3447 		args->caching = I915_CACHING_NONE;
3448 		break;
3449 	}
3450 
3451 	i915_gem_object_put_unlocked(obj);
3452 	return 0;
3453 }
3454 
3455 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3456 			       struct drm_file *file)
3457 {
3458 	struct drm_i915_private *dev_priv = to_i915(dev);
3459 	struct drm_i915_gem_caching *args = data;
3460 	struct drm_i915_gem_object *obj;
3461 	enum i915_cache_level level;
3462 	int ret;
3463 
3464 	switch (args->caching) {
3465 	case I915_CACHING_NONE:
3466 		level = I915_CACHE_NONE;
3467 		break;
3468 	case I915_CACHING_CACHED:
3469 		/*
3470 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3471 		 * snooped mapping may leave stale data in a corresponding CPU
3472 		 * cacheline, whereas normally such cachelines would get
3473 		 * invalidated.
3474 		 */
3475 		if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
3476 			return -ENODEV;
3477 
3478 		level = I915_CACHE_LLC;
3479 		break;
3480 	case I915_CACHING_DISPLAY:
3481 		level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE;
3482 		break;
3483 	default:
3484 		return -EINVAL;
3485 	}
3486 
3487 	intel_runtime_pm_get(dev_priv);
3488 
3489 	ret = i915_mutex_lock_interruptible(dev);
3490 	if (ret)
3491 		goto rpm_put;
3492 
3493 	obj = i915_gem_object_lookup(file, args->handle);
3494 	if (!obj) {
3495 		ret = -ENOENT;
3496 		goto unlock;
3497 	}
3498 
3499 	ret = i915_gem_object_set_cache_level(obj, level);
3500 
3501 	i915_gem_object_put(obj);
3502 unlock:
3503 	mutex_unlock(&dev->struct_mutex);
3504 rpm_put:
3505 	intel_runtime_pm_put(dev_priv);
3506 
3507 	return ret;
3508 }
3509 
3510 /*
3511  * Prepare buffer for display plane (scanout, cursors, etc).
3512  * Can be called from an uninterruptible phase (modesetting) and allows
3513  * any flushes to be pipelined (for pageflips).
3514  */
3515 struct i915_vma *
3516 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3517 				     u32 alignment,
3518 				     const struct i915_ggtt_view *view)
3519 {
3520 	struct i915_vma *vma;
3521 	u32 old_read_domains, old_write_domain;
3522 	int ret;
3523 
3524 	/* Mark the pin_display early so that we account for the
3525 	 * display coherency whilst setting up the cache domains.
3526 	 */
3527 	obj->pin_display++;
3528 
3529 	/* The display engine is not coherent with the LLC cache on gen6.  As
3530 	 * a result, we make sure that the pinning that is about to occur is
3531 	 * done with uncached PTEs. This is lowest common denominator for all
3532 	 * chipsets.
3533 	 *
3534 	 * However for gen6+, we could do better by using the GFDT bit instead
3535 	 * of uncaching, which would allow us to flush all the LLC-cached data
3536 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3537 	 */
3538 	ret = i915_gem_object_set_cache_level(obj,
3539 					      HAS_WT(to_i915(obj->base.dev)) ?
3540 					      I915_CACHE_WT : I915_CACHE_NONE);
3541 	if (ret) {
3542 		vma = ERR_PTR(ret);
3543 		goto err_unpin_display;
3544 	}
3545 
3546 	/* As the user may map the buffer once pinned in the display plane
3547 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3548 	 * always use map_and_fenceable for all scanout buffers. However,
3549 	 * it may simply be too big to fit into mappable, in which case
3550 	 * put it anyway and hope that userspace can cope (but always first
3551 	 * try to preserve the existing ABI).
3552 	 */
3553 	vma = ERR_PTR(-ENOSPC);
3554 	if (view->type == I915_GGTT_VIEW_NORMAL)
3555 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3556 					       PIN_MAPPABLE | PIN_NONBLOCK);
3557 	if (IS_ERR(vma))
3558 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
3559 	if (IS_ERR(vma))
3560 		goto err_unpin_display;
3561 
3562 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3563 
3564 	i915_gem_object_flush_cpu_write_domain(obj);
3565 
3566 	old_write_domain = obj->base.write_domain;
3567 	old_read_domains = obj->base.read_domains;
3568 
3569 	/* It should now be out of any other write domains, and we can update
3570 	 * the domain values for our changes.
3571 	 */
3572 	obj->base.write_domain = 0;
3573 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3574 
3575 	trace_i915_gem_object_change_domain(obj,
3576 					    old_read_domains,
3577 					    old_write_domain);
3578 
3579 	return vma;
3580 
3581 err_unpin_display:
3582 	obj->pin_display--;
3583 	return vma;
3584 }
3585 
3586 void
3587 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3588 {
3589 	if (WARN_ON(vma->obj->pin_display == 0))
3590 		return;
3591 
3592 	if (--vma->obj->pin_display == 0)
3593 		vma->display_alignment = 0;
3594 
3595 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
3596 	if (!i915_vma_is_active(vma))
3597 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3598 
3599 	i915_vma_unpin(vma);
3600 }
3601 
3602 /**
3603  * Moves a single object to the CPU read, and possibly write domain.
3604  * @obj: object to act on
3605  * @write: requesting write or read-only access
3606  *
3607  * This function returns when the move is complete, including waiting on
3608  * flushes to occur.
3609  */
3610 int
3611 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3612 {
3613 	uint32_t old_write_domain, old_read_domains;
3614 	int ret;
3615 
3616 	ret = i915_gem_object_wait_rendering(obj, !write);
3617 	if (ret)
3618 		return ret;
3619 
3620 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3621 		return 0;
3622 
3623 	i915_gem_object_flush_gtt_write_domain(obj);
3624 
3625 	old_write_domain = obj->base.write_domain;
3626 	old_read_domains = obj->base.read_domains;
3627 
3628 	/* Flush the CPU cache if it's still invalid. */
3629 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3630 		i915_gem_clflush_object(obj, false);
3631 
3632 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3633 	}
3634 
3635 	/* It should now be out of any other write domains, and we can update
3636 	 * the domain values for our changes.
3637 	 */
3638 	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3639 
3640 	/* If we're writing through the CPU, then the GPU read domains will
3641 	 * need to be invalidated at next use.
3642 	 */
3643 	if (write) {
3644 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3645 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3646 	}
3647 
3648 	trace_i915_gem_object_change_domain(obj,
3649 					    old_read_domains,
3650 					    old_write_domain);
3651 
3652 	return 0;
3653 }
3654 
3655 /* Throttle our rendering by waiting until the ring has completed our requests
3656  * emitted over 20 msec ago.
3657  *
3658  * Note that if we were to use the current jiffies each time around the loop,
3659  * we wouldn't escape the function with any frames outstanding if the time to
3660  * render a frame was over 20ms.
3661  *
3662  * This should get us reasonable parallelism between CPU and GPU but also
3663  * relatively low latency when blocking on a particular request to finish.
3664  */
3665 static int
3666 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3667 {
3668 	struct drm_i915_private *dev_priv = to_i915(dev);
3669 	struct drm_i915_file_private *file_priv = file->driver_priv;
3670 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3671 	struct drm_i915_gem_request *request, *target = NULL;
3672 	int ret;
3673 
3674 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3675 	if (ret)
3676 		return ret;
3677 
3678 	/* ABI: return -EIO if already wedged */
3679 	if (i915_terminally_wedged(&dev_priv->gpu_error))
3680 		return -EIO;
3681 
3682 	spin_lock(&file_priv->mm.lock);
3683 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3684 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3685 			break;
3686 
3687 		/*
3688 		 * Note that the request might not have been submitted yet.
3689 		 * In which case emitted_jiffies will be zero.
3690 		 */
3691 		if (!request->emitted_jiffies)
3692 			continue;
3693 
3694 		target = request;
3695 	}
3696 	if (target)
3697 		i915_gem_request_get(target);
3698 	spin_unlock(&file_priv->mm.lock);
3699 
3700 	if (target == NULL)
3701 		return 0;
3702 
3703 	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
3704 	i915_gem_request_put(target);
3705 
3706 	return ret;
3707 }
3708 
3709 static bool
3710 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3711 {
3712 	if (!drm_mm_node_allocated(&vma->node))
3713 		return false;
3714 
3715 	if (vma->node.size < size)
3716 		return true;
3717 
3718 	if (alignment && vma->node.start & (alignment - 1))
3719 		return true;
3720 
3721 	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
3722 		return true;
3723 
3724 	if (flags & PIN_OFFSET_BIAS &&
3725 	    vma->node.start < (flags & PIN_OFFSET_MASK))
3726 		return true;
3727 
3728 	if (flags & PIN_OFFSET_FIXED &&
3729 	    vma->node.start != (flags & PIN_OFFSET_MASK))
3730 		return true;
3731 
3732 	return false;
3733 }
3734 
3735 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3736 {
3737 	struct drm_i915_gem_object *obj = vma->obj;
3738 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3739 	bool mappable, fenceable;
3740 	u32 fence_size, fence_alignment;
3741 
3742 	fence_size = i915_gem_get_ggtt_size(dev_priv,
3743 					    vma->size,
3744 					    i915_gem_object_get_tiling(obj));
3745 	fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
3746 						      vma->size,
3747 						      i915_gem_object_get_tiling(obj),
3748 						      true);
3749 
3750 	fenceable = (vma->node.size == fence_size &&
3751 		     (vma->node.start & (fence_alignment - 1)) == 0);
3752 
3753 	mappable = (vma->node.start + fence_size <=
3754 		    dev_priv->ggtt.mappable_end);
3755 
3756 	/*
3757 	 * Explicitly disable for rotated VMA since the display does not
3758 	 * need the fence and the VMA is not accessible to other users.
3759 	 */
3760 	if (mappable && fenceable &&
3761 	    vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
3762 		vma->flags |= I915_VMA_CAN_FENCE;
3763 	else
3764 		vma->flags &= ~I915_VMA_CAN_FENCE;
3765 }
3766 
3767 int __i915_vma_do_pin(struct i915_vma *vma,
3768 		      u64 size, u64 alignment, u64 flags)
3769 {
3770 	unsigned int bound = vma->flags;
3771 	int ret;
3772 
3773 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
3774 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
3775 
3776 	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
3777 		ret = -EBUSY;
3778 		goto err;
3779 	}
3780 
3781 	if ((bound & I915_VMA_BIND_MASK) == 0) {
3782 		ret = i915_vma_insert(vma, size, alignment, flags);
3783 		if (ret)
3784 			goto err;
3785 	}
3786 
3787 	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
3788 	if (ret)
3789 		goto err;
3790 
3791 	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
3792 		__i915_vma_set_map_and_fenceable(vma);
3793 
3794 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
3795 	return 0;
3796 
3797 err:
3798 	__i915_vma_unpin(vma);
3799 	return ret;
3800 }
3801 
3802 struct i915_vma *
3803 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3804 			 const struct i915_ggtt_view *view,
3805 			 u64 size,
3806 			 u64 alignment,
3807 			 u64 flags)
3808 {
3809 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3810 	struct i915_address_space *vm = &dev_priv->ggtt.base;
3811 	struct i915_vma *vma;
3812 	int ret;
3813 
3814 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
3815 	if (IS_ERR(vma))
3816 		return vma;
3817 
3818 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
3819 		if (flags & PIN_NONBLOCK &&
3820 		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3821 			return ERR_PTR(-ENOSPC);
3822 
3823 		if (flags & PIN_MAPPABLE) {
3824 			u32 fence_size;
3825 
3826 			fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
3827 							    i915_gem_object_get_tiling(obj));
3828 			/* If the required space is larger than the available
3829 			 * aperture, we will not able to find a slot for the
3830 			 * object and unbinding the object now will be in
3831 			 * vain. Worse, doing so may cause us to ping-pong
3832 			 * the object in and out of the Global GTT and
3833 			 * waste a lot of cycles under the mutex.
3834 			 */
3835 			if (fence_size > dev_priv->ggtt.mappable_end)
3836 				return ERR_PTR(-E2BIG);
3837 
3838 			/* If NONBLOCK is set the caller is optimistically
3839 			 * trying to cache the full object within the mappable
3840 			 * aperture, and *must* have a fallback in place for
3841 			 * situations where we cannot bind the object. We
3842 			 * can be a little more lax here and use the fallback
3843 			 * more often to avoid costly migrations of ourselves
3844 			 * and other objects within the aperture.
3845 			 *
3846 			 * Half-the-aperture is used as a simple heuristic.
3847 			 * More interesting would to do search for a free
3848 			 * block prior to making the commitment to unbind.
3849 			 * That caters for the self-harm case, and with a
3850 			 * little more heuristics (e.g. NOFAULT, NOEVICT)
3851 			 * we could try to minimise harm to others.
3852 			 */
3853 			if (flags & PIN_NONBLOCK &&
3854 			    fence_size > dev_priv->ggtt.mappable_end / 2)
3855 				return ERR_PTR(-ENOSPC);
3856 		}
3857 
3858 		WARN(i915_vma_is_pinned(vma),
3859 		     "bo is already pinned in ggtt with incorrect alignment:"
3860 		     " offset=%08x, req.alignment=%llx,"
3861 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3862 		     i915_ggtt_offset(vma), alignment,
3863 		     !!(flags & PIN_MAPPABLE),
3864 		     i915_vma_is_map_and_fenceable(vma));
3865 		ret = i915_vma_unbind(vma);
3866 		if (ret)
3867 			return ERR_PTR(ret);
3868 	}
3869 
3870 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3871 	if (ret)
3872 		return ERR_PTR(ret);
3873 
3874 	return vma;
3875 }
3876 
3877 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3878 {
3879 	/* Note that we could alias engines in the execbuf API, but
3880 	 * that would be very unwise as it prevents userspace from
3881 	 * fine control over engine selection. Ahem.
3882 	 *
3883 	 * This should be something like EXEC_MAX_ENGINE instead of
3884 	 * I915_NUM_ENGINES.
3885 	 */
3886 	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3887 	return 0x10000 << id;
3888 }
3889 
3890 static __always_inline unsigned int __busy_write_id(unsigned int id)
3891 {
3892 	/* The uABI guarantees an active writer is also amongst the read
3893 	 * engines. This would be true if we accessed the activity tracking
3894 	 * under the lock, but as we perform the lookup of the object and
3895 	 * its activity locklessly we can not guarantee that the last_write
3896 	 * being active implies that we have set the same engine flag from
3897 	 * last_read - hence we always set both read and write busy for
3898 	 * last_write.
3899 	 */
3900 	return id | __busy_read_flag(id);
3901 }
3902 
3903 static __always_inline unsigned int
3904 __busy_set_if_active(const struct i915_gem_active *active,
3905 		     unsigned int (*flag)(unsigned int id))
3906 {
3907 	struct drm_i915_gem_request *request;
3908 
3909 	request = rcu_dereference(active->request);
3910 	if (!request || i915_gem_request_completed(request))
3911 		return 0;
3912 
3913 	/* This is racy. See __i915_gem_active_get_rcu() for an in detail
3914 	 * discussion of how to handle the race correctly, but for reporting
3915 	 * the busy state we err on the side of potentially reporting the
3916 	 * wrong engine as being busy (but we guarantee that the result
3917 	 * is at least self-consistent).
3918 	 *
3919 	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
3920 	 * whilst we are inspecting it, even under the RCU read lock as we are.
3921 	 * This means that there is a small window for the engine and/or the
3922 	 * seqno to have been overwritten. The seqno will always be in the
3923 	 * future compared to the intended, and so we know that if that
3924 	 * seqno is idle (on whatever engine) our request is idle and the
3925 	 * return 0 above is correct.
3926 	 *
3927 	 * The issue is that if the engine is switched, it is just as likely
3928 	 * to report that it is busy (but since the switch happened, we know
3929 	 * the request should be idle). So there is a small chance that a busy
3930 	 * result is actually the wrong engine.
3931 	 *
3932 	 * So why don't we care?
3933 	 *
3934 	 * For starters, the busy ioctl is a heuristic that is by definition
3935 	 * racy. Even with perfect serialisation in the driver, the hardware
3936 	 * state is constantly advancing - the state we report to the user
3937 	 * is stale.
3938 	 *
3939 	 * The critical information for the busy-ioctl is whether the object
3940 	 * is idle as userspace relies on that to detect whether its next
3941 	 * access will stall, or if it has missed submitting commands to
3942 	 * the hardware allowing the GPU to stall. We never generate a
3943 	 * false-positive for idleness, thus busy-ioctl is reliable at the
3944 	 * most fundamental level, and we maintain the guarantee that a
3945 	 * busy object left to itself will eventually become idle (and stay
3946 	 * idle!).
3947 	 *
3948 	 * We allow ourselves the leeway of potentially misreporting the busy
3949 	 * state because that is an optimisation heuristic that is constantly
3950 	 * in flux. Being quickly able to detect the busy/idle state is much
3951 	 * more important than accurate logging of exactly which engines were
3952 	 * busy.
3953 	 *
3954 	 * For accuracy in reporting the engine, we could use
3955 	 *
3956 	 *	result = 0;
3957 	 *	request = __i915_gem_active_get_rcu(active);
3958 	 *	if (request) {
3959 	 *		if (!i915_gem_request_completed(request))
3960 	 *			result = flag(request->engine->exec_id);
3961 	 *		i915_gem_request_put(request);
3962 	 *	}
3963 	 *
3964 	 * but that still remains susceptible to both hardware and userspace
3965 	 * races. So we accept making the result of that race slightly worse,
3966 	 * given the rarity of the race and its low impact on the result.
3967 	 */
3968 	return flag(READ_ONCE(request->engine->exec_id));
3969 }
3970 
3971 static __always_inline unsigned int
3972 busy_check_reader(const struct i915_gem_active *active)
3973 {
3974 	return __busy_set_if_active(active, __busy_read_flag);
3975 }
3976 
3977 static __always_inline unsigned int
3978 busy_check_writer(const struct i915_gem_active *active)
3979 {
3980 	return __busy_set_if_active(active, __busy_write_id);
3981 }
3982 
3983 int
3984 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3985 		    struct drm_file *file)
3986 {
3987 	struct drm_i915_gem_busy *args = data;
3988 	struct drm_i915_gem_object *obj;
3989 	unsigned long active;
3990 
3991 	obj = i915_gem_object_lookup(file, args->handle);
3992 	if (!obj)
3993 		return -ENOENT;
3994 
3995 	args->busy = 0;
3996 	active = __I915_BO_ACTIVE(obj);
3997 	if (active) {
3998 		int idx;
3999 
4000 		/* Yes, the lookups are intentionally racy.
4001 		 *
4002 		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
4003 		 * to regard the value as stale and as our ABI guarantees
4004 		 * forward progress, we confirm the status of each active
4005 		 * request with the hardware.
4006 		 *
4007 		 * Even though we guard the pointer lookup by RCU, that only
4008 		 * guarantees that the pointer and its contents remain
4009 		 * dereferencable and does *not* mean that the request we
4010 		 * have is the same as the one being tracked by the object.
4011 		 *
4012 		 * Consider that we lookup the request just as it is being
4013 		 * retired and freed. We take a local copy of the pointer,
4014 		 * but before we add its engine into the busy set, the other
4015 		 * thread reallocates it and assigns it to a task on another
4016 		 * engine with a fresh and incomplete seqno. Guarding against
4017 		 * that requires careful serialisation and reference counting,
4018 		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
4019 		 * instead we expect that if the result is busy, which engines
4020 		 * are busy is not completely reliable - we only guarantee
4021 		 * that the object was busy.
4022 		 */
4023 		rcu_read_lock();
4024 
4025 		for_each_active(active, idx)
4026 			args->busy |= busy_check_reader(&obj->last_read[idx]);
4027 
4028 		/* For ABI sanity, we only care that the write engine is in
4029 		 * the set of read engines. This should be ensured by the
4030 		 * ordering of setting last_read/last_write in
4031 		 * i915_vma_move_to_active(), and then in reverse in retire.
4032 		 * However, for good measure, we always report the last_write
4033 		 * request as a busy read as well as being a busy write.
4034 		 *
4035 		 * We don't care that the set of active read/write engines
4036 		 * may change during construction of the result, as it is
4037 		 * equally liable to change before userspace can inspect
4038 		 * the result.
4039 		 */
4040 		args->busy |= busy_check_writer(&obj->last_write);
4041 
4042 		rcu_read_unlock();
4043 	}
4044 
4045 	i915_gem_object_put_unlocked(obj);
4046 	return 0;
4047 }
4048 
4049 int
4050 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4051 			struct drm_file *file_priv)
4052 {
4053 	return i915_gem_ring_throttle(dev, file_priv);
4054 }
4055 
4056 int
4057 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4058 		       struct drm_file *file_priv)
4059 {
4060 	struct drm_i915_private *dev_priv = to_i915(dev);
4061 	struct drm_i915_gem_madvise *args = data;
4062 	struct drm_i915_gem_object *obj;
4063 	int ret;
4064 
4065 	switch (args->madv) {
4066 	case I915_MADV_DONTNEED:
4067 	case I915_MADV_WILLNEED:
4068 	    break;
4069 	default:
4070 	    return -EINVAL;
4071 	}
4072 
4073 	ret = i915_mutex_lock_interruptible(dev);
4074 	if (ret)
4075 		return ret;
4076 
4077 	obj = i915_gem_object_lookup(file_priv, args->handle);
4078 	if (!obj) {
4079 		ret = -ENOENT;
4080 		goto unlock;
4081 	}
4082 
4083 	if (obj->pages &&
4084 	    i915_gem_object_is_tiled(obj) &&
4085 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4086 		if (obj->madv == I915_MADV_WILLNEED)
4087 			i915_gem_object_unpin_pages(obj);
4088 		if (args->madv == I915_MADV_WILLNEED)
4089 			i915_gem_object_pin_pages(obj);
4090 	}
4091 
4092 	if (obj->madv != __I915_MADV_PURGED)
4093 		obj->madv = args->madv;
4094 
4095 	/* if the object is no longer attached, discard its backing storage */
4096 	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
4097 		i915_gem_object_truncate(obj);
4098 
4099 	args->retained = obj->madv != __I915_MADV_PURGED;
4100 
4101 	i915_gem_object_put(obj);
4102 unlock:
4103 	mutex_unlock(&dev->struct_mutex);
4104 	return ret;
4105 }
4106 
4107 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4108 			  const struct drm_i915_gem_object_ops *ops)
4109 {
4110 	int i;
4111 
4112 	INIT_LIST_HEAD(&obj->global_list);
4113 	for (i = 0; i < I915_NUM_ENGINES; i++)
4114 		init_request_active(&obj->last_read[i],
4115 				    i915_gem_object_retire__read);
4116 	init_request_active(&obj->last_write,
4117 			    i915_gem_object_retire__write);
4118 	INIT_LIST_HEAD(&obj->obj_exec_link);
4119 	INIT_LIST_HEAD(&obj->vma_list);
4120 	INIT_LIST_HEAD(&obj->batch_pool_link);
4121 
4122 	obj->ops = ops;
4123 
4124 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4125 	obj->madv = I915_MADV_WILLNEED;
4126 
4127 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4128 }
4129 
4130 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4131 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
4132 	.get_pages = i915_gem_object_get_pages_gtt,
4133 	.put_pages = i915_gem_object_put_pages_gtt,
4134 };
4135 
4136 /* Note we don't consider signbits :| */
4137 #define overflows_type(x, T) \
4138 	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
4139 
4140 struct drm_i915_gem_object *
4141 i915_gem_object_create(struct drm_device *dev, u64 size)
4142 {
4143 	struct drm_i915_gem_object *obj;
4144 	struct address_space *mapping;
4145 	gfp_t mask;
4146 	int ret;
4147 
4148 	/* There is a prevalence of the assumption that we fit the object's
4149 	 * page count inside a 32bit _signed_ variable. Let's document this and
4150 	 * catch if we ever need to fix it. In the meantime, if you do spot
4151 	 * such a local variable, please consider fixing!
4152 	 */
4153 	if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
4154 		return ERR_PTR(-E2BIG);
4155 
4156 	if (overflows_type(size, obj->base.size))
4157 		return ERR_PTR(-E2BIG);
4158 
4159 	obj = i915_gem_object_alloc(dev);
4160 	if (obj == NULL)
4161 		return ERR_PTR(-ENOMEM);
4162 
4163 	ret = drm_gem_object_init(dev, &obj->base, size);
4164 	if (ret)
4165 		goto fail;
4166 
4167 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4168 	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
4169 		/* 965gm cannot relocate objects above 4GiB. */
4170 		mask &= ~__GFP_HIGHMEM;
4171 		mask |= __GFP_DMA32;
4172 	}
4173 
4174 	mapping = obj->base.filp->f_mapping;
4175 	mapping_set_gfp_mask(mapping, mask);
4176 
4177 	i915_gem_object_init(obj, &i915_gem_object_ops);
4178 
4179 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4180 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4181 
4182 	if (HAS_LLC(dev)) {
4183 		/* On some devices, we can have the GPU use the LLC (the CPU
4184 		 * cache) for about a 10% performance improvement
4185 		 * compared to uncached.  Graphics requests other than
4186 		 * display scanout are coherent with the CPU in
4187 		 * accessing this cache.  This means in this mode we
4188 		 * don't need to clflush on the CPU side, and on the
4189 		 * GPU side we only need to flush internal caches to
4190 		 * get data visible to the CPU.
4191 		 *
4192 		 * However, we maintain the display planes as UC, and so
4193 		 * need to rebind when first used as such.
4194 		 */
4195 		obj->cache_level = I915_CACHE_LLC;
4196 	} else
4197 		obj->cache_level = I915_CACHE_NONE;
4198 
4199 	trace_i915_gem_object_create(obj);
4200 
4201 	return obj;
4202 
4203 fail:
4204 	i915_gem_object_free(obj);
4205 
4206 	return ERR_PTR(ret);
4207 }
4208 
4209 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4210 {
4211 	/* If we are the last user of the backing storage (be it shmemfs
4212 	 * pages or stolen etc), we know that the pages are going to be
4213 	 * immediately released. In this case, we can then skip copying
4214 	 * back the contents from the GPU.
4215 	 */
4216 
4217 	if (obj->madv != I915_MADV_WILLNEED)
4218 		return false;
4219 
4220 	if (obj->base.filp == NULL)
4221 		return true;
4222 
4223 	/* At first glance, this looks racy, but then again so would be
4224 	 * userspace racing mmap against close. However, the first external
4225 	 * reference to the filp can only be obtained through the
4226 	 * i915_gem_mmap_ioctl() which safeguards us against the user
4227 	 * acquiring such a reference whilst we are in the middle of
4228 	 * freeing the object.
4229 	 */
4230 	return atomic_long_read(&obj->base.filp->f_count) == 1;
4231 }
4232 
4233 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4234 {
4235 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4236 	struct drm_device *dev = obj->base.dev;
4237 	struct drm_i915_private *dev_priv = to_i915(dev);
4238 	struct i915_vma *vma, *next;
4239 
4240 	intel_runtime_pm_get(dev_priv);
4241 
4242 	trace_i915_gem_object_destroy(obj);
4243 
4244 	/* All file-owned VMA should have been released by this point through
4245 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4246 	 * However, the object may also be bound into the global GTT (e.g.
4247 	 * older GPUs without per-process support, or for direct access through
4248 	 * the GTT either for the user or for scanout). Those VMA still need to
4249 	 * unbound now.
4250 	 */
4251 	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
4252 		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4253 		GEM_BUG_ON(i915_vma_is_active(vma));
4254 		vma->flags &= ~I915_VMA_PIN_MASK;
4255 		i915_vma_close(vma);
4256 	}
4257 	GEM_BUG_ON(obj->bind_count);
4258 
4259 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4260 	 * before progressing. */
4261 	if (obj->stolen)
4262 		i915_gem_object_unpin_pages(obj);
4263 
4264 	WARN_ON(atomic_read(&obj->frontbuffer_bits));
4265 
4266 	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4267 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4268 	    i915_gem_object_is_tiled(obj))
4269 		i915_gem_object_unpin_pages(obj);
4270 
4271 	if (WARN_ON(obj->pages_pin_count))
4272 		obj->pages_pin_count = 0;
4273 	if (discard_backing_storage(obj))
4274 		obj->madv = I915_MADV_DONTNEED;
4275 	i915_gem_object_put_pages(obj);
4276 
4277 	BUG_ON(obj->pages);
4278 
4279 	if (obj->base.import_attach)
4280 		drm_prime_gem_destroy(&obj->base, NULL);
4281 
4282 	if (obj->ops->release)
4283 		obj->ops->release(obj);
4284 
4285 	drm_gem_object_release(&obj->base);
4286 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
4287 
4288 	kfree(obj->bit_17);
4289 	i915_gem_object_free(obj);
4290 
4291 	intel_runtime_pm_put(dev_priv);
4292 }
4293 
4294 int i915_gem_suspend(struct drm_device *dev)
4295 {
4296 	struct drm_i915_private *dev_priv = to_i915(dev);
4297 	int ret;
4298 
4299 	intel_suspend_gt_powersave(dev_priv);
4300 
4301 	mutex_lock(&dev->struct_mutex);
4302 
4303 	/* We have to flush all the executing contexts to main memory so
4304 	 * that they can saved in the hibernation image. To ensure the last
4305 	 * context image is coherent, we have to switch away from it. That
4306 	 * leaves the dev_priv->kernel_context still active when
4307 	 * we actually suspend, and its image in memory may not match the GPU
4308 	 * state. Fortunately, the kernel_context is disposable and we do
4309 	 * not rely on its state.
4310 	 */
4311 	ret = i915_gem_switch_to_kernel_context(dev_priv);
4312 	if (ret)
4313 		goto err;
4314 
4315 	ret = i915_gem_wait_for_idle(dev_priv,
4316 				     I915_WAIT_INTERRUPTIBLE |
4317 				     I915_WAIT_LOCKED);
4318 	if (ret)
4319 		goto err;
4320 
4321 	i915_gem_retire_requests(dev_priv);
4322 
4323 	i915_gem_context_lost(dev_priv);
4324 	mutex_unlock(&dev->struct_mutex);
4325 
4326 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4327 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4328 	flush_delayed_work(&dev_priv->gt.idle_work);
4329 
4330 	/* Assert that we sucessfully flushed all the work and
4331 	 * reset the GPU back to its idle, low power state.
4332 	 */
4333 	WARN_ON(dev_priv->gt.awake);
4334 
4335 	/*
4336 	 * Neither the BIOS, ourselves or any other kernel
4337 	 * expects the system to be in execlists mode on startup,
4338 	 * so we need to reset the GPU back to legacy mode. And the only
4339 	 * known way to disable logical contexts is through a GPU reset.
4340 	 *
4341 	 * So in order to leave the system in a known default configuration,
4342 	 * always reset the GPU upon unload and suspend. Afterwards we then
4343 	 * clean up the GEM state tracking, flushing off the requests and
4344 	 * leaving the system in a known idle state.
4345 	 *
4346 	 * Note that is of the upmost importance that the GPU is idle and
4347 	 * all stray writes are flushed *before* we dismantle the backing
4348 	 * storage for the pinned objects.
4349 	 *
4350 	 * However, since we are uncertain that resetting the GPU on older
4351 	 * machines is a good idea, we don't - just in case it leaves the
4352 	 * machine in an unusable condition.
4353 	 */
4354 	if (HAS_HW_CONTEXTS(dev)) {
4355 		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4356 		WARN_ON(reset && reset != -ENODEV);
4357 	}
4358 
4359 	return 0;
4360 
4361 err:
4362 	mutex_unlock(&dev->struct_mutex);
4363 	return ret;
4364 }
4365 
4366 void i915_gem_resume(struct drm_device *dev)
4367 {
4368 	struct drm_i915_private *dev_priv = to_i915(dev);
4369 
4370 	mutex_lock(&dev->struct_mutex);
4371 	i915_gem_restore_gtt_mappings(dev);
4372 
4373 	/* As we didn't flush the kernel context before suspend, we cannot
4374 	 * guarantee that the context image is complete. So let's just reset
4375 	 * it and start again.
4376 	 */
4377 	dev_priv->gt.resume(dev_priv);
4378 
4379 	mutex_unlock(&dev->struct_mutex);
4380 }
4381 
4382 void i915_gem_init_swizzling(struct drm_device *dev)
4383 {
4384 	struct drm_i915_private *dev_priv = to_i915(dev);
4385 
4386 	if (INTEL_INFO(dev)->gen < 5 ||
4387 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4388 		return;
4389 
4390 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4391 				 DISP_TILE_SURFACE_SWIZZLING);
4392 
4393 	if (IS_GEN5(dev_priv))
4394 		return;
4395 
4396 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4397 	if (IS_GEN6(dev_priv))
4398 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4399 	else if (IS_GEN7(dev_priv))
4400 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4401 	else if (IS_GEN8(dev_priv))
4402 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4403 	else
4404 		BUG();
4405 }
4406 
4407 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4408 {
4409 	I915_WRITE(RING_CTL(base), 0);
4410 	I915_WRITE(RING_HEAD(base), 0);
4411 	I915_WRITE(RING_TAIL(base), 0);
4412 	I915_WRITE(RING_START(base), 0);
4413 }
4414 
4415 static void init_unused_rings(struct drm_i915_private *dev_priv)
4416 {
4417 	if (IS_I830(dev_priv)) {
4418 		init_unused_ring(dev_priv, PRB1_BASE);
4419 		init_unused_ring(dev_priv, SRB0_BASE);
4420 		init_unused_ring(dev_priv, SRB1_BASE);
4421 		init_unused_ring(dev_priv, SRB2_BASE);
4422 		init_unused_ring(dev_priv, SRB3_BASE);
4423 	} else if (IS_GEN2(dev_priv)) {
4424 		init_unused_ring(dev_priv, SRB0_BASE);
4425 		init_unused_ring(dev_priv, SRB1_BASE);
4426 	} else if (IS_GEN3(dev_priv)) {
4427 		init_unused_ring(dev_priv, PRB1_BASE);
4428 		init_unused_ring(dev_priv, PRB2_BASE);
4429 	}
4430 }
4431 
4432 int
4433 i915_gem_init_hw(struct drm_device *dev)
4434 {
4435 	struct drm_i915_private *dev_priv = to_i915(dev);
4436 	struct intel_engine_cs *engine;
4437 	enum intel_engine_id id;
4438 	int ret;
4439 
4440 	/* Double layer security blanket, see i915_gem_init() */
4441 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4442 
4443 	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
4444 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4445 
4446 	if (IS_HASWELL(dev_priv))
4447 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4448 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4449 
4450 	if (HAS_PCH_NOP(dev_priv)) {
4451 		if (IS_IVYBRIDGE(dev_priv)) {
4452 			u32 temp = I915_READ(GEN7_MSG_CTL);
4453 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4454 			I915_WRITE(GEN7_MSG_CTL, temp);
4455 		} else if (INTEL_INFO(dev)->gen >= 7) {
4456 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4457 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4458 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4459 		}
4460 	}
4461 
4462 	i915_gem_init_swizzling(dev);
4463 
4464 	/*
4465 	 * At least 830 can leave some of the unused rings
4466 	 * "active" (ie. head != tail) after resume which
4467 	 * will prevent c3 entry. Makes sure all unused rings
4468 	 * are totally idle.
4469 	 */
4470 	init_unused_rings(dev_priv);
4471 
4472 	BUG_ON(!dev_priv->kernel_context);
4473 
4474 	ret = i915_ppgtt_init_hw(dev);
4475 	if (ret) {
4476 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4477 		goto out;
4478 	}
4479 
4480 	/* Need to do basic initialisation of all rings first: */
4481 	for_each_engine(engine, dev_priv, id) {
4482 		ret = engine->init_hw(engine);
4483 		if (ret)
4484 			goto out;
4485 	}
4486 
4487 	intel_mocs_init_l3cc_table(dev);
4488 
4489 	/* We can't enable contexts until all firmware is loaded */
4490 	ret = intel_guc_setup(dev);
4491 	if (ret)
4492 		goto out;
4493 
4494 out:
4495 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4496 	return ret;
4497 }
4498 
4499 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4500 {
4501 	if (INTEL_INFO(dev_priv)->gen < 6)
4502 		return false;
4503 
4504 	/* TODO: make semaphores and Execlists play nicely together */
4505 	if (i915.enable_execlists)
4506 		return false;
4507 
4508 	if (value >= 0)
4509 		return value;
4510 
4511 #ifdef CONFIG_INTEL_IOMMU
4512 	/* Enable semaphores on SNB when IO remapping is off */
4513 	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4514 		return false;
4515 #endif
4516 
4517 	return true;
4518 }
4519 
4520 int i915_gem_init(struct drm_device *dev)
4521 {
4522 	struct drm_i915_private *dev_priv = to_i915(dev);
4523 	int ret;
4524 
4525 	mutex_lock(&dev->struct_mutex);
4526 
4527 	if (!i915.enable_execlists) {
4528 		dev_priv->gt.resume = intel_legacy_submission_resume;
4529 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4530 	} else {
4531 		dev_priv->gt.resume = intel_lr_context_resume;
4532 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4533 	}
4534 
4535 	/* This is just a security blanket to placate dragons.
4536 	 * On some systems, we very sporadically observe that the first TLBs
4537 	 * used by the CS may be stale, despite us poking the TLB reset. If
4538 	 * we hold the forcewake during initialisation these problems
4539 	 * just magically go away.
4540 	 */
4541 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4542 
4543 	i915_gem_init_userptr(dev_priv);
4544 
4545 	ret = i915_gem_init_ggtt(dev_priv);
4546 	if (ret)
4547 		goto out_unlock;
4548 
4549 	ret = i915_gem_context_init(dev);
4550 	if (ret)
4551 		goto out_unlock;
4552 
4553 	ret = intel_engines_init(dev);
4554 	if (ret)
4555 		goto out_unlock;
4556 
4557 	ret = i915_gem_init_hw(dev);
4558 	if (ret == -EIO) {
4559 		/* Allow engine initialisation to fail by marking the GPU as
4560 		 * wedged. But we only want to do this where the GPU is angry,
4561 		 * for all other failure, such as an allocation failure, bail.
4562 		 */
4563 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4564 		i915_gem_set_wedged(dev_priv);
4565 		ret = 0;
4566 	}
4567 
4568 out_unlock:
4569 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4570 	mutex_unlock(&dev->struct_mutex);
4571 
4572 	return ret;
4573 }
4574 
4575 void
4576 i915_gem_cleanup_engines(struct drm_device *dev)
4577 {
4578 	struct drm_i915_private *dev_priv = to_i915(dev);
4579 	struct intel_engine_cs *engine;
4580 	enum intel_engine_id id;
4581 
4582 	for_each_engine(engine, dev_priv, id)
4583 		dev_priv->gt.cleanup_engine(engine);
4584 }
4585 
4586 void
4587 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4588 {
4589 	struct drm_device *dev = &dev_priv->drm;
4590 	int i;
4591 
4592 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4593 	    !IS_CHERRYVIEW(dev_priv))
4594 		dev_priv->num_fence_regs = 32;
4595 	else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4596 		 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4597 		dev_priv->num_fence_regs = 16;
4598 	else
4599 		dev_priv->num_fence_regs = 8;
4600 
4601 	if (intel_vgpu_active(dev_priv))
4602 		dev_priv->num_fence_regs =
4603 				I915_READ(vgtif_reg(avail_rs.fence_num));
4604 
4605 	/* Initialize fence registers to zero */
4606 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4607 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4608 
4609 		fence->i915 = dev_priv;
4610 		fence->id = i;
4611 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4612 	}
4613 	i915_gem_restore_fences(dev);
4614 
4615 	i915_gem_detect_bit_6_swizzle(dev);
4616 }
4617 
4618 void
4619 i915_gem_load_init(struct drm_device *dev)
4620 {
4621 	struct drm_i915_private *dev_priv = to_i915(dev);
4622 
4623 	dev_priv->objects =
4624 		kmem_cache_create("i915_gem_object",
4625 				  sizeof(struct drm_i915_gem_object), 0,
4626 				  SLAB_HWCACHE_ALIGN,
4627 				  NULL);
4628 	dev_priv->vmas =
4629 		kmem_cache_create("i915_gem_vma",
4630 				  sizeof(struct i915_vma), 0,
4631 				  SLAB_HWCACHE_ALIGN,
4632 				  NULL);
4633 	dev_priv->requests =
4634 		kmem_cache_create("i915_gem_request",
4635 				  sizeof(struct drm_i915_gem_request), 0,
4636 				  SLAB_HWCACHE_ALIGN |
4637 				  SLAB_RECLAIM_ACCOUNT |
4638 				  SLAB_DESTROY_BY_RCU,
4639 				  NULL);
4640 
4641 	INIT_LIST_HEAD(&dev_priv->context_list);
4642 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4643 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4644 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4645 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4646 			  i915_gem_retire_work_handler);
4647 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4648 			  i915_gem_idle_work_handler);
4649 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4650 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4651 
4652 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4653 
4654 	init_waitqueue_head(&dev_priv->pending_flip_queue);
4655 
4656 	dev_priv->mm.interruptible = true;
4657 
4658 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4659 
4660 	spin_lock_init(&dev_priv->fb_tracking.lock);
4661 }
4662 
4663 void i915_gem_load_cleanup(struct drm_device *dev)
4664 {
4665 	struct drm_i915_private *dev_priv = to_i915(dev);
4666 
4667 	kmem_cache_destroy(dev_priv->requests);
4668 	kmem_cache_destroy(dev_priv->vmas);
4669 	kmem_cache_destroy(dev_priv->objects);
4670 
4671 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4672 	rcu_barrier();
4673 }
4674 
4675 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4676 {
4677 	intel_runtime_pm_get(dev_priv);
4678 
4679 	mutex_lock(&dev_priv->drm.struct_mutex);
4680 	i915_gem_shrink_all(dev_priv);
4681 	mutex_unlock(&dev_priv->drm.struct_mutex);
4682 
4683 	intel_runtime_pm_put(dev_priv);
4684 
4685 	return 0;
4686 }
4687 
4688 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4689 {
4690 	struct drm_i915_gem_object *obj;
4691 	struct list_head *phases[] = {
4692 		&dev_priv->mm.unbound_list,
4693 		&dev_priv->mm.bound_list,
4694 		NULL
4695 	}, **p;
4696 
4697 	/* Called just before we write the hibernation image.
4698 	 *
4699 	 * We need to update the domain tracking to reflect that the CPU
4700 	 * will be accessing all the pages to create and restore from the
4701 	 * hibernation, and so upon restoration those pages will be in the
4702 	 * CPU domain.
4703 	 *
4704 	 * To make sure the hibernation image contains the latest state,
4705 	 * we update that state just before writing out the image.
4706 	 *
4707 	 * To try and reduce the hibernation image, we manually shrink
4708 	 * the objects as well.
4709 	 */
4710 
4711 	mutex_lock(&dev_priv->drm.struct_mutex);
4712 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4713 
4714 	for (p = phases; *p; p++) {
4715 		list_for_each_entry(obj, *p, global_list) {
4716 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4717 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4718 		}
4719 	}
4720 	mutex_unlock(&dev_priv->drm.struct_mutex);
4721 
4722 	return 0;
4723 }
4724 
4725 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4726 {
4727 	struct drm_i915_file_private *file_priv = file->driver_priv;
4728 	struct drm_i915_gem_request *request;
4729 
4730 	/* Clean up our request list when the client is going away, so that
4731 	 * later retire_requests won't dereference our soon-to-be-gone
4732 	 * file_priv.
4733 	 */
4734 	spin_lock(&file_priv->mm.lock);
4735 	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4736 		request->file_priv = NULL;
4737 	spin_unlock(&file_priv->mm.lock);
4738 
4739 	if (!list_empty(&file_priv->rps.link)) {
4740 		spin_lock(&to_i915(dev)->rps.client_lock);
4741 		list_del(&file_priv->rps.link);
4742 		spin_unlock(&to_i915(dev)->rps.client_lock);
4743 	}
4744 }
4745 
4746 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4747 {
4748 	struct drm_i915_file_private *file_priv;
4749 	int ret;
4750 
4751 	DRM_DEBUG_DRIVER("\n");
4752 
4753 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4754 	if (!file_priv)
4755 		return -ENOMEM;
4756 
4757 	file->driver_priv = file_priv;
4758 	file_priv->dev_priv = to_i915(dev);
4759 	file_priv->file = file;
4760 	INIT_LIST_HEAD(&file_priv->rps.link);
4761 
4762 	spin_lock_init(&file_priv->mm.lock);
4763 	INIT_LIST_HEAD(&file_priv->mm.request_list);
4764 
4765 	file_priv->bsd_engine = -1;
4766 
4767 	ret = i915_gem_context_open(dev, file);
4768 	if (ret)
4769 		kfree(file_priv);
4770 
4771 	return ret;
4772 }
4773 
4774 /**
4775  * i915_gem_track_fb - update frontbuffer tracking
4776  * @old: current GEM buffer for the frontbuffer slots
4777  * @new: new GEM buffer for the frontbuffer slots
4778  * @frontbuffer_bits: bitmask of frontbuffer slots
4779  *
4780  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4781  * from @old and setting them in @new. Both @old and @new can be NULL.
4782  */
4783 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4784 		       struct drm_i915_gem_object *new,
4785 		       unsigned frontbuffer_bits)
4786 {
4787 	/* Control of individual bits within the mask are guarded by
4788 	 * the owning plane->mutex, i.e. we can never see concurrent
4789 	 * manipulation of individual bits. But since the bitfield as a whole
4790 	 * is updated using RMW, we need to use atomics in order to update
4791 	 * the bits.
4792 	 */
4793 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4794 		     sizeof(atomic_t) * BITS_PER_BYTE);
4795 
4796 	if (old) {
4797 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4798 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4799 	}
4800 
4801 	if (new) {
4802 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4803 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4804 	}
4805 }
4806 
4807 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
4808 struct page *
4809 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4810 {
4811 	struct page *page;
4812 
4813 	/* Only default objects have per-page dirty tracking */
4814 	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
4815 		return NULL;
4816 
4817 	page = i915_gem_object_get_page(obj, n);
4818 	set_page_dirty(page);
4819 	return page;
4820 }
4821 
4822 /* Allocate a new GEM object and fill it with the supplied data */
4823 struct drm_i915_gem_object *
4824 i915_gem_object_create_from_data(struct drm_device *dev,
4825 			         const void *data, size_t size)
4826 {
4827 	struct drm_i915_gem_object *obj;
4828 	struct sg_table *sg;
4829 	size_t bytes;
4830 	int ret;
4831 
4832 	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
4833 	if (IS_ERR(obj))
4834 		return obj;
4835 
4836 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4837 	if (ret)
4838 		goto fail;
4839 
4840 	ret = i915_gem_object_get_pages(obj);
4841 	if (ret)
4842 		goto fail;
4843 
4844 	i915_gem_object_pin_pages(obj);
4845 	sg = obj->pages;
4846 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4847 	obj->dirty = 1;		/* Backing store is now out of date */
4848 	i915_gem_object_unpin_pages(obj);
4849 
4850 	if (WARN_ON(bytes != size)) {
4851 		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4852 		ret = -EFAULT;
4853 		goto fail;
4854 	}
4855 
4856 	return obj;
4857 
4858 fail:
4859 	i915_gem_object_put(obj);
4860 	return ERR_PTR(ret);
4861 }
4862