xref: /linux/drivers/gpu/drm/i915/i915_gem.c (revision e5c86679d5e864947a52fb31e45a425dea3e7fa9)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include <drm/drmP.h>
29 #include <drm/drm_vma_manager.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_vgpu.h"
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 #include "intel_frontbuffer.h"
36 #include "intel_mocs.h"
37 #include <linux/dma-fence-array.h>
38 #include <linux/reservation.h>
39 #include <linux/shmem_fs.h>
40 #include <linux/slab.h>
41 #include <linux/stop_machine.h>
42 #include <linux/swap.h>
43 #include <linux/pci.h>
44 #include <linux/dma-buf.h>
45 
46 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
47 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
48 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
49 
50 static bool cpu_cache_is_coherent(struct drm_device *dev,
51 				  enum i915_cache_level level)
52 {
53 	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
54 }
55 
56 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
57 {
58 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
59 		return false;
60 
61 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
62 		return true;
63 
64 	return obj->pin_display;
65 }
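/*
 * Taken together, the two helpers above mean a CPU write needs a manual
 * clflush only when the object is not already in the CPU write domain and
 * is either uncached (I915_CACHE_NONE) on a platform without a shared LLC,
 * or pinned for display scanout, where the data must reach memory before
 * the display engine reads it.
 */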
66 
67 static int
68 insert_mappable_node(struct i915_ggtt *ggtt,
69                      struct drm_mm_node *node, u32 size)
70 {
71 	memset(node, 0, sizeof(*node));
72 	return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
73 					   size, 0, I915_COLOR_UNEVICTABLE,
74 					   0, ggtt->mappable_end,
75 					   DRM_MM_INSERT_LOW);
76 }
77 
78 static void
79 remove_mappable_node(struct drm_mm_node *node)
80 {
81 	drm_mm_remove_node(node);
82 }
83 
84 /* some bookkeeping */
85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
86 				  u64 size)
87 {
88 	spin_lock(&dev_priv->mm.object_stat_lock);
89 	dev_priv->mm.object_count++;
90 	dev_priv->mm.object_memory += size;
91 	spin_unlock(&dev_priv->mm.object_stat_lock);
92 }
93 
94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
95 				     u64 size)
96 {
97 	spin_lock(&dev_priv->mm.object_stat_lock);
98 	dev_priv->mm.object_count--;
99 	dev_priv->mm.object_memory -= size;
100 	spin_unlock(&dev_priv->mm.object_stat_lock);
101 }
102 
103 static int
104 i915_gem_wait_for_error(struct i915_gpu_error *error)
105 {
106 	int ret;
107 
108 	might_sleep();
109 
110 	if (!i915_reset_in_progress(error))
111 		return 0;
112 
113 	/*
114 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
115 	 * userspace. If it takes that long something really bad is going on and
116 	 * we should simply try to bail out and fail as gracefully as possible.
117 	 */
118 	ret = wait_event_interruptible_timeout(error->reset_queue,
119 					       !i915_reset_in_progress(error),
120 					       I915_RESET_TIMEOUT);
121 	if (ret == 0) {
122 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
123 		return -EIO;
124 	} else if (ret < 0) {
125 		return ret;
126 	} else {
127 		return 0;
128 	}
129 }
130 
131 int i915_mutex_lock_interruptible(struct drm_device *dev)
132 {
133 	struct drm_i915_private *dev_priv = to_i915(dev);
134 	int ret;
135 
136 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
137 	if (ret)
138 		return ret;
139 
140 	ret = mutex_lock_interruptible(&dev->struct_mutex);
141 	if (ret)
142 		return ret;
143 
144 	return 0;
145 }
146 
147 int
148 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
149 			    struct drm_file *file)
150 {
151 	struct drm_i915_private *dev_priv = to_i915(dev);
152 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
153 	struct drm_i915_gem_get_aperture *args = data;
154 	struct i915_vma *vma;
155 	size_t pinned;
156 
157 	pinned = 0;
158 	mutex_lock(&dev->struct_mutex);
159 	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
160 		if (i915_vma_is_pinned(vma))
161 			pinned += vma->node.size;
162 	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
163 		if (i915_vma_is_pinned(vma))
164 			pinned += vma->node.size;
165 	mutex_unlock(&dev->struct_mutex);
166 
167 	args->aper_size = ggtt->base.total;
168 	args->aper_available_size = args->aper_size - pinned;
169 
170 	return 0;
171 }
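/*
 * Illustrative userspace sketch (not part of the driver) of querying the
 * aperture through the ioctl above; "fd" is assumed to be an open i915 DRM
 * file descriptor and error handling is elided:
 *
 *	struct drm_i915_gem_get_aperture aperture = {};
 *
 *	if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
 *		printf("GTT total %llu, available %llu\n",
 *		       (unsigned long long)aperture.aper_size,
 *		       (unsigned long long)aperture.aper_available_size);
 */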
172 
173 static struct sg_table *
174 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
175 {
176 	struct address_space *mapping = obj->base.filp->f_mapping;
177 	drm_dma_handle_t *phys;
178 	struct sg_table *st;
179 	struct scatterlist *sg;
180 	char *vaddr;
181 	int i;
182 
183 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
184 		return ERR_PTR(-EINVAL);
185 
186 	/* Always aligning to the object size allows a single allocation
187 	 * to handle all possible callers, and given typical object sizes,
188 	 * the alignment of the buddy allocation will naturally match.
189 	 */
190 	phys = drm_pci_alloc(obj->base.dev,
191 			     obj->base.size,
192 			     roundup_pow_of_two(obj->base.size));
193 	if (!phys)
194 		return ERR_PTR(-ENOMEM);
195 
196 	vaddr = phys->vaddr;
197 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
198 		struct page *page;
199 		char *src;
200 
201 		page = shmem_read_mapping_page(mapping, i);
202 		if (IS_ERR(page)) {
203 			st = ERR_CAST(page);
204 			goto err_phys;
205 		}
206 
207 		src = kmap_atomic(page);
208 		memcpy(vaddr, src, PAGE_SIZE);
209 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
210 		kunmap_atomic(src);
211 
212 		put_page(page);
213 		vaddr += PAGE_SIZE;
214 	}
215 
216 	i915_gem_chipset_flush(to_i915(obj->base.dev));
217 
218 	st = kmalloc(sizeof(*st), GFP_KERNEL);
219 	if (!st) {
220 		st = ERR_PTR(-ENOMEM);
221 		goto err_phys;
222 	}
223 
224 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
225 		kfree(st);
226 		st = ERR_PTR(-ENOMEM);
227 		goto err_phys;
228 	}
229 
230 	sg = st->sgl;
231 	sg->offset = 0;
232 	sg->length = obj->base.size;
233 
234 	sg_dma_address(sg) = phys->busaddr;
235 	sg_dma_len(sg) = obj->base.size;
236 
237 	obj->phys_handle = phys;
238 	return st;
239 
240 err_phys:
241 	drm_pci_free(obj->base.dev, phys);
242 	return st;
243 }
244 
245 static void
246 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
247 				struct sg_table *pages,
248 				bool needs_clflush)
249 {
250 	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
251 
252 	if (obj->mm.madv == I915_MADV_DONTNEED)
253 		obj->mm.dirty = false;
254 
255 	if (needs_clflush &&
256 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
257 	    !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
258 		drm_clflush_sg(pages);
259 
260 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
261 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
262 }
263 
264 static void
265 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
266 			       struct sg_table *pages)
267 {
268 	__i915_gem_object_release_shmem(obj, pages, false);
269 
270 	if (obj->mm.dirty) {
271 		struct address_space *mapping = obj->base.filp->f_mapping;
272 		char *vaddr = obj->phys_handle->vaddr;
273 		int i;
274 
275 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
276 			struct page *page;
277 			char *dst;
278 
279 			page = shmem_read_mapping_page(mapping, i);
280 			if (IS_ERR(page))
281 				continue;
282 
283 			dst = kmap_atomic(page);
284 			drm_clflush_virt_range(vaddr, PAGE_SIZE);
285 			memcpy(dst, vaddr, PAGE_SIZE);
286 			kunmap_atomic(dst);
287 
288 			set_page_dirty(page);
289 			if (obj->mm.madv == I915_MADV_WILLNEED)
290 				mark_page_accessed(page);
291 			put_page(page);
292 			vaddr += PAGE_SIZE;
293 		}
294 		obj->mm.dirty = false;
295 	}
296 
297 	sg_free_table(pages);
298 	kfree(pages);
299 
300 	drm_pci_free(obj->base.dev, obj->phys_handle);
301 }
302 
303 static void
304 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
305 {
306 	i915_gem_object_unpin_pages(obj);
307 }
308 
309 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
310 	.get_pages = i915_gem_object_get_pages_phys,
311 	.put_pages = i915_gem_object_put_pages_phys,
312 	.release = i915_gem_object_release_phys,
313 };
314 
315 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
316 {
317 	struct i915_vma *vma;
318 	LIST_HEAD(still_in_list);
319 	int ret;
320 
321 	lockdep_assert_held(&obj->base.dev->struct_mutex);
322 
323 	/* Closed vma are removed from the obj->vma_list - but they may
324 	 * still have an active binding on the object. To remove those we
325 	 * must wait for all rendering on the object to complete (as unbinding
326 	 * must do anyway), and retire the requests.
327 	 */
328 	ret = i915_gem_object_wait(obj,
329 				   I915_WAIT_INTERRUPTIBLE |
330 				   I915_WAIT_LOCKED |
331 				   I915_WAIT_ALL,
332 				   MAX_SCHEDULE_TIMEOUT,
333 				   NULL);
334 	if (ret)
335 		return ret;
336 
337 	i915_gem_retire_requests(to_i915(obj->base.dev));
338 
339 	while ((vma = list_first_entry_or_null(&obj->vma_list,
340 					       struct i915_vma,
341 					       obj_link))) {
342 		list_move_tail(&vma->obj_link, &still_in_list);
343 		ret = i915_vma_unbind(vma);
344 		if (ret)
345 			break;
346 	}
347 	list_splice(&still_in_list, &obj->vma_list);
348 
349 	return ret;
350 }
351 
352 static long
353 i915_gem_object_wait_fence(struct dma_fence *fence,
354 			   unsigned int flags,
355 			   long timeout,
356 			   struct intel_rps_client *rps)
357 {
358 	struct drm_i915_gem_request *rq;
359 
360 	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
361 
362 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
363 		return timeout;
364 
365 	if (!dma_fence_is_i915(fence))
366 		return dma_fence_wait_timeout(fence,
367 					      flags & I915_WAIT_INTERRUPTIBLE,
368 					      timeout);
369 
370 	rq = to_request(fence);
371 	if (i915_gem_request_completed(rq))
372 		goto out;
373 
374 	/* This client is about to stall waiting for the GPU. In many cases
375 	 * this is undesirable and limits the throughput of the system, as
376 	 * many clients cannot continue processing user input/output whilst
377 	 * blocked. RPS autotuning may take tens of milliseconds to respond
378 	 * to the GPU load and thus incurs additional latency for the client.
379 	 * We can circumvent that by promoting the GPU frequency to maximum
380 	 * before we wait. This makes the GPU throttle up much more quickly
381 	 * (good for benchmarks and user experience, e.g. window animations),
382 	 * but at a cost of spending more power processing the workload
383 	 * (bad for battery). Not all clients even want their results
384 	 * immediately and for them we should just let the GPU select its own
385 	 * frequency to maximise efficiency. To prevent a single client from
386 	 * forcing the clocks too high for the whole system, we only allow
387 	 * each client to waitboost once in a busy period.
388 	 */
389 	if (rps) {
390 		if (INTEL_GEN(rq->i915) >= 6)
391 			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
392 		else
393 			rps = NULL;
394 	}
395 
396 	timeout = i915_wait_request(rq, flags, timeout);
397 
398 out:
399 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
400 		i915_gem_request_retire_upto(rq);
401 
402 	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
403 		/* The GPU is now idle and this client has stalled.
404 		 * Since no other client has submitted a request in the
405 		 * meantime, assume that this client is the only one
406 		 * supplying work to the GPU but is unable to keep that
407 		 * work supplied because it is waiting. Since the GPU is
408 		 * then never kept fully busy, RPS autoclocking will
409 		 * keep the clocks relatively low, causing further delays.
410 		 * Compensate by giving the synchronous client credit for
411 		 * a waitboost next time.
412 		 */
413 		spin_lock(&rq->i915->rps.client_lock);
414 		list_del_init(&rps->link);
415 		spin_unlock(&rq->i915->rps.client_lock);
416 	}
417 
418 	return timeout;
419 }
420 
421 static long
422 i915_gem_object_wait_reservation(struct reservation_object *resv,
423 				 unsigned int flags,
424 				 long timeout,
425 				 struct intel_rps_client *rps)
426 {
427 	struct dma_fence *excl;
428 
429 	if (flags & I915_WAIT_ALL) {
430 		struct dma_fence **shared;
431 		unsigned int count, i;
432 		int ret;
433 
434 		ret = reservation_object_get_fences_rcu(resv,
435 							&excl, &count, &shared);
436 		if (ret)
437 			return ret;
438 
439 		for (i = 0; i < count; i++) {
440 			timeout = i915_gem_object_wait_fence(shared[i],
441 							     flags, timeout,
442 							     rps);
443 			if (timeout < 0)
444 				break;
445 
446 			dma_fence_put(shared[i]);
447 		}
448 
449 		for (; i < count; i++)
450 			dma_fence_put(shared[i]);
451 		kfree(shared);
452 	} else {
453 		excl = reservation_object_get_excl_rcu(resv);
454 	}
455 
456 	if (excl && timeout >= 0)
457 		timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
458 
459 	dma_fence_put(excl);
460 
461 	return timeout;
462 }
463 
464 static void __fence_set_priority(struct dma_fence *fence, int prio)
465 {
466 	struct drm_i915_gem_request *rq;
467 	struct intel_engine_cs *engine;
468 
469 	if (!dma_fence_is_i915(fence))
470 		return;
471 
472 	rq = to_request(fence);
473 	engine = rq->engine;
474 	if (!engine->schedule)
475 		return;
476 
477 	engine->schedule(rq, prio);
478 }
479 
480 static void fence_set_priority(struct dma_fence *fence, int prio)
481 {
482 	/* Recurse once into a fence-array */
483 	if (dma_fence_is_array(fence)) {
484 		struct dma_fence_array *array = to_dma_fence_array(fence);
485 		int i;
486 
487 		for (i = 0; i < array->num_fences; i++)
488 			__fence_set_priority(array->fences[i], prio);
489 	} else {
490 		__fence_set_priority(fence, prio);
491 	}
492 }
493 
494 int
495 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
496 			      unsigned int flags,
497 			      int prio)
498 {
499 	struct dma_fence *excl;
500 
501 	if (flags & I915_WAIT_ALL) {
502 		struct dma_fence **shared;
503 		unsigned int count, i;
504 		int ret;
505 
506 		ret = reservation_object_get_fences_rcu(obj->resv,
507 							&excl, &count, &shared);
508 		if (ret)
509 			return ret;
510 
511 		for (i = 0; i < count; i++) {
512 			fence_set_priority(shared[i], prio);
513 			dma_fence_put(shared[i]);
514 		}
515 
516 		kfree(shared);
517 	} else {
518 		excl = reservation_object_get_excl_rcu(obj->resv);
519 	}
520 
521 	if (excl) {
522 		fence_set_priority(excl, prio);
523 		dma_fence_put(excl);
524 	}
525 	return 0;
526 }
527 
528 /**
529  * Waits for rendering to the object to be completed
530  * @obj: i915 gem object
531  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
532  * @timeout: how long to wait
533  * @rps: client (user process) to charge for any waitboosting
534  */
535 int
536 i915_gem_object_wait(struct drm_i915_gem_object *obj,
537 		     unsigned int flags,
538 		     long timeout,
539 		     struct intel_rps_client *rps)
540 {
541 	might_sleep();
542 #if IS_ENABLED(CONFIG_LOCKDEP)
543 	GEM_BUG_ON(debug_locks &&
544 		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
545 		   !!(flags & I915_WAIT_LOCKED));
546 #endif
547 	GEM_BUG_ON(timeout < 0);
548 
549 	timeout = i915_gem_object_wait_reservation(obj->resv,
550 						   flags, timeout,
551 						   rps);
552 	return timeout < 0 ? timeout : 0;
553 }
554 
555 static struct intel_rps_client *to_rps_client(struct drm_file *file)
556 {
557 	struct drm_i915_file_private *fpriv = file->driver_priv;
558 
559 	return &fpriv->rps;
560 }
561 
562 int
563 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
564 			    int align)
565 {
566 	int ret;
567 
568 	if (align > obj->base.size)
569 		return -EINVAL;
570 
571 	if (obj->ops == &i915_gem_phys_ops)
572 		return 0;
573 
574 	if (obj->mm.madv != I915_MADV_WILLNEED)
575 		return -EFAULT;
576 
577 	if (obj->base.filp == NULL)
578 		return -EINVAL;
579 
580 	ret = i915_gem_object_unbind(obj);
581 	if (ret)
582 		return ret;
583 
584 	__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
585 	if (obj->mm.pages)
586 		return -EBUSY;
587 
588 	obj->ops = &i915_gem_phys_ops;
589 
590 	return i915_gem_object_pin_pages(obj);
591 }
592 
593 static int
594 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
595 		     struct drm_i915_gem_pwrite *args,
596 		     struct drm_file *file)
597 {
598 	void *vaddr = obj->phys_handle->vaddr + args->offset;
599 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
600 
601 	/* We manually control the domain here and pretend that it
602 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
603 	 */
604 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
605 	if (copy_from_user(vaddr, user_data, args->size))
606 		return -EFAULT;
607 
608 	drm_clflush_virt_range(vaddr, args->size);
609 	i915_gem_chipset_flush(to_i915(obj->base.dev));
610 
611 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
612 	return 0;
613 }
614 
615 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
616 {
617 	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
618 }
619 
620 void i915_gem_object_free(struct drm_i915_gem_object *obj)
621 {
622 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
623 	kmem_cache_free(dev_priv->objects, obj);
624 }
625 
626 static int
627 i915_gem_create(struct drm_file *file,
628 		struct drm_i915_private *dev_priv,
629 		uint64_t size,
630 		uint32_t *handle_p)
631 {
632 	struct drm_i915_gem_object *obj;
633 	int ret;
634 	u32 handle;
635 
636 	size = roundup(size, PAGE_SIZE);
637 	if (size == 0)
638 		return -EINVAL;
639 
640 	/* Allocate the new object */
641 	obj = i915_gem_object_create(dev_priv, size);
642 	if (IS_ERR(obj))
643 		return PTR_ERR(obj);
644 
645 	ret = drm_gem_handle_create(file, &obj->base, &handle);
646 	/* drop reference from allocate - handle holds it now */
647 	i915_gem_object_put(obj);
648 	if (ret)
649 		return ret;
650 
651 	*handle_p = handle;
652 	return 0;
653 }
654 
655 int
656 i915_gem_dumb_create(struct drm_file *file,
657 		     struct drm_device *dev,
658 		     struct drm_mode_create_dumb *args)
659 {
660 	/* have to work out size/pitch and return them */
661 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
662 	args->size = args->pitch * args->height;
663 	return i915_gem_create(file, to_i915(dev),
664 			       args->size, &args->handle);
665 }
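/*
 * Worked example of the dumb-buffer arithmetic above: for a 1920x1080
 * buffer at 32 bpp, DIV_ROUND_UP(32, 8) = 4 bytes per pixel, so
 * pitch = ALIGN(1920 * 4, 64) = 7680 bytes and size = 7680 * 1080 =
 * 8294400 bytes, which i915_gem_create() then rounds up to a whole
 * number of pages (already the case here).
 */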
666 
667 /**
668  * Creates a new mm object and returns a handle to it.
669  * @dev: drm device pointer
670  * @data: ioctl data blob
671  * @file: drm file pointer
672  */
673 int
674 i915_gem_create_ioctl(struct drm_device *dev, void *data,
675 		      struct drm_file *file)
676 {
677 	struct drm_i915_private *dev_priv = to_i915(dev);
678 	struct drm_i915_gem_create *args = data;
679 
680 	i915_gem_flush_free_objects(dev_priv);
681 
682 	return i915_gem_create(file, dev_priv,
683 			       args->size, &args->handle);
684 }
685 
686 static inline int
687 __copy_to_user_swizzled(char __user *cpu_vaddr,
688 			const char *gpu_vaddr, int gpu_offset,
689 			int length)
690 {
691 	int ret, cpu_offset = 0;
692 
693 	while (length > 0) {
694 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
695 		int this_length = min(cacheline_end - gpu_offset, length);
696 		int swizzled_gpu_offset = gpu_offset ^ 64;
697 
698 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
699 				     gpu_vaddr + swizzled_gpu_offset,
700 				     this_length);
701 		if (ret)
702 			return ret + length;
703 
704 		cpu_offset += this_length;
705 		gpu_offset += this_length;
706 		length -= this_length;
707 	}
708 
709 	return 0;
710 }
711 
712 static inline int
713 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
714 			  const char __user *cpu_vaddr,
715 			  int length)
716 {
717 	int ret, cpu_offset = 0;
718 
719 	while (length > 0) {
720 		int cacheline_end = ALIGN(gpu_offset + 1, 64);
721 		int this_length = min(cacheline_end - gpu_offset, length);
722 		int swizzled_gpu_offset = gpu_offset ^ 64;
723 
724 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
725 				       cpu_vaddr + cpu_offset,
726 				       this_length);
727 		if (ret)
728 			return ret + length;
729 
730 		cpu_offset += this_length;
731 		gpu_offset += this_length;
732 		length -= this_length;
733 	}
734 
735 	return 0;
736 }
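/*
 * Note on the two swizzled copy helpers above: on platforms whose swizzle
 * pattern includes bit 17 of the physical address, pages with that bit set
 * have bit 6 of every offset flipped relative to the linear layout userspace
 * expects. The helpers therefore never copy across a 64-byte cacheline
 * boundary and XOR the offset with 64 to reach the partner cacheline
 * (0 <-> 64, 128 <-> 192, and so on). Callers enable this per page via
 * page_to_phys(page) & BIT(17).
 */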
737 
738 /*
739  * Pins the specified object's pages and synchronizes the object with
740  * GPU accesses. Sets needs_clflush to non-zero if the caller should
741  * flush the object from the CPU cache.
742  */
743 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
744 				    unsigned int *needs_clflush)
745 {
746 	int ret;
747 
748 	lockdep_assert_held(&obj->base.dev->struct_mutex);
749 
750 	*needs_clflush = 0;
751 	if (!i915_gem_object_has_struct_page(obj))
752 		return -ENODEV;
753 
754 	ret = i915_gem_object_wait(obj,
755 				   I915_WAIT_INTERRUPTIBLE |
756 				   I915_WAIT_LOCKED,
757 				   MAX_SCHEDULE_TIMEOUT,
758 				   NULL);
759 	if (ret)
760 		return ret;
761 
762 	ret = i915_gem_object_pin_pages(obj);
763 	if (ret)
764 		return ret;
765 
766 	i915_gem_object_flush_gtt_write_domain(obj);
767 
768 	/* If we're not in the cpu read domain, set ourselves into the gtt
769 	 * read domain and manually flush cachelines (if required). This
770 	 * optimizes for the case when the gpu will dirty the data
771 	 * anyway again before the next pread happens.
772 	 */
773 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
774 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
775 							obj->cache_level);
776 
777 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
778 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
779 		if (ret)
780 			goto err_unpin;
781 
782 		*needs_clflush = 0;
783 	}
784 
785 	/* return with the pages pinned */
786 	return 0;
787 
788 err_unpin:
789 	i915_gem_object_unpin_pages(obj);
790 	return ret;
791 }
792 
793 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
794 				     unsigned int *needs_clflush)
795 {
796 	int ret;
797 
798 	lockdep_assert_held(&obj->base.dev->struct_mutex);
799 
800 	*needs_clflush = 0;
801 	if (!i915_gem_object_has_struct_page(obj))
802 		return -ENODEV;
803 
804 	ret = i915_gem_object_wait(obj,
805 				   I915_WAIT_INTERRUPTIBLE |
806 				   I915_WAIT_LOCKED |
807 				   I915_WAIT_ALL,
808 				   MAX_SCHEDULE_TIMEOUT,
809 				   NULL);
810 	if (ret)
811 		return ret;
812 
813 	ret = i915_gem_object_pin_pages(obj);
814 	if (ret)
815 		return ret;
816 
817 	i915_gem_object_flush_gtt_write_domain(obj);
818 
819 	/* If we're not in the cpu write domain, set ourselves into the
820 	 * gtt write domain and manually flush cachelines (as required).
821 	 * This optimizes for the case when the gpu will use the data
822 	 * right away and we therefore have to clflush anyway.
823 	 */
824 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
825 		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
826 
827 	/* Same trick applies to invalidate partially written cachelines read
828 	 * before writing.
829 	 */
830 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
831 		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
832 							 obj->cache_level);
833 
834 	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
835 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
836 		if (ret)
837 			goto err_unpin;
838 
839 		*needs_clflush = 0;
840 	}
841 
842 	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
843 		obj->cache_dirty = true;
844 
845 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
846 	obj->mm.dirty = true;
847 	/* return with the pages pinned */
848 	return 0;
849 
850 err_unpin:
851 	i915_gem_object_unpin_pages(obj);
852 	return ret;
853 }
854 
855 static void
856 shmem_clflush_swizzled_range(char *addr, unsigned long length,
857 			     bool swizzled)
858 {
859 	if (unlikely(swizzled)) {
860 		unsigned long start = (unsigned long) addr;
861 		unsigned long end = (unsigned long) addr + length;
862 
863 		/* For swizzling simply ensure that we always flush both
864 		 * channels. Lame, but simple and it works. Swizzled
865 		 * pwrite/pread is far from a hotpath - current userspace
866 		 * doesn't use it at all. */
867 		start = round_down(start, 128);
868 		end = round_up(end, 128);
869 
870 		drm_clflush_virt_range((void *)start, end - start);
871 	} else {
872 		drm_clflush_virt_range(addr, length);
873 	}
874 
875 }
876 
877 /* Only difference to the fast-path function is that this can handle bit17
878  * and uses non-atomic copy and kmap functions. */
879 static int
880 shmem_pread_slow(struct page *page, int offset, int length,
881 		 char __user *user_data,
882 		 bool page_do_bit17_swizzling, bool needs_clflush)
883 {
884 	char *vaddr;
885 	int ret;
886 
887 	vaddr = kmap(page);
888 	if (needs_clflush)
889 		shmem_clflush_swizzled_range(vaddr + offset, length,
890 					     page_do_bit17_swizzling);
891 
892 	if (page_do_bit17_swizzling)
893 		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
894 	else
895 		ret = __copy_to_user(user_data, vaddr + offset, length);
896 	kunmap(page);
897 
898 	return ret ? -EFAULT : 0;
899 }
900 
901 static int
902 shmem_pread(struct page *page, int offset, int length, char __user *user_data,
903 	    bool page_do_bit17_swizzling, bool needs_clflush)
904 {
905 	int ret;
906 
907 	ret = -ENODEV;
908 	if (!page_do_bit17_swizzling) {
909 		char *vaddr = kmap_atomic(page);
910 
911 		if (needs_clflush)
912 			drm_clflush_virt_range(vaddr + offset, length);
913 		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
914 		kunmap_atomic(vaddr);
915 	}
916 	if (ret == 0)
917 		return 0;
918 
919 	return shmem_pread_slow(page, offset, length, user_data,
920 				page_do_bit17_swizzling, needs_clflush);
921 }
922 
923 static int
924 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
925 		     struct drm_i915_gem_pread *args)
926 {
927 	char __user *user_data;
928 	u64 remain;
929 	unsigned int obj_do_bit17_swizzling;
930 	unsigned int needs_clflush;
931 	unsigned int idx, offset;
932 	int ret;
933 
934 	obj_do_bit17_swizzling = 0;
935 	if (i915_gem_object_needs_bit17_swizzle(obj))
936 		obj_do_bit17_swizzling = BIT(17);
937 
938 	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
939 	if (ret)
940 		return ret;
941 
942 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
943 	mutex_unlock(&obj->base.dev->struct_mutex);
944 	if (ret)
945 		return ret;
946 
947 	remain = args->size;
948 	user_data = u64_to_user_ptr(args->data_ptr);
949 	offset = offset_in_page(args->offset);
950 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
951 		struct page *page = i915_gem_object_get_page(obj, idx);
952 		int length;
953 
954 		length = remain;
955 		if (offset + length > PAGE_SIZE)
956 			length = PAGE_SIZE - offset;
957 
958 		ret = shmem_pread(page, offset, length, user_data,
959 				  page_to_phys(page) & obj_do_bit17_swizzling,
960 				  needs_clflush);
961 		if (ret)
962 			break;
963 
964 		remain -= length;
965 		user_data += length;
966 		offset = 0;
967 	}
968 
969 	i915_gem_obj_finish_shmem_access(obj);
970 	return ret;
971 }
972 
973 static inline bool
974 gtt_user_read(struct io_mapping *mapping,
975 	      loff_t base, int offset,
976 	      char __user *user_data, int length)
977 {
978 	void *vaddr;
979 	unsigned long unwritten;
980 
981 	/* We can use the cpu mem copy function because this is X86. */
982 	vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
983 	unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
984 	io_mapping_unmap_atomic(vaddr);
985 	if (unwritten) {
986 		vaddr = (void __force *)
987 			io_mapping_map_wc(mapping, base, PAGE_SIZE);
988 		unwritten = copy_to_user(user_data, vaddr + offset, length);
989 		io_mapping_unmap(vaddr);
990 	}
991 	return unwritten;
992 }
993 
994 static int
995 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
996 		   const struct drm_i915_gem_pread *args)
997 {
998 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
999 	struct i915_ggtt *ggtt = &i915->ggtt;
1000 	struct drm_mm_node node;
1001 	struct i915_vma *vma;
1002 	void __user *user_data;
1003 	u64 remain, offset;
1004 	int ret;
1005 
1006 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1007 	if (ret)
1008 		return ret;
1009 
1010 	intel_runtime_pm_get(i915);
1011 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1012 				       PIN_MAPPABLE | PIN_NONBLOCK);
1013 	if (!IS_ERR(vma)) {
1014 		node.start = i915_ggtt_offset(vma);
1015 		node.allocated = false;
1016 		ret = i915_vma_put_fence(vma);
1017 		if (ret) {
1018 			i915_vma_unpin(vma);
1019 			vma = ERR_PTR(ret);
1020 		}
1021 	}
1022 	if (IS_ERR(vma)) {
1023 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1024 		if (ret)
1025 			goto out_unlock;
1026 		GEM_BUG_ON(!node.allocated);
1027 	}
1028 
1029 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
1030 	if (ret)
1031 		goto out_unpin;
1032 
1033 	mutex_unlock(&i915->drm.struct_mutex);
1034 
1035 	user_data = u64_to_user_ptr(args->data_ptr);
1036 	remain = args->size;
1037 	offset = args->offset;
1038 
1039 	while (remain > 0) {
1040 		/* Operation in this page
1041 		 *
1042 		 * page_base = page offset within aperture
1043 		 * page_offset = offset within page
1044 		 * page_length = bytes to copy for this page
1045 		 */
1046 		u32 page_base = node.start;
1047 		unsigned page_offset = offset_in_page(offset);
1048 		unsigned page_length = PAGE_SIZE - page_offset;
1049 		page_length = remain < page_length ? remain : page_length;
1050 		if (node.allocated) {
1051 			wmb();
1052 			ggtt->base.insert_page(&ggtt->base,
1053 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1054 					       node.start, I915_CACHE_NONE, 0);
1055 			wmb();
1056 		} else {
1057 			page_base += offset & PAGE_MASK;
1058 		}
1059 
1060 		if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
1061 				  user_data, page_length)) {
1062 			ret = -EFAULT;
1063 			break;
1064 		}
1065 
1066 		remain -= page_length;
1067 		user_data += page_length;
1068 		offset += page_length;
1069 	}
1070 
1071 	mutex_lock(&i915->drm.struct_mutex);
1072 out_unpin:
1073 	if (node.allocated) {
1074 		wmb();
1075 		ggtt->base.clear_range(&ggtt->base,
1076 				       node.start, node.size);
1077 		remove_mappable_node(&node);
1078 	} else {
1079 		i915_vma_unpin(vma);
1080 	}
1081 out_unlock:
1082 	intel_runtime_pm_put(i915);
1083 	mutex_unlock(&i915->drm.struct_mutex);
1084 
1085 	return ret;
1086 }
1087 
1088 /**
1089  * Reads data from the object referenced by handle.
1090  * @dev: drm device pointer
1091  * @data: ioctl data blob
1092  * @file: drm file pointer
1093  *
1094  * On error, the contents of *data are undefined.
1095  */
1096 int
1097 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1098 		     struct drm_file *file)
1099 {
1100 	struct drm_i915_gem_pread *args = data;
1101 	struct drm_i915_gem_object *obj;
1102 	int ret;
1103 
1104 	if (args->size == 0)
1105 		return 0;
1106 
1107 	if (!access_ok(VERIFY_WRITE,
1108 		       u64_to_user_ptr(args->data_ptr),
1109 		       args->size))
1110 		return -EFAULT;
1111 
1112 	obj = i915_gem_object_lookup(file, args->handle);
1113 	if (!obj)
1114 		return -ENOENT;
1115 
1116 	/* Bounds check source.  */
1117 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1118 		ret = -EINVAL;
1119 		goto out;
1120 	}
1121 
1122 	trace_i915_gem_object_pread(obj, args->offset, args->size);
1123 
1124 	ret = i915_gem_object_wait(obj,
1125 				   I915_WAIT_INTERRUPTIBLE,
1126 				   MAX_SCHEDULE_TIMEOUT,
1127 				   to_rps_client(file));
1128 	if (ret)
1129 		goto out;
1130 
1131 	ret = i915_gem_object_pin_pages(obj);
1132 	if (ret)
1133 		goto out;
1134 
1135 	ret = i915_gem_shmem_pread(obj, args);
1136 	if (ret == -EFAULT || ret == -ENODEV)
1137 		ret = i915_gem_gtt_pread(obj, args);
1138 
1139 	i915_gem_object_unpin_pages(obj);
1140 out:
1141 	i915_gem_object_put(obj);
1142 	return ret;
1143 }
1144 
1145 /* This is the fast write path which cannot handle
1146  * page faults in the source data
1147  */
1148 
1149 static inline bool
1150 ggtt_write(struct io_mapping *mapping,
1151 	   loff_t base, int offset,
1152 	   char __user *user_data, int length)
1153 {
1154 	void *vaddr;
1155 	unsigned long unwritten;
1156 
1157 	/* We can use the cpu mem copy function because this is X86. */
1158 	vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
1159 	unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
1160 						      user_data, length);
1161 	io_mapping_unmap_atomic(vaddr);
1162 	if (unwritten) {
1163 		vaddr = (void __force *)
1164 			io_mapping_map_wc(mapping, base, PAGE_SIZE);
1165 		unwritten = copy_from_user(vaddr + offset, user_data, length);
1166 		io_mapping_unmap(vaddr);
1167 	}
1168 
1169 	return unwritten;
1170 }
1171 
1172 /**
1173  * This is the fast pwrite path, where we copy the data directly from the
1174  * user into the GTT, uncached.
1175  * @obj: i915 GEM object
1176  * @args: pwrite arguments structure
1177  */
1178 static int
1179 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1180 			 const struct drm_i915_gem_pwrite *args)
1181 {
1182 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1183 	struct i915_ggtt *ggtt = &i915->ggtt;
1184 	struct drm_mm_node node;
1185 	struct i915_vma *vma;
1186 	u64 remain, offset;
1187 	void __user *user_data;
1188 	int ret;
1189 
1190 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1191 	if (ret)
1192 		return ret;
1193 
1194 	intel_runtime_pm_get(i915);
1195 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1196 				       PIN_MAPPABLE | PIN_NONBLOCK);
1197 	if (!IS_ERR(vma)) {
1198 		node.start = i915_ggtt_offset(vma);
1199 		node.allocated = false;
1200 		ret = i915_vma_put_fence(vma);
1201 		if (ret) {
1202 			i915_vma_unpin(vma);
1203 			vma = ERR_PTR(ret);
1204 		}
1205 	}
1206 	if (IS_ERR(vma)) {
1207 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1208 		if (ret)
1209 			goto out_unlock;
1210 		GEM_BUG_ON(!node.allocated);
1211 	}
1212 
1213 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1214 	if (ret)
1215 		goto out_unpin;
1216 
1217 	mutex_unlock(&i915->drm.struct_mutex);
1218 
1219 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1220 
1221 	user_data = u64_to_user_ptr(args->data_ptr);
1222 	offset = args->offset;
1223 	remain = args->size;
1224 	while (remain) {
1225 		/* Operation in this page
1226 		 *
1227 		 * page_base = page offset within aperture
1228 		 * page_offset = offset within page
1229 		 * page_length = bytes to copy for this page
1230 		 */
1231 		u32 page_base = node.start;
1232 		unsigned int page_offset = offset_in_page(offset);
1233 		unsigned int page_length = PAGE_SIZE - page_offset;
1234 		page_length = remain < page_length ? remain : page_length;
1235 		if (node.allocated) {
1236 			wmb(); /* flush the write before we modify the GGTT */
1237 			ggtt->base.insert_page(&ggtt->base,
1238 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1239 					       node.start, I915_CACHE_NONE, 0);
1240 			wmb(); /* flush modifications to the GGTT (insert_page) */
1241 		} else {
1242 			page_base += offset & PAGE_MASK;
1243 		}
1244 		/* If we get a fault while copying data, then (presumably) our
1245 		 * source page isn't available.  Return the error and we'll
1246 		 * retry in the slow path.
1247 		 * If the object is non-shmem backed, we retry again with the
1248 		 * path that handles page fault.
1249 		 */
1250 		if (ggtt_write(&ggtt->mappable, page_base, page_offset,
1251 			       user_data, page_length)) {
1252 			ret = -EFAULT;
1253 			break;
1254 		}
1255 
1256 		remain -= page_length;
1257 		user_data += page_length;
1258 		offset += page_length;
1259 	}
1260 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1261 
1262 	mutex_lock(&i915->drm.struct_mutex);
1263 out_unpin:
1264 	if (node.allocated) {
1265 		wmb();
1266 		ggtt->base.clear_range(&ggtt->base,
1267 				       node.start, node.size);
1268 		remove_mappable_node(&node);
1269 	} else {
1270 		i915_vma_unpin(vma);
1271 	}
1272 out_unlock:
1273 	intel_runtime_pm_put(i915);
1274 	mutex_unlock(&i915->drm.struct_mutex);
1275 	return ret;
1276 }
1277 
1278 static int
1279 shmem_pwrite_slow(struct page *page, int offset, int length,
1280 		  char __user *user_data,
1281 		  bool page_do_bit17_swizzling,
1282 		  bool needs_clflush_before,
1283 		  bool needs_clflush_after)
1284 {
1285 	char *vaddr;
1286 	int ret;
1287 
1288 	vaddr = kmap(page);
1289 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1290 		shmem_clflush_swizzled_range(vaddr + offset, length,
1291 					     page_do_bit17_swizzling);
1292 	if (page_do_bit17_swizzling)
1293 		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1294 						length);
1295 	else
1296 		ret = __copy_from_user(vaddr + offset, user_data, length);
1297 	if (needs_clflush_after)
1298 		shmem_clflush_swizzled_range(vaddr + offset, length,
1299 					     page_do_bit17_swizzling);
1300 	kunmap(page);
1301 
1302 	return ret ? -EFAULT : 0;
1303 }
1304 
1305 /* Per-page copy function for the shmem pwrite fastpath.
1306  * Flushes invalid cachelines before writing to the target if
1307  * needs_clflush_before is set and flushes out any written cachelines after
1308  * writing if needs_clflush is set.
1309  */
1310 static int
1311 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1312 	     bool page_do_bit17_swizzling,
1313 	     bool needs_clflush_before,
1314 	     bool needs_clflush_after)
1315 {
1316 	int ret;
1317 
1318 	ret = -ENODEV;
1319 	if (!page_do_bit17_swizzling) {
1320 		char *vaddr = kmap_atomic(page);
1321 
1322 		if (needs_clflush_before)
1323 			drm_clflush_virt_range(vaddr + offset, len);
1324 		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1325 		if (needs_clflush_after)
1326 			drm_clflush_virt_range(vaddr + offset, len);
1327 
1328 		kunmap_atomic(vaddr);
1329 	}
1330 	if (ret == 0)
1331 		return ret;
1332 
1333 	return shmem_pwrite_slow(page, offset, len, user_data,
1334 				 page_do_bit17_swizzling,
1335 				 needs_clflush_before,
1336 				 needs_clflush_after);
1337 }
1338 
1339 static int
1340 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1341 		      const struct drm_i915_gem_pwrite *args)
1342 {
1343 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1344 	void __user *user_data;
1345 	u64 remain;
1346 	unsigned int obj_do_bit17_swizzling;
1347 	unsigned int partial_cacheline_write;
1348 	unsigned int needs_clflush;
1349 	unsigned int offset, idx;
1350 	int ret;
1351 
1352 	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1353 	if (ret)
1354 		return ret;
1355 
1356 	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1357 	mutex_unlock(&i915->drm.struct_mutex);
1358 	if (ret)
1359 		return ret;
1360 
1361 	obj_do_bit17_swizzling = 0;
1362 	if (i915_gem_object_needs_bit17_swizzle(obj))
1363 		obj_do_bit17_swizzling = BIT(17);
1364 
1365 	/* If we don't overwrite a cacheline completely we need to be
1366 	 * careful to have up-to-date data by first clflushing. Don't
1367 	 * overcomplicate things and flush the entire page.
1368 	 */
1369 	partial_cacheline_write = 0;
1370 	if (needs_clflush & CLFLUSH_BEFORE)
1371 		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1372 
1373 	user_data = u64_to_user_ptr(args->data_ptr);
1374 	remain = args->size;
1375 	offset = offset_in_page(args->offset);
1376 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1377 		struct page *page = i915_gem_object_get_page(obj, idx);
1378 		int length;
1379 
1380 		length = remain;
1381 		if (offset + length > PAGE_SIZE)
1382 			length = PAGE_SIZE - offset;
1383 
1384 		ret = shmem_pwrite(page, offset, length, user_data,
1385 				   page_to_phys(page) & obj_do_bit17_swizzling,
1386 				   (offset | length) & partial_cacheline_write,
1387 				   needs_clflush & CLFLUSH_AFTER);
1388 		if (ret)
1389 			break;
1390 
1391 		remain -= length;
1392 		user_data += length;
1393 		offset = 0;
1394 	}
1395 
1396 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1397 	i915_gem_obj_finish_shmem_access(obj);
1398 	return ret;
1399 }
1400 
1401 /**
1402  * Writes data to the object referenced by handle.
1403  * @dev: drm device
1404  * @data: ioctl data blob
1405  * @file: drm file
1406  *
1407  * On error, the contents of the buffer that were to be modified are undefined.
1408  */
1409 int
1410 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1411 		      struct drm_file *file)
1412 {
1413 	struct drm_i915_gem_pwrite *args = data;
1414 	struct drm_i915_gem_object *obj;
1415 	int ret;
1416 
1417 	if (args->size == 0)
1418 		return 0;
1419 
1420 	if (!access_ok(VERIFY_READ,
1421 		       u64_to_user_ptr(args->data_ptr),
1422 		       args->size))
1423 		return -EFAULT;
1424 
1425 	obj = i915_gem_object_lookup(file, args->handle);
1426 	if (!obj)
1427 		return -ENOENT;
1428 
1429 	/* Bounds check destination. */
1430 	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1431 		ret = -EINVAL;
1432 		goto err;
1433 	}
1434 
1435 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1436 
1437 	ret = -ENODEV;
1438 	if (obj->ops->pwrite)
1439 		ret = obj->ops->pwrite(obj, args);
1440 	if (ret != -ENODEV)
1441 		goto err;
1442 
1443 	ret = i915_gem_object_wait(obj,
1444 				   I915_WAIT_INTERRUPTIBLE |
1445 				   I915_WAIT_ALL,
1446 				   MAX_SCHEDULE_TIMEOUT,
1447 				   to_rps_client(file));
1448 	if (ret)
1449 		goto err;
1450 
1451 	ret = i915_gem_object_pin_pages(obj);
1452 	if (ret)
1453 		goto err;
1454 
1455 	ret = -EFAULT;
1456 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1457 	 * it would end up going through the fenced access, and we'll get
1458 	 * different detiling behavior between reading and writing.
1459 	 * pread/pwrite currently are reading and writing from the CPU
1460 	 * perspective, requiring manual detiling by the client.
1461 	 */
1462 	if (!i915_gem_object_has_struct_page(obj) ||
1463 	    cpu_write_needs_clflush(obj))
1464 		/* Note that the gtt paths might fail with non-page-backed user
1465 		 * pointers (e.g. gtt mappings when moving data between
1466 		 * textures). Fallback to the shmem path in that case.
1467 		 */
1468 		ret = i915_gem_gtt_pwrite_fast(obj, args);
1469 
1470 	if (ret == -EFAULT || ret == -ENOSPC) {
1471 		if (obj->phys_handle)
1472 			ret = i915_gem_phys_pwrite(obj, args, file);
1473 		else
1474 			ret = i915_gem_shmem_pwrite(obj, args);
1475 	}
1476 
1477 	i915_gem_object_unpin_pages(obj);
1478 err:
1479 	i915_gem_object_put(obj);
1480 	return ret;
1481 }
1482 
1483 static inline enum fb_op_origin
1484 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1485 {
1486 	return (domain == I915_GEM_DOMAIN_GTT ?
1487 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1488 }
1489 
1490 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1491 {
1492 	struct drm_i915_private *i915;
1493 	struct list_head *list;
1494 	struct i915_vma *vma;
1495 
1496 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
1497 		if (!i915_vma_is_ggtt(vma))
1498 			break;
1499 
1500 		if (i915_vma_is_active(vma))
1501 			continue;
1502 
1503 		if (!drm_mm_node_allocated(&vma->node))
1504 			continue;
1505 
1506 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1507 	}
1508 
1509 	i915 = to_i915(obj->base.dev);
1510 	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1511 	list_move_tail(&obj->global_link, list);
1512 }
1513 
1514 /**
1515  * Called when user space prepares to use an object with the CPU, either
1516  * through the mmap ioctl's mapping or a GTT mapping.
1517  * @dev: drm device
1518  * @data: ioctl data blob
1519  * @file: drm file
1520  */
1521 int
1522 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1523 			  struct drm_file *file)
1524 {
1525 	struct drm_i915_gem_set_domain *args = data;
1526 	struct drm_i915_gem_object *obj;
1527 	uint32_t read_domains = args->read_domains;
1528 	uint32_t write_domain = args->write_domain;
1529 	int err;
1530 
1531 	/* Only handle setting domains to types used by the CPU. */
1532 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1533 		return -EINVAL;
1534 
1535 	/* Having something in the write domain implies it's in the read
1536 	 * domain, and only that read domain.  Enforce that in the request.
1537 	 */
1538 	if (write_domain != 0 && read_domains != write_domain)
1539 		return -EINVAL;
1540 
1541 	obj = i915_gem_object_lookup(file, args->handle);
1542 	if (!obj)
1543 		return -ENOENT;
1544 
1545 	/* Try to flush the object off the GPU without holding the lock.
1546 	 * We will repeat the flush holding the lock in the normal manner
1547 	 * to catch cases where we are gazumped.
1548 	 */
1549 	err = i915_gem_object_wait(obj,
1550 				   I915_WAIT_INTERRUPTIBLE |
1551 				   (write_domain ? I915_WAIT_ALL : 0),
1552 				   MAX_SCHEDULE_TIMEOUT,
1553 				   to_rps_client(file));
1554 	if (err)
1555 		goto out;
1556 
1557 	/* Flush and acquire obj->pages so that we are coherent through
1558 	 * direct access in memory with previous cached writes through
1559 	 * shmemfs and that our cache domain tracking remains valid.
1560 	 * For example, if the obj->filp was moved to swap without us
1561 	 * being notified and releasing the pages, we would mistakenly
1562 	 * continue to assume that the obj remained out of the CPU cached
1563 	 * domain.
1564 	 */
1565 	err = i915_gem_object_pin_pages(obj);
1566 	if (err)
1567 		goto out;
1568 
1569 	err = i915_mutex_lock_interruptible(dev);
1570 	if (err)
1571 		goto out_unpin;
1572 
1573 	if (read_domains & I915_GEM_DOMAIN_GTT)
1574 		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1575 	else
1576 		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1577 
1578 	/* And bump the LRU for this access */
1579 	i915_gem_object_bump_inactive_ggtt(obj);
1580 
1581 	mutex_unlock(&dev->struct_mutex);
1582 
1583 	if (write_domain != 0)
1584 		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1585 
1586 out_unpin:
1587 	i915_gem_object_unpin_pages(obj);
1588 out:
1589 	i915_gem_object_put(obj);
1590 	return err;
1591 }
1592 
1593 /**
1594  * Called when user space has done writes to this buffer
1595  * @dev: drm device
1596  * @data: ioctl data blob
1597  * @file: drm file
1598  */
1599 int
1600 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1601 			 struct drm_file *file)
1602 {
1603 	struct drm_i915_gem_sw_finish *args = data;
1604 	struct drm_i915_gem_object *obj;
1605 	int err = 0;
1606 
1607 	obj = i915_gem_object_lookup(file, args->handle);
1608 	if (!obj)
1609 		return -ENOENT;
1610 
1611 	/* Pinned buffers may be scanout, so flush the cache */
1612 	if (READ_ONCE(obj->pin_display)) {
1613 		err = i915_mutex_lock_interruptible(dev);
1614 		if (!err) {
1615 			i915_gem_object_flush_cpu_write_domain(obj);
1616 			mutex_unlock(&dev->struct_mutex);
1617 		}
1618 	}
1619 
1620 	i915_gem_object_put(obj);
1621 	return err;
1622 }
1623 
1624 /**
1625  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1626  *			 it is mapped to.
1627  * @dev: drm device
1628  * @data: ioctl data blob
1629  * @file: drm file
1630  *
1631  * While the mapping holds a reference on the contents of the object, it doesn't
1632  * imply a ref on the object itself.
1633  *
1634  * IMPORTANT:
1635  *
1636  * DRM driver writers who look at this function as an example of how to do GEM
1637  * mmap support, please don't implement mmap support like here. The modern way
1638  * to implement DRM mmap support is with an mmap offset ioctl (like
1639  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1640  * That way debug tooling like valgrind will understand what's going on; hiding
1641  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1642  * does cpu mmaps this way because we didn't know better.
1643  */
1644 int
1645 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1646 		    struct drm_file *file)
1647 {
1648 	struct drm_i915_gem_mmap *args = data;
1649 	struct drm_i915_gem_object *obj;
1650 	unsigned long addr;
1651 
1652 	if (args->flags & ~(I915_MMAP_WC))
1653 		return -EINVAL;
1654 
1655 	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1656 		return -ENODEV;
1657 
1658 	obj = i915_gem_object_lookup(file, args->handle);
1659 	if (!obj)
1660 		return -ENOENT;
1661 
1662 	/* prime (dma-buf imported) objects have no backing filp for GEM to
1663 	 * mmap pages from.
1664 	 */
1665 	if (!obj->base.filp) {
1666 		i915_gem_object_put(obj);
1667 		return -EINVAL;
1668 	}
1669 
1670 	addr = vm_mmap(obj->base.filp, 0, args->size,
1671 		       PROT_READ | PROT_WRITE, MAP_SHARED,
1672 		       args->offset);
1673 	if (args->flags & I915_MMAP_WC) {
1674 		struct mm_struct *mm = current->mm;
1675 		struct vm_area_struct *vma;
1676 
1677 		if (down_write_killable(&mm->mmap_sem)) {
1678 			i915_gem_object_put(obj);
1679 			return -EINTR;
1680 		}
1681 		vma = find_vma(mm, addr);
1682 		if (vma)
1683 			vma->vm_page_prot =
1684 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1685 		else
1686 			addr = -ENOMEM;
1687 		up_write(&mm->mmap_sem);
1688 
1689 		/* This may race, but that's ok, it only gets set */
1690 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1691 	}
1692 	i915_gem_object_put(obj);
1693 	if (IS_ERR((void *)addr))
1694 		return addr;
1695 
1696 	args->addr_ptr = (uint64_t) addr;
1697 
1698 	return 0;
1699 }
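/*
 * Illustrative userspace sketch (not part of the driver) of the legacy CPU
 * mmap path implemented above; "fd", "handle" and "size" are assumed to come
 * from an open i915 DRM file descriptor and a prior GEM create, and error
 * handling is elided:
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.size = size,
 *		.flags = 0,	// or I915_MMAP_WC where supported
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0) {
 *		void *ptr = (void *)(uintptr_t)arg.addr_ptr;
 *		memset(ptr, 0, size);	// plain CPU access through the mapping
 *		munmap(ptr, size);
 *	}
 */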
1700 
1701 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1702 {
1703 	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1704 }
1705 
1706 /**
1707  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1708  *
1709  * A history of the GTT mmap interface:
1710  *
1711  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1712  *     be aligned and suitable for fencing, and still fit into the available
1713  *     mappable space left by the pinned display objects. A classic problem
1714  *     we called the page-fault-of-doom where we would ping-pong between
1715  *     two objects that could not fit inside the GTT and so the memcpy
1716  *     would page one object in at the expense of the other between every
1717  *     single byte.
1718  *
1719  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1720  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1721  *     object is too large for the available space (or simply too large
1722  *     for the mappable aperture!), a view is created instead and faulted
1723  *     into userspace. (This view is aligned and sized appropriately for
1724  *     fenced access.)
1725  *
1726  * Restrictions:
1727  *
1728  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1729  *    hangs on some architectures, corruption on others. An attempt to service
1730  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1731  *
1732  *  * the object must be able to fit into RAM (physical memory, though not
1733  *    limited to the mappable aperture).
1734  *
1735  *
1736  * Caveats:
1737  *
1738  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1739  *    all data to system memory. Subsequent access will not be synchronized.
1740  *
1741  *  * all mappings are revoked on runtime device suspend.
1742  *
1743  *  * there are only 8, 16 or 32 fence registers to share between all users
1744  *    (older machines require a fence register for display and blitter access
1745  *    as well). Contention of the fence registers will cause the previous users
1746  *    to be unmapped and any new access will generate new page faults.
1747  *
1748  *  * running out of memory while servicing a fault may generate a SIGBUS,
1749  *    rather than the expected SIGSEGV.
1750  */
1751 int i915_gem_mmap_gtt_version(void)
1752 {
1753 	return 1;
1754 }
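/*
 * Userspace discovers this value through the GETPARAM ioctl; a minimal
 * sketch (not part of the driver), assuming "fd" is an open i915 DRM file
 * descriptor:
 *
 *	int gtt_version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &gtt_version,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 *
 * A result of 1 or more means objects larger than the mappable aperture can
 * still be faulted through partial views.
 */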
1755 
1756 static inline struct i915_ggtt_view
1757 compute_partial_view(struct drm_i915_gem_object *obj,
1758 		     pgoff_t page_offset,
1759 		     unsigned int chunk)
1760 {
1761 	struct i915_ggtt_view view;
1762 
1763 	if (i915_gem_object_is_tiled(obj))
1764 		chunk = roundup(chunk, tile_row_pages(obj));
1765 
1766 	view.type = I915_GGTT_VIEW_PARTIAL;
1767 	view.partial.offset = rounddown(page_offset, chunk);
1768 	view.partial.size =
1769 		min_t(unsigned int, chunk,
1770 		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1771 
1772 	/* If the partial covers the entire object, just create a normal VMA. */
1773 	if (chunk >= obj->base.size >> PAGE_SHIFT)
1774 		view.type = I915_GGTT_VIEW_NORMAL;
1775 
1776 	return view;
1777 }
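/*
 * Worked example for compute_partial_view(): with 4 KiB pages an untiled
 * 16 MiB object spans 4096 pages and MIN_CHUNK_PAGES is 256. A fault at
 * page_offset = 1000 yields view.partial.offset = rounddown(1000, 256) = 768
 * and view.partial.size = min(256, 4096 - 768) = 256 pages, i.e. a 1 MiB
 * window around the faulting page. Only when the chunk covers the whole
 * object does the view collapse to I915_GGTT_VIEW_NORMAL.
 */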
1778 
1779 /**
1780  * i915_gem_fault - fault a page into the GTT
1781  * @vmf: fault info
1782  *
1783  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1784  * from userspace.  The fault handler takes care of binding the object to
1785  * the GTT (if needed), allocating and programming a fence register (again,
1786  * only if needed based on whether the old reg is still valid or the object
1787  * is tiled) and inserting a new PTE into the faulting process.
1788  *
1789  * Note that the faulting process may involve evicting existing objects
1790  * from the GTT and/or fence registers to make room.  So performance may
1791  * suffer if the GTT working set is large or there are few fence registers
1792  * left.
1793  *
1794  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1795  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1796  */
1797 int i915_gem_fault(struct vm_fault *vmf)
1798 {
1799 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1800 	struct vm_area_struct *area = vmf->vma;
1801 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1802 	struct drm_device *dev = obj->base.dev;
1803 	struct drm_i915_private *dev_priv = to_i915(dev);
1804 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1805 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1806 	struct i915_vma *vma;
1807 	pgoff_t page_offset;
1808 	unsigned int flags;
1809 	int ret;
1810 
1811 	/* We don't use vmf->pgoff since that has the fake offset */
1812 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1813 
1814 	trace_i915_gem_object_fault(obj, page_offset, true, write);
1815 
1816 	/* Try to flush the object off the GPU first without holding the lock.
1817 	 * Upon acquiring the lock, we will perform our sanity checks and then
1818 	 * repeat the flush holding the lock in the normal manner to catch cases
1819 	 * where we are gazumped.
1820 	 */
1821 	ret = i915_gem_object_wait(obj,
1822 				   I915_WAIT_INTERRUPTIBLE,
1823 				   MAX_SCHEDULE_TIMEOUT,
1824 				   NULL);
1825 	if (ret)
1826 		goto err;
1827 
1828 	ret = i915_gem_object_pin_pages(obj);
1829 	if (ret)
1830 		goto err;
1831 
1832 	intel_runtime_pm_get(dev_priv);
1833 
1834 	ret = i915_mutex_lock_interruptible(dev);
1835 	if (ret)
1836 		goto err_rpm;
1837 
1838 	/* Access to snoopable pages through the GTT is incoherent. */
1839 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1840 		ret = -EFAULT;
1841 		goto err_unlock;
1842 	}
1843 
1844 	/* If the object is smaller than a couple of partial vma, it is
1845 	 * not worth only creating a single partial vma - we may as well
1846 	 * clear enough space for the full object.
1847 	 */
1848 	flags = PIN_MAPPABLE;
1849 	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1850 		flags |= PIN_NONBLOCK | PIN_NONFAULT;
1851 
1852 	/* Now pin it into the GTT as needed */
1853 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1854 	if (IS_ERR(vma)) {
1855 		/* Use a partial view if it is bigger than available space */
1856 		struct i915_ggtt_view view =
1857 			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1858 
1859 		/* Userspace is now writing through an untracked VMA, abandon
1860 		 * all hope that the hardware is able to track future writes.
1861 		 */
1862 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1863 
1864 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1865 	}
1866 	if (IS_ERR(vma)) {
1867 		ret = PTR_ERR(vma);
1868 		goto err_unlock;
1869 	}
1870 
1871 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
1872 	if (ret)
1873 		goto err_unpin;
1874 
1875 	ret = i915_vma_get_fence(vma);
1876 	if (ret)
1877 		goto err_unpin;
1878 
1879 	/* Mark as being mmapped into userspace for later revocation */
1880 	assert_rpm_wakelock_held(dev_priv);
1881 	if (list_empty(&obj->userfault_link))
1882 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1883 
1884 	/* Finally, remap it using the new GTT offset */
1885 	ret = remap_io_mapping(area,
1886 			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1887 			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1888 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
1889 			       &ggtt->mappable);
1890 
1891 err_unpin:
1892 	__i915_vma_unpin(vma);
1893 err_unlock:
1894 	mutex_unlock(&dev->struct_mutex);
1895 err_rpm:
1896 	intel_runtime_pm_put(dev_priv);
1897 	i915_gem_object_unpin_pages(obj);
1898 err:
1899 	switch (ret) {
1900 	case -EIO:
1901 		/*
1902 		 * We eat errors when the gpu is terminally wedged to avoid
1903 		 * userspace unduly crashing (gl has no provisions for mmaps to
1904 		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1905 		 * and so needs to be reported.
1906 		 */
1907 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1908 			ret = VM_FAULT_SIGBUS;
1909 			break;
1910 		}
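		/* else: terminally wedged, fall through and report NOPAGE */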
1911 	case -EAGAIN:
1912 		/*
1913 		 * EAGAIN means the gpu is hung and we'll wait for the error
1914 		 * handler to reset everything when re-faulting in
1915 		 * i915_mutex_lock_interruptible.
1916 		 */
1917 	case 0:
1918 	case -ERESTARTSYS:
1919 	case -EINTR:
1920 	case -EBUSY:
1921 		/*
1922 		 * EBUSY is ok: this just means that another thread
1923 		 * already did the job.
1924 		 */
1925 		ret = VM_FAULT_NOPAGE;
1926 		break;
1927 	case -ENOMEM:
1928 		ret = VM_FAULT_OOM;
1929 		break;
1930 	case -ENOSPC:
1931 	case -EFAULT:
1932 		ret = VM_FAULT_SIGBUS;
1933 		break;
1934 	default:
1935 		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1936 		ret = VM_FAULT_SIGBUS;
1937 		break;
1938 	}
1939 	return ret;
1940 }
1941 
1942 /**
1943  * i915_gem_release_mmap - remove physical page mappings
1944  * @obj: obj in question
1945  *
1946  * Preserve the reservation of the mmapping with the DRM core code, but
1947  * relinquish ownership of the pages back to the system.
1948  *
1949  * It is vital that we remove the page mapping if we have mapped a tiled
1950  * object through the GTT and then lose the fence register due to
1951  * resource pressure. Similarly if the object has been moved out of the
1952  * aperture, than pages mapped into userspace must be revoked. Removing the
1953  * mapping will then trigger a page fault on the next user access, allowing
1954  * fixup by i915_gem_fault().
1955  */
1956 void
1957 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1958 {
1959 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1960 
1961 	/* Serialisation between user GTT access and our code depends upon
1962 	 * revoking the CPU's PTE whilst the mutex is held. The next user
1963 	 * pagefault then has to wait until we release the mutex.
1964 	 *
1965 	 * Note that RPM complicates this somewhat by adding the additional
1966 	 * requirement that operations on the GGTT be made while holding the
1967 	 * RPM wakeref.
1968 	 */
1969 	lockdep_assert_held(&i915->drm.struct_mutex);
1970 	intel_runtime_pm_get(i915);
1971 
1972 	if (list_empty(&obj->userfault_link))
1973 		goto out;
1974 
1975 	list_del_init(&obj->userfault_link);
1976 	drm_vma_node_unmap(&obj->base.vma_node,
1977 			   obj->base.dev->anon_inode->i_mapping);
1978 
1979 	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
1980 	 * memory transactions from userspace before we return. The TLB
1981 	 * flushing implied by changing the PTEs above *should* be
1982 	 * sufficient; the extra barrier here just provides us with a bit
1983 	 * of paranoid documentation about our requirement to serialise
1984 	 * memory writes before touching registers / GSM.
1985 	 */
1986 	wmb();
1987 
1988 out:
1989 	intel_runtime_pm_put(i915);
1990 }
1991 
1992 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1993 {
1994 	struct drm_i915_gem_object *obj, *on;
1995 	int i;
1996 
1997 	/*
1998 	 * Only called during RPM suspend. All users of the userfault_list
1999 	 * must be holding an RPM wakeref to ensure that this can not
2000 	 * run concurrently with themselves (and use the struct_mutex for
2001 	 * protection between themselves).
2002 	 */
2003 
2004 	list_for_each_entry_safe(obj, on,
2005 				 &dev_priv->mm.userfault_list, userfault_link) {
2006 		list_del_init(&obj->userfault_link);
2007 		drm_vma_node_unmap(&obj->base.vma_node,
2008 				   obj->base.dev->anon_inode->i_mapping);
2009 	}
2010 
2011 	/* The fence will be lost when the device powers down. If any were
2012 	 * in use by hardware (i.e. they are pinned), we should not be powering
2013 	 * down! All other fences will be reacquired by the user upon waking.
2014 	 */
2015 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
2016 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2017 
2018 		/* Ideally we want to assert that the fence register is not
2019 		 * live at this point (i.e. that no piece of code will be
2020 		 * trying to write through fence + GTT, as that would both violate
2021 		 * our tracking of activity and the associated locking/barriers,
2022 		 * and be illegal given that the hw is powered down).
2023 		 *
2024 		 * Previously we used reg->pin_count as a "liveness" indicator.
2025 		 * That is not sufficient, and we need a more fine-grained
2026 		 * tool if we want to have a sanity check here.
2027 		 */
2028 
2029 		if (!reg->vma)
2030 			continue;
2031 
2032 		GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
2033 		reg->dirty = true;
2034 	}
2035 }
2036 
2037 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2038 {
2039 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2040 	int err;
2041 
2042 	err = drm_gem_create_mmap_offset(&obj->base);
2043 	if (likely(!err))
2044 		return 0;
2045 
2046 	/* Attempt to reap some mmap space from dead objects */
2047 	do {
2048 		err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2049 		if (err)
2050 			break;
2051 
2052 		i915_gem_drain_freed_objects(dev_priv);
2053 		err = drm_gem_create_mmap_offset(&obj->base);
2054 		if (!err)
2055 			break;
2056 
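		/* flush_delayed_work() returns true only if the retire worker
		 * was still pending, so keep retrying while there is
		 * outstanding retire work that may release mmap space.
		 */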
2057 	} while (flush_delayed_work(&dev_priv->gt.retire_work));
2058 
2059 	return err;
2060 }
2061 
2062 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2063 {
2064 	drm_gem_free_mmap_offset(&obj->base);
2065 }
2066 
2067 int
2068 i915_gem_mmap_gtt(struct drm_file *file,
2069 		  struct drm_device *dev,
2070 		  uint32_t handle,
2071 		  uint64_t *offset)
2072 {
2073 	struct drm_i915_gem_object *obj;
2074 	int ret;
2075 
2076 	obj = i915_gem_object_lookup(file, handle);
2077 	if (!obj)
2078 		return -ENOENT;
2079 
2080 	ret = i915_gem_object_create_mmap_offset(obj);
2081 	if (ret == 0)
2082 		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2083 
2084 	i915_gem_object_put(obj);
2085 	return ret;
2086 }
2087 
2088 /**
2089  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2090  * @dev: DRM device
2091  * @data: GTT mapping ioctl data
2092  * @file: GEM object info
2093  *
2094  * Simply returns the fake offset to userspace so it can mmap it.
2095  * The mmap call will end up in drm_gem_mmap(), which will set things
2096  * up so we can get faults in the handler above.
2097  *
2098  * The fault handler will take care of binding the object into the GTT
2099  * (since it may have been evicted to make room for something), allocating
2100  * a fence register, and mapping the appropriate aperture address into
2101  * userspace.
2102  */
2103 int
2104 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2105 			struct drm_file *file)
2106 {
2107 	struct drm_i915_gem_mmap_gtt *args = data;
2108 
2109 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2110 }
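
/* Illustrative only (not part of the driver): a typical userspace sequence,
 * assuming a libdrm-style drmIoctl() wrapper, obtains the fake offset via
 * DRM_IOCTL_I915_GEM_MMAP_GTT and then mmap()s the DRM fd at that offset:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 *
 * Faults on that mapping are then serviced by i915_gem_fault() above.
 */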
2111 
2112 /* Immediately discard the backing storage */
2113 static void
2114 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2115 {
2116 	i915_gem_object_free_mmap_offset(obj);
2117 
2118 	if (obj->base.filp == NULL)
2119 		return;
2120 
2121 	/* Our goal here is to return as much of the memory as possible
2122 	 * back to the system, as we are called from OOM.
2123 	 * To do this we must instruct the shmfs to drop all of its
2124 	 * backing pages, *now*.
2125 	 */
2126 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
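	/* The backing store is now gone: poison the page pointer so any later
	 * consumer of obj->mm.pages sees an error rather than a stale table.
	 */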
2127 	obj->mm.madv = __I915_MADV_PURGED;
2128 	obj->mm.pages = ERR_PTR(-EFAULT);
2129 }
2130 
2131 /* Try to discard unwanted pages */
2132 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2133 {
2134 	struct address_space *mapping;
2135 
2136 	lockdep_assert_held(&obj->mm.lock);
2137 	GEM_BUG_ON(obj->mm.pages);
2138 
2139 	switch (obj->mm.madv) {
2140 	case I915_MADV_DONTNEED:
2141 		i915_gem_object_truncate(obj);
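		/* fall through */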
2142 	case __I915_MADV_PURGED:
2143 		return;
2144 	}
2145 
2146 	if (obj->base.filp == NULL)
2147 		return;
2148 
2149 	mapping = obj->base.filp->f_mapping;
2150 	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2151 }
2152 
2153 static void
2154 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2155 			      struct sg_table *pages)
2156 {
2157 	struct sgt_iter sgt_iter;
2158 	struct page *page;
2159 
2160 	__i915_gem_object_release_shmem(obj, pages, true);
2161 
2162 	i915_gem_gtt_finish_pages(obj, pages);
2163 
2164 	if (i915_gem_object_needs_bit17_swizzle(obj))
2165 		i915_gem_object_save_bit_17_swizzle(obj, pages);
2166 
2167 	for_each_sgt_page(page, sgt_iter, pages) {
2168 		if (obj->mm.dirty)
2169 			set_page_dirty(page);
2170 
2171 		if (obj->mm.madv == I915_MADV_WILLNEED)
2172 			mark_page_accessed(page);
2173 
2174 		put_page(page);
2175 	}
2176 	obj->mm.dirty = false;
2177 
2178 	sg_free_table(pages);
2179 	kfree(pages);
2180 }
2181 
2182 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2183 {
2184 	struct radix_tree_iter iter;
2185 	void **slot;
2186 
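	/* Drop the cached page lookups; they index into the sg_table that is
	 * about to be released.
	 */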
2187 	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2188 		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2189 }
2190 
2191 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2192 				 enum i915_mm_subclass subclass)
2193 {
2194 	struct sg_table *pages;
2195 
2196 	if (i915_gem_object_has_pinned_pages(obj))
2197 		return;
2198 
2199 	GEM_BUG_ON(obj->bind_count);
2200 	if (!READ_ONCE(obj->mm.pages))
2201 		return;
2202 
2203 	/* May be called by shrinker from within get_pages() (on another bo) */
2204 	mutex_lock_nested(&obj->mm.lock, subclass);
2205 	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2206 		goto unlock;
2207 
2208 	/* ->put_pages might need to allocate memory for the bit17 swizzle
2209 	 * array, hence protect them from being reaped by removing them from gtt
2210 	 * lists early. */
2211 	pages = fetch_and_zero(&obj->mm.pages);
2212 	GEM_BUG_ON(!pages);
2213 
2214 	if (obj->mm.mapping) {
2215 		void *ptr;
2216 
2217 		ptr = ptr_mask_bits(obj->mm.mapping);
2218 		if (is_vmalloc_addr(ptr))
2219 			vunmap(ptr);
2220 		else
2221 			kunmap(kmap_to_page(ptr));
2222 
2223 		obj->mm.mapping = NULL;
2224 	}
2225 
2226 	__i915_gem_object_reset_page_iter(obj);
2227 
2228 	if (!IS_ERR(pages))
2229 		obj->ops->put_pages(obj, pages);
2230 
2231 unlock:
2232 	mutex_unlock(&obj->mm.lock);
2233 }
2234 
2235 static void i915_sg_trim(struct sg_table *orig_st)
2236 {
2237 	struct sg_table new_st;
2238 	struct scatterlist *sg, *new_sg;
2239 	unsigned int i;
2240 
2241 	if (orig_st->nents == orig_st->orig_nents)
2242 		return;
2243 
2244 	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2245 		return;
2246 
2247 	new_sg = new_st.sgl;
2248 	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2249 		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2250 		/* called before being DMA mapped, no need to copy sg->dma_* */
2251 		new_sg = sg_next(new_sg);
2252 	}
2253 	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2254 
2255 	sg_free_table(orig_st);
2256 
2257 	*orig_st = new_st;
2258 }
2259 
2260 static struct sg_table *
2261 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2262 {
2263 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2264 	const unsigned long page_count = obj->base.size / PAGE_SIZE;
2265 	unsigned long i;
2266 	struct address_space *mapping;
2267 	struct sg_table *st;
2268 	struct scatterlist *sg;
2269 	struct sgt_iter sgt_iter;
2270 	struct page *page;
2271 	unsigned long last_pfn = 0;	/* suppress gcc warning */
2272 	unsigned int max_segment;
2273 	int ret;
2274 	gfp_t gfp;
2275 
2276 	/* Assert that the object is not currently in any GPU domain. As it
2277 	 * wasn't in the GTT, there shouldn't be any way it could have been in
2278 	 * a GPU cache.
2279 	 */
2280 	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2281 	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2282 
2283 	max_segment = swiotlb_max_segment();
2284 	if (!max_segment)
2285 		max_segment = rounddown(UINT_MAX, PAGE_SIZE);
2286 
2287 	st = kmalloc(sizeof(*st), GFP_KERNEL);
2288 	if (st == NULL)
2289 		return ERR_PTR(-ENOMEM);
2290 
2291 rebuild_st:
2292 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2293 		kfree(st);
2294 		return ERR_PTR(-ENOMEM);
2295 	}
2296 
2297 	/* Get the list of pages out of our struct file.  They'll be pinned
2298 	 * at this point until we release them.
2299 	 *
2300 	 * Fail silently without starting the shrinker
2301 	 */
2302 	mapping = obj->base.filp->f_mapping;
2303 	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2304 	gfp |= __GFP_NORETRY | __GFP_NOWARN;
2305 	sg = st->sgl;
2306 	st->nents = 0;
2307 	for (i = 0; i < page_count; i++) {
2308 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2309 		if (IS_ERR(page)) {
2310 			i915_gem_shrink(dev_priv,
2311 					page_count,
2312 					I915_SHRINK_BOUND |
2313 					I915_SHRINK_UNBOUND |
2314 					I915_SHRINK_PURGEABLE);
2315 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2316 		}
2317 		if (IS_ERR(page)) {
2318 			/* We've tried hard to allocate the memory by reaping
2319 			 * our own buffer, now let the real VM do its job and
2320 			 * go down in flames if truly OOM.
2321 			 */
2322 			page = shmem_read_mapping_page(mapping, i);
2323 			if (IS_ERR(page)) {
2324 				ret = PTR_ERR(page);
2325 				goto err_sg;
2326 			}
2327 		}
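		/* Start a new sg entry for the first page, when the current
		 * entry has reached max_segment, or when the page is not
		 * physically contiguous with the previous one; otherwise
		 * just extend the current entry.
		 */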
2328 		if (!i ||
2329 		    sg->length >= max_segment ||
2330 		    page_to_pfn(page) != last_pfn + 1) {
2331 			if (i)
2332 				sg = sg_next(sg);
2333 			st->nents++;
2334 			sg_set_page(sg, page, PAGE_SIZE, 0);
2335 		} else {
2336 			sg->length += PAGE_SIZE;
2337 		}
2338 		last_pfn = page_to_pfn(page);
2339 
2340 		/* Check that the i965g/gm workaround works. */
2341 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2342 	}
2343 	if (sg) /* loop terminated early; short sg table */
2344 		sg_mark_end(sg);
2345 
2346 	/* Trim unused sg entries to avoid wasting memory. */
2347 	i915_sg_trim(st);
2348 
2349 	ret = i915_gem_gtt_prepare_pages(obj, st);
2350 	if (ret) {
2351 		/* DMA remapping failed? One possible cause is that
2352 		 * it could not reserve enough large entries; asking
2353 		 * for PAGE_SIZE chunks instead may be helpful.
2354 		 */
2355 		if (max_segment > PAGE_SIZE) {
2356 			for_each_sgt_page(page, sgt_iter, st)
2357 				put_page(page);
2358 			sg_free_table(st);
2359 
2360 			max_segment = PAGE_SIZE;
2361 			goto rebuild_st;
2362 		} else {
2363 			dev_warn(&dev_priv->drm.pdev->dev,
2364 				 "Failed to DMA remap %lu pages\n",
2365 				 page_count);
2366 			goto err_pages;
2367 		}
2368 	}
2369 
2370 	if (i915_gem_object_needs_bit17_swizzle(obj))
2371 		i915_gem_object_do_bit_17_swizzle(obj, st);
2372 
2373 	return st;
2374 
2375 err_sg:
2376 	sg_mark_end(sg);
2377 err_pages:
2378 	for_each_sgt_page(page, sgt_iter, st)
2379 		put_page(page);
2380 	sg_free_table(st);
2381 	kfree(st);
2382 
2383 	/* shmemfs first checks if there is enough memory to allocate the page
2384 	 * and reports ENOSPC should memory be insufficient, along with the usual
2385 	 * ENOMEM for a genuine allocation failure.
2386 	 *
2387 	 * We use ENOSPC in our driver to mean that we have run out of aperture
2388 	 * space and so want to translate the error from shmemfs back to our
2389 	 * usual understanding of ENOMEM.
2390 	 */
2391 	if (ret == -ENOSPC)
2392 		ret = -ENOMEM;
2393 
2394 	return ERR_PTR(ret);
2395 }
2396 
2397 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2398 				 struct sg_table *pages)
2399 {
2400 	lockdep_assert_held(&obj->mm.lock);
2401 
2402 	obj->mm.get_page.sg_pos = pages->sgl;
2403 	obj->mm.get_page.sg_idx = 0;
2404 
2405 	obj->mm.pages = pages;
2406 
2407 	if (i915_gem_object_is_tiled(obj) &&
2408 	    to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2409 		GEM_BUG_ON(obj->mm.quirked);
2410 		__i915_gem_object_pin_pages(obj);
2411 		obj->mm.quirked = true;
2412 	}
2413 }
2414 
2415 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2416 {
2417 	struct sg_table *pages;
2418 
2419 	GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2420 
2421 	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2422 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2423 		return -EFAULT;
2424 	}
2425 
2426 	pages = obj->ops->get_pages(obj);
2427 	if (unlikely(IS_ERR(pages)))
2428 		return PTR_ERR(pages);
2429 
2430 	__i915_gem_object_set_pages(obj, pages);
2431 	return 0;
2432 }
2433 
2434 /* Ensure that the associated pages are gathered from the backing storage
2435  * and pinned into our object. i915_gem_object_pin_pages() may be called
2436  * multiple times before they are released by a single call to
2437  * i915_gem_object_unpin_pages(); the pages are then released once they are no
2438  * longer referenced, either as a result of memory pressure (reaping pages
2439  * under the shrinker) or as the object is itself released.
2440  */
2441 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2442 {
2443 	int err;
2444 
2445 	err = mutex_lock_interruptible(&obj->mm.lock);
2446 	if (err)
2447 		return err;
2448 
2449 	if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
2450 		err = ____i915_gem_object_get_pages(obj);
2451 		if (err)
2452 			goto unlock;
2453 
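		/* Make the freshly installed obj->mm.pages visible before the
		 * otherwise unordered pin count increment below.
		 */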
2454 		smp_mb__before_atomic();
2455 	}
2456 	atomic_inc(&obj->mm.pages_pin_count);
2457 
2458 unlock:
2459 	mutex_unlock(&obj->mm.lock);
2460 	return err;
2461 }
2462 
2463 /* The 'mapping' part of i915_gem_object_pin_map() below */
2464 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2465 				 enum i915_map_type type)
2466 {
2467 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2468 	struct sg_table *sgt = obj->mm.pages;
2469 	struct sgt_iter sgt_iter;
2470 	struct page *page;
2471 	struct page *stack_pages[32];
2472 	struct page **pages = stack_pages;
2473 	unsigned long i = 0;
2474 	pgprot_t pgprot;
2475 	void *addr;
2476 
2477 	/* A single page can always be kmapped */
2478 	if (n_pages == 1 && type == I915_MAP_WB)
2479 		return kmap(sg_page(sgt->sgl));
2480 
2481 	if (n_pages > ARRAY_SIZE(stack_pages)) {
2482 		/* Too big for stack -- allocate temporary array instead */
2483 		pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2484 		if (!pages)
2485 			return NULL;
2486 	}
2487 
2488 	for_each_sgt_page(page, sgt_iter, sgt)
2489 		pages[i++] = page;
2490 
2491 	/* Check that we have the expected number of pages */
2492 	GEM_BUG_ON(i != n_pages);
2493 
2494 	switch (type) {
2495 	case I915_MAP_WB:
2496 		pgprot = PAGE_KERNEL;
2497 		break;
2498 	case I915_MAP_WC:
2499 		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2500 		break;
2501 	}
2502 	addr = vmap(pages, n_pages, 0, pgprot);
2503 
2504 	if (pages != stack_pages)
2505 		drm_free_large(pages);
2506 
2507 	return addr;
2508 }
2509 
2510 /* get, pin, and map the pages of the object into kernel space */
2511 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2512 			      enum i915_map_type type)
2513 {
2514 	enum i915_map_type has_type;
2515 	bool pinned;
2516 	void *ptr;
2517 	int ret;
2518 
2519 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2520 
2521 	ret = mutex_lock_interruptible(&obj->mm.lock);
2522 	if (ret)
2523 		return ERR_PTR(ret);
2524 
2525 	pinned = true;
2526 	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2527 		if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) {
2528 			ret = ____i915_gem_object_get_pages(obj);
2529 			if (ret)
2530 				goto err_unlock;
2531 
2532 			smp_mb__before_atomic();
2533 		}
2534 		atomic_inc(&obj->mm.pages_pin_count);
2535 		pinned = false;
2536 	}
2537 	GEM_BUG_ON(!obj->mm.pages);
2538 
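	/* The map type is packed into the low bits of obj->mm.mapping, so a
	 * single pointer records both the kernel address and whether the
	 * mapping is WB or WC.
	 */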
2539 	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
2540 	if (ptr && has_type != type) {
2541 		if (pinned) {
2542 			ret = -EBUSY;
2543 			goto err_unpin;
2544 		}
2545 
2546 		if (is_vmalloc_addr(ptr))
2547 			vunmap(ptr);
2548 		else
2549 			kunmap(kmap_to_page(ptr));
2550 
2551 		ptr = obj->mm.mapping = NULL;
2552 	}
2553 
2554 	if (!ptr) {
2555 		ptr = i915_gem_object_map(obj, type);
2556 		if (!ptr) {
2557 			ret = -ENOMEM;
2558 			goto err_unpin;
2559 		}
2560 
2561 		obj->mm.mapping = ptr_pack_bits(ptr, type);
2562 	}
2563 
2564 out_unlock:
2565 	mutex_unlock(&obj->mm.lock);
2566 	return ptr;
2567 
2568 err_unpin:
2569 	atomic_dec(&obj->mm.pages_pin_count);
2570 err_unlock:
2571 	ptr = ERR_PTR(ret);
2572 	goto out_unlock;
2573 }
2574 
2575 static int
2576 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2577 			   const struct drm_i915_gem_pwrite *arg)
2578 {
2579 	struct address_space *mapping = obj->base.filp->f_mapping;
2580 	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2581 	u64 remain, offset;
2582 	unsigned int pg;
2583 
2584 	/* Before we instantiate/pin the backing store for our use, we
2585 	 * can prepopulate the shmemfs filp efficiently using a write into
2586 	 * the pagecache. We avoid the penalty of instantiating all the
2587 	 * pages, important if the user is just writing to a few and never
2588 	 * uses the object on the GPU, and using a direct write into shmemfs
2589 	 * allows it to avoid the cost of retrieving a page (either swapin
2590 	 * or clearing-before-use) before it is overwritten.
2591 	 */
2592 	if (READ_ONCE(obj->mm.pages))
2593 		return -ENODEV;
2594 
2595 	/* Before the pages are instantiated the object is treated as being
2596 	 * in the CPU domain. The pages will be clflushed as required before
2597 	 * use, and we can freely write into the pages directly. If userspace
2598 	 * races pwrite with any other operation, corruption will ensue -
2599 	 * that is userspace's prerogative!
2600 	 */
2601 
2602 	remain = arg->size;
2603 	offset = arg->offset;
2604 	pg = offset_in_page(offset);
2605 
2606 	do {
2607 		unsigned int len, unwritten;
2608 		struct page *page;
2609 		void *data, *vaddr;
2610 		int err;
2611 
2612 		len = PAGE_SIZE - pg;
2613 		if (len > remain)
2614 			len = remain;
2615 
2616 		err = pagecache_write_begin(obj->base.filp, mapping,
2617 					    offset, len, 0,
2618 					    &page, &data);
2619 		if (err < 0)
2620 			return err;
2621 
2622 		vaddr = kmap(page);
2623 		unwritten = copy_from_user(vaddr + pg, user_data, len);
2624 		kunmap(page);
2625 
2626 		err = pagecache_write_end(obj->base.filp, mapping,
2627 					  offset, len, len - unwritten,
2628 					  page, data);
2629 		if (err < 0)
2630 			return err;
2631 
2632 		if (unwritten)
2633 			return -EFAULT;
2634 
2635 		remain -= len;
2636 		user_data += len;
2637 		offset += len;
2638 		pg = 0;
2639 	} while (remain);
2640 
2641 	return 0;
2642 }
2643 
2644 static bool ban_context(const struct i915_gem_context *ctx)
2645 {
2646 	return (i915_gem_context_is_bannable(ctx) &&
2647 		ctx->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD);
2648 }
2649 
2650 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
2651 {
2652 	ctx->guilty_count++;
2653 	ctx->ban_score += CONTEXT_SCORE_GUILTY;
2654 	if (ban_context(ctx))
2655 		i915_gem_context_set_banned(ctx);
2656 
2657 	DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
2658 			 ctx->name, ctx->ban_score,
2659 			 yesno(i915_gem_context_is_banned(ctx)));
2660 
2661 	if (!i915_gem_context_is_banned(ctx) || IS_ERR_OR_NULL(ctx->file_priv))
2662 		return;
2663 
2664 	ctx->file_priv->context_bans++;
2665 	DRM_DEBUG_DRIVER("client %s has had %d contexts banned\n",
2666 			 ctx->name, ctx->file_priv->context_bans);
2667 }
2668 
2669 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
2670 {
2671 	ctx->active_count++;
2672 }
2673 
2674 struct drm_i915_gem_request *
2675 i915_gem_find_active_request(struct intel_engine_cs *engine)
2676 {
2677 	struct drm_i915_gem_request *request;
2678 
2679 	/* We are called by the error capture and reset at a random
2680 	 * point in time. In particular, note that neither is crucially
2681 	 * ordered with an interrupt. After a hang, the GPU is dead and we
2682 	 * assume that no more writes can happen (we waited long enough for
2683 	 * all writes that were in flight to be flushed) - adding an
2684 	 * extra delay for a recent interrupt is pointless. Hence, we do
2685 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2686 	 */
2687 	list_for_each_entry(request, &engine->timeline->requests, link) {
2688 		if (__i915_gem_request_completed(request))
2689 			continue;
2690 
2691 		GEM_BUG_ON(request->engine != engine);
2692 		return request;
2693 	}
2694 
2695 	return NULL;
2696 }
2697 
2698 static bool engine_stalled(struct intel_engine_cs *engine)
2699 {
2700 	if (!engine->hangcheck.stalled)
2701 		return false;
2702 
2703 	/* Check for possible seqno movement after hang declaration */
2704 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
2705 		DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
2706 		return false;
2707 	}
2708 
2709 	return true;
2710 }
2711 
2712 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
2713 {
2714 	struct intel_engine_cs *engine;
2715 	enum intel_engine_id id;
2716 	int err = 0;
2717 
2718 	/* Ensure the irq handler finishes, and is not run again. */
2719 	for_each_engine(engine, dev_priv, id) {
2720 		struct drm_i915_gem_request *request;
2721 
2722 		/* Prevent request submission to the hardware until we have
2723 		 * completed the reset in i915_gem_reset_finish(). If a request
2724 		 * is completed by one engine, it may then queue a request
2725 		 * to a second via its engine->irq_tasklet *just* as we are
2726 		 * calling engine->init_hw() and also writing the ELSP.
2727 		 * Turning off the engine->irq_tasklet until the reset is over
2728 		 * prevents the race.
2729 		 */
2730 		tasklet_kill(&engine->irq_tasklet);
2731 		tasklet_disable(&engine->irq_tasklet);
2732 
2733 		if (engine_stalled(engine)) {
2734 			request = i915_gem_find_active_request(engine);
2735 			if (request && request->fence.error == -EIO)
2736 				err = -EIO; /* Previous reset failed! */
2737 		}
2738 	}
2739 
2740 	i915_gem_revoke_fences(dev_priv);
2741 
2742 	return err;
2743 }
2744 
2745 static void skip_request(struct drm_i915_gem_request *request)
2746 {
2747 	void *vaddr = request->ring->vaddr;
2748 	u32 head;
2749 
2750 	/* As this request likely depends on state from the lost
2751 	 * context, clear out all the user operations leaving the
2752 	 * breadcrumb at the end (so we get the fence notifications).
2753 	 */
2754 	head = request->head;
2755 	if (request->postfix < head) {
2756 		memset(vaddr + head, 0, request->ring->size - head);
2757 		head = 0;
2758 	}
2759 	memset(vaddr + head, 0, request->postfix - head);
2760 
2761 	dma_fence_set_error(&request->fence, -EIO);
2762 }
2763 
2764 static void engine_skip_context(struct drm_i915_gem_request *request)
2765 {
2766 	struct intel_engine_cs *engine = request->engine;
2767 	struct i915_gem_context *hung_ctx = request->ctx;
2768 	struct intel_timeline *timeline;
2769 	unsigned long flags;
2770 
2771 	timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
2772 
2773 	spin_lock_irqsave(&engine->timeline->lock, flags);
2774 	spin_lock(&timeline->lock);
2775 
2776 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
2777 		if (request->ctx == hung_ctx)
2778 			skip_request(request);
2779 
2780 	list_for_each_entry(request, &timeline->requests, link)
2781 		skip_request(request);
2782 
2783 	spin_unlock(&timeline->lock);
2784 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
2785 }
2786 
2787 /* Returns true if the request was guilty of hang */
2788 static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
2789 {
2790 	/* Read once and return the resolution */
2791 	const bool guilty = engine_stalled(request->engine);
2792 
2793 	/* The guilty request will get skipped on a hung engine.
2794 	 *
2795 	 * Users of client default contexts do not rely on logical
2796 	 * state preserved between batches so it is safe to execute
2797 	 * queued requests following the hang. Non default contexts
2798 	 * rely on preserved state, so skipping a batch loses the
2799 	 * evolution of the state and it needs to be considered corrupted.
2800 	 * Executing more queued batches on top of corrupted state is
2801 	 * risky. But we take the risk by trying to advance through
2802 	 * the queued requests in order to make the client behaviour
2803 	 * more predictable around resets, by not throwing away a random
2804 	 * amount of batches it has prepared for execution. Sophisticated
2805 	 * clients can use gem_reset_stats_ioctl and dma fence status
2806 	 * (exported via sync_file info ioctl on explicit fences) to observe
2807 	 * when they lose the context state and should rebuild accordingly.
2808 	 *
2809 	 * The context ban, and ultimately the client ban, mechanisms are safety
2810 	 * valves if client submission ends up resulting in nothing more than
2811 	 * subsequent hangs.
2812 	 */
2813 
2814 	if (guilty) {
2815 		i915_gem_context_mark_guilty(request->ctx);
2816 		skip_request(request);
2817 	} else {
2818 		i915_gem_context_mark_innocent(request->ctx);
2819 		dma_fence_set_error(&request->fence, -EAGAIN);
2820 	}
2821 
2822 	return guilty;
2823 }
2824 
2825 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2826 {
2827 	struct drm_i915_gem_request *request;
2828 
2829 	if (engine->irq_seqno_barrier)
2830 		engine->irq_seqno_barrier(engine);
2831 
2832 	request = i915_gem_find_active_request(engine);
2833 	if (request && i915_gem_reset_request(request)) {
2834 		DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2835 				 engine->name, request->global_seqno);
2836 
2837 		/* If this context is now banned, skip all pending requests. */
2838 		if (i915_gem_context_is_banned(request->ctx))
2839 			engine_skip_context(request);
2840 	}
2841 
2842 	/* Set up the CS to resume from the breadcrumb of the hung request */
2843 	engine->reset_hw(engine, request);
2844 }
2845 
2846 void i915_gem_reset(struct drm_i915_private *dev_priv)
2847 {
2848 	struct intel_engine_cs *engine;
2849 	enum intel_engine_id id;
2850 
2851 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2852 
2853 	i915_gem_retire_requests(dev_priv);
2854 
2855 	for_each_engine(engine, dev_priv, id)
2856 		i915_gem_reset_engine(engine);
2857 
2858 	i915_gem_restore_fences(dev_priv);
2859 
2860 	if (dev_priv->gt.awake) {
2861 		intel_sanitize_gt_powersave(dev_priv);
2862 		intel_enable_gt_powersave(dev_priv);
2863 		if (INTEL_GEN(dev_priv) >= 6)
2864 			gen6_rps_busy(dev_priv);
2865 	}
2866 }
2867 
2868 void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
2869 {
2870 	struct intel_engine_cs *engine;
2871 	enum intel_engine_id id;
2872 
2873 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2874 
2875 	for_each_engine(engine, dev_priv, id)
2876 		tasklet_enable(&engine->irq_tasklet);
2877 }
2878 
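/* Replacement submit hook installed once the GPU is wedged: the request is
 * completed immediately with -EIO instead of being executed by the hardware.
 */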
2879 static void nop_submit_request(struct drm_i915_gem_request *request)
2880 {
2881 	dma_fence_set_error(&request->fence, -EIO);
2882 	i915_gem_request_submit(request);
2883 	intel_engine_init_global_seqno(request->engine, request->global_seqno);
2884 }
2885 
2886 static void engine_set_wedged(struct intel_engine_cs *engine)
2887 {
2888 	struct drm_i915_gem_request *request;
2889 	unsigned long flags;
2890 
2891 	/* We need to be sure that no thread is running the old callback as
2892 	 * we install the nop handler (otherwise we would submit a request
2893 	 * to hardware that will never complete). In order to prevent this
2894 	 * race, we wait until the machine is idle before making the swap
2895 	 * (using stop_machine()).
2896 	 */
2897 	engine->submit_request = nop_submit_request;
2898 
2899 	/* Mark all executing requests as skipped */
2900 	spin_lock_irqsave(&engine->timeline->lock, flags);
2901 	list_for_each_entry(request, &engine->timeline->requests, link)
2902 		dma_fence_set_error(&request->fence, -EIO);
2903 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
2904 
2905 	/* Mark all pending requests as complete so that any concurrent
2906 	 * (lockless) lookup doesn't try to wait upon the request as we
2907 	 * reset it.
2908 	 */
2909 	intel_engine_init_global_seqno(engine,
2910 				       intel_engine_last_submit(engine));
2911 
2912 	/*
2913 	 * Clear up the execlists queue before freeing the requests, as those
2914 	 * are the ones that keep the context and ringbuffer backing objects
2915 	 * pinned in place.
2916 	 */
2917 
2918 	if (i915.enable_execlists) {
2919 		unsigned long flags;
2920 
2921 		spin_lock_irqsave(&engine->timeline->lock, flags);
2922 
2923 		i915_gem_request_put(engine->execlist_port[0].request);
2924 		i915_gem_request_put(engine->execlist_port[1].request);
2925 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2926 		engine->execlist_queue = RB_ROOT;
2927 		engine->execlist_first = NULL;
2928 
2929 		spin_unlock_irqrestore(&engine->timeline->lock, flags);
2930 	}
2931 }
2932 
2933 static int __i915_gem_set_wedged_BKL(void *data)
2934 {
2935 	struct drm_i915_private *i915 = data;
2936 	struct intel_engine_cs *engine;
2937 	enum intel_engine_id id;
2938 
2939 	for_each_engine(engine, i915, id)
2940 		engine_set_wedged(engine);
2941 
2942 	return 0;
2943 }
2944 
2945 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2946 {
2947 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
2948 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2949 
2950 	stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
2951 
2952 	i915_gem_context_lost(dev_priv);
2953 	i915_gem_retire_requests(dev_priv);
2954 
2955 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2956 }
2957 
2958 static void
2959 i915_gem_retire_work_handler(struct work_struct *work)
2960 {
2961 	struct drm_i915_private *dev_priv =
2962 		container_of(work, typeof(*dev_priv), gt.retire_work.work);
2963 	struct drm_device *dev = &dev_priv->drm;
2964 
2965 	/* Come back later if the device is busy... */
2966 	if (mutex_trylock(&dev->struct_mutex)) {
2967 		i915_gem_retire_requests(dev_priv);
2968 		mutex_unlock(&dev->struct_mutex);
2969 	}
2970 
2971 	/* Keep the retire handler running until we are finally idle.
2972 	 * We do not need to do this test under locking as in the worst-case
2973 	 * we queue the retire worker once too often.
2974 	 */
2975 	if (READ_ONCE(dev_priv->gt.awake)) {
2976 		i915_queue_hangcheck(dev_priv);
2977 		queue_delayed_work(dev_priv->wq,
2978 				   &dev_priv->gt.retire_work,
2979 				   round_jiffies_up_relative(HZ));
2980 	}
2981 }
2982 
2983 static void
2984 i915_gem_idle_work_handler(struct work_struct *work)
2985 {
2986 	struct drm_i915_private *dev_priv =
2987 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
2988 	struct drm_device *dev = &dev_priv->drm;
2989 	struct intel_engine_cs *engine;
2990 	enum intel_engine_id id;
2991 	bool rearm_hangcheck;
2992 
2993 	if (!READ_ONCE(dev_priv->gt.awake))
2994 		return;
2995 
2996 	/*
2997 	 * Wait for the last execlists context to complete, but bail out in case a
2998 	 * new request is submitted.
2999 	 */
3000 	wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
3001 		 intel_execlists_idle(dev_priv), 10);
3002 
3003 	if (READ_ONCE(dev_priv->gt.active_requests))
3004 		return;
3005 
3006 	rearm_hangcheck =
3007 		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3008 
3009 	if (!mutex_trylock(&dev->struct_mutex)) {
3010 		/* Currently busy, come back later */
3011 		mod_delayed_work(dev_priv->wq,
3012 				 &dev_priv->gt.idle_work,
3013 				 msecs_to_jiffies(50));
3014 		goto out_rearm;
3015 	}
3016 
3017 	/*
3018 	 * New request retired after this work handler started, extend active
3019 	 * period until next instance of the work.
3020 	 */
3021 	if (work_pending(work))
3022 		goto out_unlock;
3023 
3024 	if (dev_priv->gt.active_requests)
3025 		goto out_unlock;
3026 
3027 	if (wait_for(intel_execlists_idle(dev_priv), 10))
3028 		DRM_ERROR("Timeout waiting for engines to idle\n");
3029 
3030 	for_each_engine(engine, dev_priv, id)
3031 		i915_gem_batch_pool_fini(&engine->batch_pool);
3032 
3033 	GEM_BUG_ON(!dev_priv->gt.awake);
3034 	dev_priv->gt.awake = false;
3035 	rearm_hangcheck = false;
3036 
3037 	if (INTEL_GEN(dev_priv) >= 6)
3038 		gen6_rps_idle(dev_priv);
3039 	intel_runtime_pm_put(dev_priv);
3040 out_unlock:
3041 	mutex_unlock(&dev->struct_mutex);
3042 
3043 out_rearm:
3044 	if (rearm_hangcheck) {
3045 		GEM_BUG_ON(!dev_priv->gt.awake);
3046 		i915_queue_hangcheck(dev_priv);
3047 	}
3048 }
3049 
3050 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3051 {
3052 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
3053 	struct drm_i915_file_private *fpriv = file->driver_priv;
3054 	struct i915_vma *vma, *vn;
3055 
3056 	mutex_lock(&obj->base.dev->struct_mutex);
3057 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
3058 		if (vma->vm->file == fpriv)
3059 			i915_vma_close(vma);
3060 
3061 	if (i915_gem_object_is_active(obj) &&
3062 	    !i915_gem_object_has_active_reference(obj)) {
3063 		i915_gem_object_set_active_reference(obj);
3064 		i915_gem_object_get(obj);
3065 	}
3066 	mutex_unlock(&obj->base.dev->struct_mutex);
3067 }
3068 
3069 static unsigned long to_wait_timeout(s64 timeout_ns)
3070 {
3071 	if (timeout_ns < 0)
3072 		return MAX_SCHEDULE_TIMEOUT;
3073 
3074 	if (timeout_ns == 0)
3075 		return 0;
3076 
3077 	return nsecs_to_jiffies_timeout(timeout_ns);
3078 }
3079 
3080 /**
3081  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3082  * @dev: drm device pointer
3083  * @data: ioctl data blob
3084  * @file: drm file pointer
3085  *
3086  * Returns 0 if successful, else an error is returned with the remaining time in
3087  * the timeout parameter.
3088  *  -ETIME: object is still busy after timeout
3089  *  -ERESTARTSYS: signal interrupted the wait
3090  *  -ENOENT: object doesn't exist
3091  * Also possible, but rare:
3092  *  -EAGAIN: GPU wedged
3093  *  -ENOMEM: damn
3094  *  -ENODEV: Internal IRQ fail
3095  *  -E?: The add request failed
3096  *
3097  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3098  * non-zero timeout parameter the wait ioctl will wait for the given number of
3099  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3100  * without holding struct_mutex the object may become re-busied before this
3101  * function completes. A similar but shorter race condition exists in the busy
3102  * ioctl.
3103  */
3104 int
3105 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3106 {
3107 	struct drm_i915_gem_wait *args = data;
3108 	struct drm_i915_gem_object *obj;
3109 	ktime_t start;
3110 	long ret;
3111 
3112 	if (args->flags != 0)
3113 		return -EINVAL;
3114 
3115 	obj = i915_gem_object_lookup(file, args->bo_handle);
3116 	if (!obj)
3117 		return -ENOENT;
3118 
3119 	start = ktime_get();
3120 
3121 	ret = i915_gem_object_wait(obj,
3122 				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
3123 				   to_wait_timeout(args->timeout_ns),
3124 				   to_rps_client(file));
3125 
3126 	if (args->timeout_ns > 0) {
3127 		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3128 		if (args->timeout_ns < 0)
3129 			args->timeout_ns = 0;
3130 
3131 		/*
3132 		 * Apparently ktime isn't accurate enough and occasionally has a
3133 		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3134 		 * things up to make the test happy. We allow up to 1 jiffy.
3135 		 *
3136 		 * This is a regression from the timespec->ktime conversion.
3137 		 */
3138 		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3139 			args->timeout_ns = 0;
3140 	}
3141 
3142 	i915_gem_object_put(obj);
3143 	return ret;
3144 }
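
/* Illustrative only (not part of the driver): a userspace poll of a buffer,
 * assuming a libdrm-style drmIoctl() wrapper, might look like
 *
 *	struct drm_i915_gem_wait wait = { .bo_handle = handle };
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * where a zero timeout_ns polls (returning -ETIME while busy) and a negative
 * timeout_ns waits indefinitely.
 */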
3145 
3146 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
3147 {
3148 	int ret, i;
3149 
3150 	for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3151 		ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3152 		if (ret)
3153 			return ret;
3154 	}
3155 
3156 	return 0;
3157 }
3158 
3159 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3160 {
3161 	int ret;
3162 
3163 	if (flags & I915_WAIT_LOCKED) {
3164 		struct i915_gem_timeline *tl;
3165 
3166 		lockdep_assert_held(&i915->drm.struct_mutex);
3167 
3168 		list_for_each_entry(tl, &i915->gt.timelines, link) {
3169 			ret = wait_for_timeline(tl, flags);
3170 			if (ret)
3171 				return ret;
3172 		}
3173 	} else {
3174 		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
3175 		if (ret)
3176 			return ret;
3177 	}
3178 
3179 	return 0;
3180 }
3181 
3182 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3183 			     bool force)
3184 {
3185 	/* If we don't have a page list set up, then we're not pinned
3186 	 * to GPU, and we can ignore the cache flush because it'll happen
3187 	 * again at bind time.
3188 	 */
3189 	if (!obj->mm.pages)
3190 		return;
3191 
3192 	/*
3193 	 * Stolen memory is always coherent with the GPU as it is explicitly
3194 	 * marked as wc by the system, or the system is cache-coherent.
3195 	 */
3196 	if (obj->stolen || obj->phys_handle)
3197 		return;
3198 
3199 	/* If the GPU is snooping the contents of the CPU cache,
3200 	 * we do not need to manually clear the CPU cache lines.  However,
3201 	 * the caches are only snooped when the render cache is
3202 	 * flushed/invalidated.  As we always have to emit invalidations
3203 	 * and flushes when moving into and out of the RENDER domain, correct
3204 	 * snooping behaviour occurs naturally as the result of our domain
3205 	 * tracking.
3206 	 */
3207 	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3208 		obj->cache_dirty = true;
3209 		return;
3210 	}
3211 
3212 	trace_i915_gem_object_clflush(obj);
3213 	drm_clflush_sg(obj->mm.pages);
3214 	obj->cache_dirty = false;
3215 }
3216 
3217 /** Flushes the GTT write domain for the object if it's dirty. */
3218 static void
3219 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3220 {
3221 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3222 
3223 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3224 		return;
3225 
3226 	/* No actual flushing is required for the GTT write domain.  Writes
3227 	 * to it "immediately" go to main memory as far as we know, so there's
3228 	 * no chipset flush.  It also doesn't land in render cache.
3229 	 *
3230 	 * However, we do have to enforce the order so that all writes through
3231 	 * the GTT land before any writes to the device, such as updates to
3232 	 * the GATT itself.
3233 	 *
3234 	 * We also have to wait a bit for the writes to land from the GTT.
3235 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3236 	 * timing. This issue has only been observed when switching quickly
3237 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
3238 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
3239 	 * system agents we cannot reproduce this behaviour).
3240 	 */
3241 	wmb();
3242 	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3243 		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3244 
3245 	intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3246 
3247 	obj->base.write_domain = 0;
3248 	trace_i915_gem_object_change_domain(obj,
3249 					    obj->base.read_domains,
3250 					    I915_GEM_DOMAIN_GTT);
3251 }
3252 
3253 /** Flushes the CPU write domain for the object if it's dirty. */
3254 static void
3255 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3256 {
3257 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3258 		return;
3259 
3260 	i915_gem_clflush_object(obj, obj->pin_display);
3261 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3262 
3263 	obj->base.write_domain = 0;
3264 	trace_i915_gem_object_change_domain(obj,
3265 					    obj->base.read_domains,
3266 					    I915_GEM_DOMAIN_CPU);
3267 }
3268 
3269 /**
3270  * Moves a single object to the GTT read, and possibly write domain.
3271  * @obj: object to act on
3272  * @write: ask for write access or read only
3273  *
3274  * This function returns when the move is complete, including waiting on
3275  * flushes to occur.
3276  */
3277 int
3278 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3279 {
3280 	uint32_t old_write_domain, old_read_domains;
3281 	int ret;
3282 
3283 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3284 
3285 	ret = i915_gem_object_wait(obj,
3286 				   I915_WAIT_INTERRUPTIBLE |
3287 				   I915_WAIT_LOCKED |
3288 				   (write ? I915_WAIT_ALL : 0),
3289 				   MAX_SCHEDULE_TIMEOUT,
3290 				   NULL);
3291 	if (ret)
3292 		return ret;
3293 
3294 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3295 		return 0;
3296 
3297 	/* Flush and acquire obj->pages so that we are coherent through
3298 	 * direct access in memory with previous cached writes through
3299 	 * shmemfs and that our cache domain tracking remains valid.
3300 	 * For example, if the obj->filp was moved to swap without us
3301 	 * being notified and releasing the pages, we would mistakenly
3302 	 * continue to assume that the obj remained out of the CPU cached
3303 	 * domain.
3304 	 */
3305 	ret = i915_gem_object_pin_pages(obj);
3306 	if (ret)
3307 		return ret;
3308 
3309 	i915_gem_object_flush_cpu_write_domain(obj);
3310 
3311 	/* Serialise direct access to this object with the barriers for
3312 	 * coherent writes from the GPU, by effectively invalidating the
3313 	 * GTT domain upon first access.
3314 	 */
3315 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3316 		mb();
3317 
3318 	old_write_domain = obj->base.write_domain;
3319 	old_read_domains = obj->base.read_domains;
3320 
3321 	/* It should now be out of any other write domains, and we can update
3322 	 * the domain values for our changes.
3323 	 */
3324 	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3325 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3326 	if (write) {
3327 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3328 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3329 		obj->mm.dirty = true;
3330 	}
3331 
3332 	trace_i915_gem_object_change_domain(obj,
3333 					    old_read_domains,
3334 					    old_write_domain);
3335 
3336 	i915_gem_object_unpin_pages(obj);
3337 	return 0;
3338 }
3339 
3340 /**
3341  * Changes the cache-level of an object across all VMA.
3342  * @obj: object to act on
3343  * @cache_level: new cache level to set for the object
3344  *
3345  * After this function returns, the object will be in the new cache-level
3346  * across all GTT and the contents of the backing storage will be coherent,
3347  * with respect to the new cache-level. In order to keep the backing storage
3348  * coherent for all users, we only allow a single cache level to be set
3349  * globally on the object and prevent it from being changed whilst the
3350  * hardware is reading from the object. That is, if the object is currently
3351  * on the scanout, it will be set to uncached (or equivalent display
3352  * cache coherency) and all non-MOCS GPU access will also be uncached so
3353  * that all direct access to the scanout remains coherent.
3354  */
3355 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3356 				    enum i915_cache_level cache_level)
3357 {
3358 	struct i915_vma *vma;
3359 	int ret;
3360 
3361 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3362 
3363 	if (obj->cache_level == cache_level)
3364 		return 0;
3365 
3366 	/* Inspect the list of currently bound VMA and unbind any that would
3367 	 * be invalid given the new cache-level. This is principally to
3368 	 * catch the issue of the CS prefetch crossing page boundaries and
3369 	 * reading an invalid PTE on older architectures.
3370 	 */
3371 restart:
3372 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
3373 		if (!drm_mm_node_allocated(&vma->node))
3374 			continue;
3375 
3376 		if (i915_vma_is_pinned(vma)) {
3377 			DRM_DEBUG("can not change the cache level of pinned objects\n");
3378 			return -EBUSY;
3379 		}
3380 
3381 		if (i915_gem_valid_gtt_space(vma, cache_level))
3382 			continue;
3383 
3384 		ret = i915_vma_unbind(vma);
3385 		if (ret)
3386 			return ret;
3387 
3388 		/* As unbinding may affect other elements in the
3389 		 * obj->vma_list (due to side-effects from retiring
3390 		 * an active vma), play safe and restart the iterator.
3391 		 */
3392 		goto restart;
3393 	}
3394 
3395 	/* We can reuse the existing drm_mm nodes but need to change the
3396 	 * cache-level on the PTE. We could simply unbind them all and
3397 	 * rebind with the correct cache-level on next use. However since
3398 	 * we already have a valid slot, dma mapping, pages etc, we may as well
3399 	 * rewrite the PTE in the belief that doing so tramples upon less
3400 	 * state and so involves less work.
3401 	 */
3402 	if (obj->bind_count) {
3403 		/* Before we change the PTE, the GPU must not be accessing it.
3404 		 * If we wait upon the object, we know that all the bound
3405 		 * VMA are no longer active.
3406 		 */
3407 		ret = i915_gem_object_wait(obj,
3408 					   I915_WAIT_INTERRUPTIBLE |
3409 					   I915_WAIT_LOCKED |
3410 					   I915_WAIT_ALL,
3411 					   MAX_SCHEDULE_TIMEOUT,
3412 					   NULL);
3413 		if (ret)
3414 			return ret;
3415 
3416 		if (!HAS_LLC(to_i915(obj->base.dev)) &&
3417 		    cache_level != I915_CACHE_NONE) {
3418 			/* Access to snoopable pages through the GTT is
3419 			 * incoherent and on some machines causes a hard
3420 			 * lockup. Relinquish the CPU mmapping to force
3421 			 * userspace to refault in the pages and we can
3422 			 * then double check if the GTT mapping is still
3423 			 * valid for that pointer access.
3424 			 */
3425 			i915_gem_release_mmap(obj);
3426 
3427 			/* As we no longer need a fence for GTT access,
3428 			 * we can relinquish it now (and so prevent having
3429 			 * to steal a fence from someone else on the next
3430 			 * fence request). Note GPU activity would have
3431 			 * dropped the fence as all snoopable access is
3432 			 * supposed to be linear.
3433 			 */
3434 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
3435 				ret = i915_vma_put_fence(vma);
3436 				if (ret)
3437 					return ret;
3438 			}
3439 		} else {
3440 			/* We either have incoherent backing store and
3441 			 * so no GTT access or the architecture is fully
3442 			 * coherent. In such cases, existing GTT mmaps
3443 			 * ignore the cache bit in the PTE and we can
3444 			 * rewrite it without confusing the GPU or having
3445 			 * to force userspace to fault back in its mmaps.
3446 			 */
3447 		}
3448 
3449 		list_for_each_entry(vma, &obj->vma_list, obj_link) {
3450 			if (!drm_mm_node_allocated(&vma->node))
3451 				continue;
3452 
3453 			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3454 			if (ret)
3455 				return ret;
3456 		}
3457 	}
3458 
3459 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
3460 	    cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
3461 		obj->cache_dirty = true;
3462 
3463 	list_for_each_entry(vma, &obj->vma_list, obj_link)
3464 		vma->node.color = cache_level;
3465 	obj->cache_level = cache_level;
3466 
3467 	return 0;
3468 }
3469 
3470 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3471 			       struct drm_file *file)
3472 {
3473 	struct drm_i915_gem_caching *args = data;
3474 	struct drm_i915_gem_object *obj;
3475 	int err = 0;
3476 
3477 	rcu_read_lock();
3478 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3479 	if (!obj) {
3480 		err = -ENOENT;
3481 		goto out;
3482 	}
3483 
3484 	switch (obj->cache_level) {
3485 	case I915_CACHE_LLC:
3486 	case I915_CACHE_L3_LLC:
3487 		args->caching = I915_CACHING_CACHED;
3488 		break;
3489 
3490 	case I915_CACHE_WT:
3491 		args->caching = I915_CACHING_DISPLAY;
3492 		break;
3493 
3494 	default:
3495 		args->caching = I915_CACHING_NONE;
3496 		break;
3497 	}
3498 out:
3499 	rcu_read_unlock();
3500 	return err;
3501 }
3502 
3503 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3504 			       struct drm_file *file)
3505 {
3506 	struct drm_i915_private *i915 = to_i915(dev);
3507 	struct drm_i915_gem_caching *args = data;
3508 	struct drm_i915_gem_object *obj;
3509 	enum i915_cache_level level;
3510 	int ret = 0;
3511 
3512 	switch (args->caching) {
3513 	case I915_CACHING_NONE:
3514 		level = I915_CACHE_NONE;
3515 		break;
3516 	case I915_CACHING_CACHED:
3517 		/*
3518 		 * Due to a HW issue on BXT A stepping, GPU stores via a
3519 		 * snooped mapping may leave stale data in a corresponding CPU
3520 		 * cacheline, whereas normally such cachelines would get
3521 		 * invalidated.
3522 		 */
3523 		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3524 			return -ENODEV;
3525 
3526 		level = I915_CACHE_LLC;
3527 		break;
3528 	case I915_CACHING_DISPLAY:
3529 		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3530 		break;
3531 	default:
3532 		return -EINVAL;
3533 	}
3534 
3535 	obj = i915_gem_object_lookup(file, args->handle);
3536 	if (!obj)
3537 		return -ENOENT;
3538 
3539 	if (obj->cache_level == level)
3540 		goto out;
3541 
3542 	ret = i915_gem_object_wait(obj,
3543 				   I915_WAIT_INTERRUPTIBLE,
3544 				   MAX_SCHEDULE_TIMEOUT,
3545 				   to_rps_client(file));
3546 	if (ret)
3547 		goto out;
3548 
3549 	ret = i915_mutex_lock_interruptible(dev);
3550 	if (ret)
3551 		goto out;
3552 
3553 	ret = i915_gem_object_set_cache_level(obj, level);
3554 	mutex_unlock(&dev->struct_mutex);
3555 
3556 out:
3557 	i915_gem_object_put(obj);
3558 	return ret;
3559 }
3560 
3561 /*
3562  * Prepare buffer for display plane (scanout, cursors, etc).
3563  * Can be called from an uninterruptible phase (modesetting) and allows
3564  * any flushes to be pipelined (for pageflips).
3565  */
3566 struct i915_vma *
3567 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3568 				     u32 alignment,
3569 				     const struct i915_ggtt_view *view)
3570 {
3571 	struct i915_vma *vma;
3572 	u32 old_read_domains, old_write_domain;
3573 	int ret;
3574 
3575 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3576 
3577 	/* Mark the pin_display early so that we account for the
3578 	 * display coherency whilst setting up the cache domains.
3579 	 */
3580 	obj->pin_display++;
3581 
3582 	/* The display engine is not coherent with the LLC cache on gen6.  As
3583 	 * a result, we make sure that the pinning that is about to occur is
3584 	 * done with uncached PTEs. This is lowest common denominator for all
3585 	 * done with uncached PTEs. This is the lowest common denominator for all
3586 	 *
3587 	 * However for gen6+, we could do better by using the GFDT bit instead
3588 	 * of uncaching, which would allow us to flush all the LLC-cached data
3589 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3590 	 */
3591 	ret = i915_gem_object_set_cache_level(obj,
3592 					      HAS_WT(to_i915(obj->base.dev)) ?
3593 					      I915_CACHE_WT : I915_CACHE_NONE);
3594 	if (ret) {
3595 		vma = ERR_PTR(ret);
3596 		goto err_unpin_display;
3597 	}
3598 
3599 	/* As the user may map the buffer once pinned in the display plane
3600 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3601 	 * always use map_and_fenceable for all scanout buffers. However,
3602 	 * it may simply be too big to fit into the mappable aperture, in
3603 	 * which case we pin it anyway and hope that userspace can cope (but
3604 	 * always first try to preserve the existing ABI).
3605 	 */
3606 	vma = ERR_PTR(-ENOSPC);
3607 	if (!view || view->type == I915_GGTT_VIEW_NORMAL)
3608 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3609 					       PIN_MAPPABLE | PIN_NONBLOCK);
3610 	if (IS_ERR(vma)) {
3611 		struct drm_i915_private *i915 = to_i915(obj->base.dev);
3612 		unsigned int flags;
3613 
3614 		/* Valleyview is definitely limited to scanning out the first
3615 		 * 512MiB. Let's presume this behaviour was inherited from the
3616 		 * g4x display engine and that all earlier gen are similarly
3617 		 * limited. Testing suggests that it is a little more
3618 		 * complicated than this. For example, Cherryview appears quite
3619 		 * happy to scanout from anywhere within its global aperture.
3620 		 */
3621 		flags = 0;
3622 		if (HAS_GMCH_DISPLAY(i915))
3623 			flags = PIN_MAPPABLE;
3624 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3625 	}
3626 	if (IS_ERR(vma))
3627 		goto err_unpin_display;
3628 
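	/* Remember the strictest alignment requested for scanout of this vma. */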
3629 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3630 
3631 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
3632 	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3633 		i915_gem_clflush_object(obj, true);
3634 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
3635 	}
3636 
3637 	old_write_domain = obj->base.write_domain;
3638 	old_read_domains = obj->base.read_domains;
3639 
3640 	/* It should now be out of any other write domains, and we can update
3641 	 * the domain values for our changes.
3642 	 */
3643 	obj->base.write_domain = 0;
3644 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3645 
3646 	trace_i915_gem_object_change_domain(obj,
3647 					    old_read_domains,
3648 					    old_write_domain);
3649 
3650 	return vma;
3651 
3652 err_unpin_display:
3653 	obj->pin_display--;
3654 	return vma;
3655 }
3656 
3657 void
3658 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3659 {
3660 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3661 
3662 	if (WARN_ON(vma->obj->pin_display == 0))
3663 		return;
3664 
3665 	if (--vma->obj->pin_display == 0)
3666 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3667 
3668 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
3669 	i915_gem_object_bump_inactive_ggtt(vma->obj);
3670 
3671 	i915_vma_unpin(vma);
3672 }
3673 
3674 /**
3675  * Moves a single object to the CPU read, and possibly write, domain.
3676  * @obj: object to act on
3677  * @write: requesting write or read-only access
3678  *
3679  * This function returns when the move is complete, including waiting on
3680  * flushes to occur.
3681  */
3682 int
3683 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3684 {
3685 	uint32_t old_write_domain, old_read_domains;
3686 	int ret;
3687 
3688 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3689 
3690 	ret = i915_gem_object_wait(obj,
3691 				   I915_WAIT_INTERRUPTIBLE |
3692 				   I915_WAIT_LOCKED |
3693 				   (write ? I915_WAIT_ALL : 0),
3694 				   MAX_SCHEDULE_TIMEOUT,
3695 				   NULL);
3696 	if (ret)
3697 		return ret;
3698 
3699 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3700 		return 0;
3701 
3702 	i915_gem_object_flush_gtt_write_domain(obj);
3703 
3704 	old_write_domain = obj->base.write_domain;
3705 	old_read_domains = obj->base.read_domains;
3706 
3707 	/* Flush the CPU cache if it's still invalid. */
3708 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3709 		i915_gem_clflush_object(obj, false);
3710 
3711 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3712 	}
3713 
3714 	/* It should now be out of any other write domains, and we can update
3715 	 * the domain values for our changes.
3716 	 */
3717 	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3718 
3719 	/* If we're writing through the CPU, then the GPU read domains will
3720 	 * need to be invalidated at next use.
3721 	 */
3722 	if (write) {
3723 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3724 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3725 	}
3726 
3727 	trace_i915_gem_object_change_domain(obj,
3728 					    old_read_domains,
3729 					    old_write_domain);
3730 
3731 	return 0;
3732 }
3733 
3734 /* Throttle our rendering by waiting until the ring has completed our requests
3735  * emitted over 20 msec ago.
3736  *
3737  * Note that if we were to use the current jiffies each time around the loop,
3738  * we wouldn't escape the function with any frames outstanding if the time to
3739  * render a frame was over 20ms.
3740  *
3741  * This should get us reasonable parallelism between CPU and GPU but also
3742  * relatively low latency when blocking on a particular request to finish.
3743  */
3744 static int
3745 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3746 {
3747 	struct drm_i915_private *dev_priv = to_i915(dev);
3748 	struct drm_i915_file_private *file_priv = file->driver_priv;
3749 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3750 	struct drm_i915_gem_request *request, *target = NULL;
3751 	long ret;
3752 
3753 	/* ABI: return -EIO if already wedged */
3754 	if (i915_terminally_wedged(&dev_priv->gpu_error))
3755 		return -EIO;
3756 
3757 	spin_lock(&file_priv->mm.lock);
3758 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3759 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3760 			break;
3761 
3762 		/*
3763 		 * Note that the request might not have been submitted yet,
3764 		 * in which case emitted_jiffies will be zero.
3765 		 */
3766 		if (!request->emitted_jiffies)
3767 			continue;
3768 
3769 		target = request;
3770 	}
3771 	if (target)
3772 		i915_gem_request_get(target);
3773 	spin_unlock(&file_priv->mm.lock);
3774 
3775 	if (target == NULL)
3776 		return 0;
3777 
3778 	ret = i915_wait_request(target,
3779 				I915_WAIT_INTERRUPTIBLE,
3780 				MAX_SCHEDULE_TIMEOUT);
3781 	i915_gem_request_put(target);
3782 
3783 	return ret < 0 ? ret : 0;
3784 }
3785 
3786 struct i915_vma *
3787 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3788 			 const struct i915_ggtt_view *view,
3789 			 u64 size,
3790 			 u64 alignment,
3791 			 u64 flags)
3792 {
3793 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3794 	struct i915_address_space *vm = &dev_priv->ggtt.base;
3795 	struct i915_vma *vma;
3796 	int ret;
3797 
3798 	lockdep_assert_held(&obj->base.dev->struct_mutex);
3799 
3800 	vma = i915_vma_instance(obj, vm, view);
3801 	if (unlikely(IS_ERR(vma)))
3802 		return vma;
3803 
3804 	if (i915_vma_misplaced(vma, size, alignment, flags)) {
3805 		if (flags & PIN_NONBLOCK &&
3806 		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3807 			return ERR_PTR(-ENOSPC);
3808 
3809 		if (flags & PIN_MAPPABLE) {
3810 			/* If the required space is larger than the available
3811 			 * aperture, we will not be able to find a slot for the
3812 			 * object and unbinding the object now will be in
3813 			 * vain. Worse, doing so may cause us to ping-pong
3814 			 * the object in and out of the Global GTT and
3815 			 * waste a lot of cycles under the mutex.
3816 			 */
3817 			if (vma->fence_size > dev_priv->ggtt.mappable_end)
3818 				return ERR_PTR(-E2BIG);
3819 
3820 			/* If NONBLOCK is set the caller is optimistically
3821 			 * trying to cache the full object within the mappable
3822 			 * aperture, and *must* have a fallback in place for
3823 			 * situations where we cannot bind the object. We
3824 			 * can be a little more lax here and use the fallback
3825 			 * more often to avoid costly migrations of ourselves
3826 			 * and other objects within the aperture.
3827 			 *
3828 			 * Half-the-aperture is used as a simple heuristic.
3829 			 * More interesting would be to search for a free
3830 			 * block prior to making the commitment to unbind.
3831 			 * That caters for the self-harm case, and with a
3832 			 * little more heuristics (e.g. NOFAULT, NOEVICT)
3833 			 * we could try to minimise harm to others.
3834 			 */
3835 			if (flags & PIN_NONBLOCK &&
3836 			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3837 				return ERR_PTR(-ENOSPC);
3838 		}
3839 
3840 		WARN(i915_vma_is_pinned(vma),
3841 		     "bo is already pinned in ggtt with incorrect alignment:"
3842 		     " offset=%08x, req.alignment=%llx,"
3843 		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3844 		     i915_ggtt_offset(vma), alignment,
3845 		     !!(flags & PIN_MAPPABLE),
3846 		     i915_vma_is_map_and_fenceable(vma));
3847 		ret = i915_vma_unbind(vma);
3848 		if (ret)
3849 			return ERR_PTR(ret);
3850 	}
3851 
3852 	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3853 	if (ret)
3854 		return ERR_PTR(ret);
3855 
3856 	return vma;
3857 }
3858 
3859 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3860 {
3861 	/* Note that we could alias engines in the execbuf API, but
3862 	 * that would be very unwise as it prevents userspace from having
3863 	 * fine control over engine selection. Ahem.
3864 	 *
3865 	 * This should be something like EXEC_MAX_ENGINE instead of
3866 	 * I915_NUM_ENGINES.
3867 	 */
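	/*
	 * The busy flags are laid out so that the low 16 bits carry the id
	 * of the engine with the exclusive write, while each read engine
	 * sets one bit in the upper 16 bits.
	 */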
3868 	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3869 	return 0x10000 << id;
3870 }
3871 
3872 static __always_inline unsigned int __busy_write_id(unsigned int id)
3873 {
3874 	/* The uABI guarantees an active writer is also amongst the read
3875 	 * engines. This would be true if we accessed the activity tracking
3876 	 * under the lock, but as we perform the lookup of the object and
3877 	 * its activity locklessly we can not guarantee that the last_write
3878 	 * being active implies that we have set the same engine flag from
3879 	 * last_read - hence we always set both read and write busy for
3880 	 * last_write.
3881 	 */
3882 	return id | __busy_read_flag(id);
3883 }
3884 
3885 static __always_inline unsigned int
3886 __busy_set_if_active(const struct dma_fence *fence,
3887 		     unsigned int (*flag)(unsigned int id))
3888 {
3889 	struct drm_i915_gem_request *rq;
3890 
3891 	/* We have to check the current hw status of the fence as the uABI
3892 	 * guarantees forward progress. We could rely on the idle worker
3893 	 * to eventually flush us, but to minimise latency just ask the
3894 	 * hardware.
3895 	 *
3896 	 * Note we only report on the status of native fences.
3897 	 */
3898 	if (!dma_fence_is_i915(fence))
3899 		return 0;
3900 
3901 	/* opencode to_request() in order to avoid const warnings */
3902 	rq = container_of(fence, struct drm_i915_gem_request, fence);
3903 	if (i915_gem_request_completed(rq))
3904 		return 0;
3905 
3906 	return flag(rq->engine->exec_id);
3907 }
3908 
3909 static __always_inline unsigned int
3910 busy_check_reader(const struct dma_fence *fence)
3911 {
3912 	return __busy_set_if_active(fence, __busy_read_flag);
3913 }
3914 
3915 static __always_inline unsigned int
3916 busy_check_writer(const struct dma_fence *fence)
3917 {
3918 	if (!fence)
3919 		return 0;
3920 
3921 	return __busy_set_if_active(fence, __busy_write_id);
3922 }
3923 
3924 int
3925 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3926 		    struct drm_file *file)
3927 {
3928 	struct drm_i915_gem_busy *args = data;
3929 	struct drm_i915_gem_object *obj;
3930 	struct reservation_object_list *list;
3931 	unsigned int seq;
3932 	int err;
3933 
3934 	err = -ENOENT;
3935 	rcu_read_lock();
3936 	obj = i915_gem_object_lookup_rcu(file, args->handle);
3937 	if (!obj)
3938 		goto out;
3939 
3940 	/* A discrepancy here is that we do not report the status of
3941 	 * non-i915 fences, i.e. even though we may report the object as idle,
3942 	 * a call to set-domain may still stall waiting for foreign rendering.
3943 	 * This also means that wait-ioctl may report an object as busy,
3944 	 * where busy-ioctl considers it idle.
3945 	 *
3946 	 * We trade the ability to warn of foreign fences to report on which
3947 	 * i915 engines are active for the object.
3948 	 *
3949 	 * Alternatively, we can trade that extra information on read/write
3950 	 * activity with
3951 	 *	args->busy =
3952 	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
3953 	 * to report the overall busyness. This is what the wait-ioctl does.
3954 	 *
3955 	 */
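	/*
	 * Lockless snapshot: sample the reservation's seqcount, inspect the
	 * fences, and retry if we raced with an update while reporting busy.
	 */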
3956 retry:
3957 	seq = raw_read_seqcount(&obj->resv->seq);
3958 
3959 	/* Translate the exclusive fence to the READ *and* WRITE engine */
3960 	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3961 
3962 	/* Translate shared fences to READ set of engines */
3963 	list = rcu_dereference(obj->resv->fence);
3964 	if (list) {
3965 		unsigned int shared_count = list->shared_count, i;
3966 
3967 		for (i = 0; i < shared_count; ++i) {
3968 			struct dma_fence *fence =
3969 				rcu_dereference(list->shared[i]);
3970 
3971 			args->busy |= busy_check_reader(fence);
3972 		}
3973 	}
3974 
3975 	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3976 		goto retry;
3977 
3978 	err = 0;
3979 out:
3980 	rcu_read_unlock();
3981 	return err;
3982 }
3983 
3984 int
3985 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3986 			struct drm_file *file_priv)
3987 {
3988 	return i915_gem_ring_throttle(dev, file_priv);
3989 }
3990 
3991 int
3992 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3993 		       struct drm_file *file_priv)
3994 {
3995 	struct drm_i915_private *dev_priv = to_i915(dev);
3996 	struct drm_i915_gem_madvise *args = data;
3997 	struct drm_i915_gem_object *obj;
3998 	int err;
3999 
4000 	switch (args->madv) {
4001 	case I915_MADV_DONTNEED:
4002 	case I915_MADV_WILLNEED:
4003 	    break;
4004 	default:
4005 	    return -EINVAL;
4006 	}
4007 
4008 	obj = i915_gem_object_lookup(file_priv, args->handle);
4009 	if (!obj)
4010 		return -ENOENT;
4011 
4012 	err = mutex_lock_interruptible(&obj->mm.lock);
4013 	if (err)
4014 		goto out;
4015 
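	/*
	 * On platforms with the PIN_SWIZZLED_PAGES quirk, tiled objects keep
	 * an extra pin on their pages while marked WILLNEED; adjust that pin
	 * to match the new madvise state.
	 */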
4016 	if (obj->mm.pages &&
4017 	    i915_gem_object_is_tiled(obj) &&
4018 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4019 		if (obj->mm.madv == I915_MADV_WILLNEED) {
4020 			GEM_BUG_ON(!obj->mm.quirked);
4021 			__i915_gem_object_unpin_pages(obj);
4022 			obj->mm.quirked = false;
4023 		}
4024 		if (args->madv == I915_MADV_WILLNEED) {
4025 			GEM_BUG_ON(obj->mm.quirked);
4026 			__i915_gem_object_pin_pages(obj);
4027 			obj->mm.quirked = true;
4028 		}
4029 	}
4030 
4031 	if (obj->mm.madv != __I915_MADV_PURGED)
4032 		obj->mm.madv = args->madv;
4033 
4034 	/* if the object is no longer attached, discard its backing storage */
4035 	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
4036 		i915_gem_object_truncate(obj);
4037 
4038 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
4039 	mutex_unlock(&obj->mm.lock);
4040 
4041 out:
4042 	i915_gem_object_put(obj);
4043 	return err;
4044 }
4045 
4046 static void
4047 frontbuffer_retire(struct i915_gem_active *active,
4048 		   struct drm_i915_gem_request *request)
4049 {
4050 	struct drm_i915_gem_object *obj =
4051 		container_of(active, typeof(*obj), frontbuffer_write);
4052 
4053 	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4054 }
4055 
4056 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4057 			  const struct drm_i915_gem_object_ops *ops)
4058 {
4059 	mutex_init(&obj->mm.lock);
4060 
4061 	INIT_LIST_HEAD(&obj->global_link);
4062 	INIT_LIST_HEAD(&obj->userfault_link);
4063 	INIT_LIST_HEAD(&obj->obj_exec_link);
4064 	INIT_LIST_HEAD(&obj->vma_list);
4065 	INIT_LIST_HEAD(&obj->batch_pool_link);
4066 
4067 	obj->ops = ops;
4068 
4069 	reservation_object_init(&obj->__builtin_resv);
4070 	obj->resv = &obj->__builtin_resv;
4071 
4072 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4073 	init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
4074 
4075 	obj->mm.madv = I915_MADV_WILLNEED;
4076 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4077 	mutex_init(&obj->mm.get_page.lock);
4078 
4079 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4080 }
4081 
4082 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4083 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4084 		 I915_GEM_OBJECT_IS_SHRINKABLE,
4085 
4086 	.get_pages = i915_gem_object_get_pages_gtt,
4087 	.put_pages = i915_gem_object_put_pages_gtt,
4088 
4089 	.pwrite = i915_gem_object_pwrite_gtt,
4090 };
4091 
4092 struct drm_i915_gem_object *
4093 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4094 {
4095 	struct drm_i915_gem_object *obj;
4096 	struct address_space *mapping;
4097 	gfp_t mask;
4098 	int ret;
4099 
4100 	/* There is a prevalence of the assumption that we fit the object's
4101 	 * page count inside a 32bit _signed_ variable. Let's document this and
4102 	 * catch if we ever need to fix it. In the meantime, if you do spot
4103 	 * such a local variable, please consider fixing!
4104 	 */
4105 	if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
4106 		return ERR_PTR(-E2BIG);
4107 
4108 	if (overflows_type(size, obj->base.size))
4109 		return ERR_PTR(-E2BIG);
4110 
4111 	obj = i915_gem_object_alloc(dev_priv);
4112 	if (obj == NULL)
4113 		return ERR_PTR(-ENOMEM);
4114 
4115 	ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size);
4116 	if (ret)
4117 		goto fail;
4118 
4119 	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4120 	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4121 		/* 965gm cannot relocate objects above 4GiB. */
4122 		mask &= ~__GFP_HIGHMEM;
4123 		mask |= __GFP_DMA32;
4124 	}
4125 
4126 	mapping = obj->base.filp->f_mapping;
4127 	mapping_set_gfp_mask(mapping, mask);
4128 
4129 	i915_gem_object_init(obj, &i915_gem_object_ops);
4130 
4131 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4132 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4133 
4134 	if (HAS_LLC(dev_priv)) {
4135 		/* On some devices, we can have the GPU use the LLC (the CPU
4136 		 * cache) for about a 10% performance improvement
4137 		 * compared to uncached.  Graphics requests other than
4138 		 * display scanout are coherent with the CPU in
4139 		 * accessing this cache.  This means in this mode we
4140 		 * don't need to clflush on the CPU side, and on the
4141 		 * GPU side we only need to flush internal caches to
4142 		 * get data visible to the CPU.
4143 		 *
4144 		 * However, we maintain the display planes as UC, and so
4145 		 * need to rebind when first used as such.
4146 		 */
4147 		obj->cache_level = I915_CACHE_LLC;
4148 	} else
4149 		obj->cache_level = I915_CACHE_NONE;
4150 
4151 	trace_i915_gem_object_create(obj);
4152 
4153 	return obj;
4154 
4155 fail:
4156 	i915_gem_object_free(obj);
4157 	return ERR_PTR(ret);
4158 }
4159 
4160 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4161 {
4162 	/* If we are the last user of the backing storage (be it shmemfs
4163 	 * pages or stolen etc), we know that the pages are going to be
4164 	 * immediately released. In this case, we can then skip copying
4165 	 * back the contents from the GPU.
4166 	 */
4167 
4168 	if (obj->mm.madv != I915_MADV_WILLNEED)
4169 		return false;
4170 
4171 	if (obj->base.filp == NULL)
4172 		return true;
4173 
4174 	/* At first glance, this looks racy, but then again so would be
4175 	 * userspace racing mmap against close. However, the first external
4176 	 * reference to the filp can only be obtained through the
4177 	 * i915_gem_mmap_ioctl() which safeguards us against the user
4178 	 * acquiring such a reference whilst we are in the middle of
4179 	 * freeing the object.
4180 	 */
4181 	return atomic_long_read(&obj->base.filp->f_count) == 1;
4182 }
4183 
4184 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4185 				    struct llist_node *freed)
4186 {
4187 	struct drm_i915_gem_object *obj, *on;
4188 
4189 	mutex_lock(&i915->drm.struct_mutex);
4190 	intel_runtime_pm_get(i915);
4191 	llist_for_each_entry(obj, freed, freed) {
4192 		struct i915_vma *vma, *vn;
4193 
4194 		trace_i915_gem_object_destroy(obj);
4195 
4196 		GEM_BUG_ON(i915_gem_object_is_active(obj));
4197 		list_for_each_entry_safe(vma, vn,
4198 					 &obj->vma_list, obj_link) {
4199 			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4200 			GEM_BUG_ON(i915_vma_is_active(vma));
4201 			vma->flags &= ~I915_VMA_PIN_MASK;
4202 			i915_vma_close(vma);
4203 		}
4204 		GEM_BUG_ON(!list_empty(&obj->vma_list));
4205 		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4206 
4207 		list_del(&obj->global_link);
4208 	}
4209 	intel_runtime_pm_put(i915);
4210 	mutex_unlock(&i915->drm.struct_mutex);
4211 
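	/*
	 * With the GGTT bindings torn down above, the backing pages can be
	 * released and the objects freed without holding struct_mutex.
	 */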
4212 	llist_for_each_entry_safe(obj, on, freed, freed) {
4213 		GEM_BUG_ON(obj->bind_count);
4214 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4215 
4216 		if (obj->ops->release)
4217 			obj->ops->release(obj);
4218 
4219 		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4220 			atomic_set(&obj->mm.pages_pin_count, 0);
4221 		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4222 		GEM_BUG_ON(obj->mm.pages);
4223 
4224 		if (obj->base.import_attach)
4225 			drm_prime_gem_destroy(&obj->base, NULL);
4226 
4227 		reservation_object_fini(&obj->__builtin_resv);
4228 		drm_gem_object_release(&obj->base);
4229 		i915_gem_info_remove_obj(i915, obj->base.size);
4230 
4231 		kfree(obj->bit_17);
4232 		i915_gem_object_free(obj);
4233 	}
4234 }
4235 
4236 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4237 {
4238 	struct llist_node *freed;
4239 
4240 	freed = llist_del_all(&i915->mm.free_list);
4241 	if (unlikely(freed))
4242 		__i915_gem_free_objects(i915, freed);
4243 }
4244 
4245 static void __i915_gem_free_work(struct work_struct *work)
4246 {
4247 	struct drm_i915_private *i915 =
4248 		container_of(work, struct drm_i915_private, mm.free_work);
4249 	struct llist_node *freed;
4250 
4251 	/* All file-owned VMA should have been released by this point through
4252 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4253 	 * However, the object may also be bound into the global GTT (e.g.
4254 	 * older GPUs without per-process support, or for direct access through
4255 	 * the GTT either for the user or for scanout). Those VMA still need to
4256 	 * be unbound now.
4257 	 */
4258 
4259 	while ((freed = llist_del_all(&i915->mm.free_list)))
4260 		__i915_gem_free_objects(i915, freed);
4261 }
4262 
4263 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4264 {
4265 	struct drm_i915_gem_object *obj =
4266 		container_of(head, typeof(*obj), rcu);
4267 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
4268 
4269 	/* We can't simply use call_rcu() from i915_gem_free_object()
4270 	 * as we need to block whilst unbinding, and the call_rcu
4271 	 * task may be called from softirq context. So we take a
4272 	 * detour through a worker.
4273 	 */
4274 	if (llist_add(&obj->freed, &i915->mm.free_list))
4275 		schedule_work(&i915->mm.free_work);
4276 }
4277 
4278 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4279 {
4280 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4281 
4282 	if (obj->mm.quirked)
4283 		__i915_gem_object_unpin_pages(obj);
4284 
4285 	if (discard_backing_storage(obj))
4286 		obj->mm.madv = I915_MADV_DONTNEED;
4287 
4288 	/* Before we free the object, make sure any pure RCU-only
4289 	 * read-side critical sections are complete, e.g.
4290 	 * i915_gem_busy_ioctl(). For the corresponding synchronized
4291 	 * lookup see i915_gem_object_lookup_rcu().
4292 	 */
4293 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4294 }
4295 
4296 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4297 {
4298 	lockdep_assert_held(&obj->base.dev->struct_mutex);
4299 
4300 	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
4301 	if (i915_gem_object_is_active(obj))
4302 		i915_gem_object_set_active_reference(obj);
4303 	else
4304 		i915_gem_object_put(obj);
4305 }
4306 
4307 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
4308 {
4309 	struct intel_engine_cs *engine;
4310 	enum intel_engine_id id;
4311 
4312 	for_each_engine(engine, dev_priv, id)
4313 		GEM_BUG_ON(engine->last_retired_context &&
4314 			   !i915_gem_context_is_kernel(engine->last_retired_context));
4315 }
4316 
4317 int i915_gem_suspend(struct drm_i915_private *dev_priv)
4318 {
4319 	struct drm_device *dev = &dev_priv->drm;
4320 	int ret;
4321 
4322 	intel_suspend_gt_powersave(dev_priv);
4323 
4324 	mutex_lock(&dev->struct_mutex);
4325 
4326 	/* We have to flush all the executing contexts to main memory so
4327 	 * that they can be saved in the hibernation image. To ensure the last
4328 	 * context image is coherent, we have to switch away from it. That
4329 	 * leaves the dev_priv->kernel_context still active when
4330 	 * we actually suspend, and its image in memory may not match the GPU
4331 	 * state. Fortunately, the kernel_context is disposable and we do
4332 	 * not rely on its state.
4333 	 */
4334 	ret = i915_gem_switch_to_kernel_context(dev_priv);
4335 	if (ret)
4336 		goto err;
4337 
4338 	ret = i915_gem_wait_for_idle(dev_priv,
4339 				     I915_WAIT_INTERRUPTIBLE |
4340 				     I915_WAIT_LOCKED);
4341 	if (ret)
4342 		goto err;
4343 
4344 	i915_gem_retire_requests(dev_priv);
4345 	GEM_BUG_ON(dev_priv->gt.active_requests);
4346 
4347 	assert_kernel_context_is_current(dev_priv);
4348 	i915_gem_context_lost(dev_priv);
4349 	mutex_unlock(&dev->struct_mutex);
4350 
4351 	intel_guc_suspend(dev_priv);
4352 
4353 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4354 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4355 
4356 	/* As the idle_work rearms itself if it detects a race, play safe and
4357 	 * repeat the flush until it is definitely idle.
4358 	 */
4359 	while (flush_delayed_work(&dev_priv->gt.idle_work))
4360 		;
4361 
4362 	i915_gem_drain_freed_objects(dev_priv);
4363 
4364 	/* Assert that we successfully flushed all the work and
4365 	 * reset the GPU back to its idle, low power state.
4366 	 */
4367 	WARN_ON(dev_priv->gt.awake);
4368 	WARN_ON(!intel_execlists_idle(dev_priv));
4369 
4370 	/*
4371 	 * Neither the BIOS, ourselves, nor any other kernel
4372 	 * expects the system to be in execlists mode on startup,
4373 	 * so we need to reset the GPU back to legacy mode. And the only
4374 	 * known way to disable logical contexts is through a GPU reset.
4375 	 *
4376 	 * So in order to leave the system in a known default configuration,
4377 	 * always reset the GPU upon unload and suspend. Afterwards we then
4378 	 * clean up the GEM state tracking, flushing off the requests and
4379 	 * leaving the system in a known idle state.
4380 	 *
4381 	 * Note that it is of the utmost importance that the GPU is idle and
4382 	 * all stray writes are flushed *before* we dismantle the backing
4383 	 * storage for the pinned objects.
4384 	 *
4385 	 * However, since we are uncertain that resetting the GPU on older
4386 	 * machines is a good idea, we don't - just in case it leaves the
4387 	 * machine in an unusable condition.
4388 	 */
4389 	if (HAS_HW_CONTEXTS(dev_priv)) {
4390 		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4391 		WARN_ON(reset && reset != -ENODEV);
4392 	}
4393 
4394 	return 0;
4395 
4396 err:
4397 	mutex_unlock(&dev->struct_mutex);
4398 	return ret;
4399 }
4400 
4401 void i915_gem_resume(struct drm_i915_private *dev_priv)
4402 {
4403 	struct drm_device *dev = &dev_priv->drm;
4404 
4405 	WARN_ON(dev_priv->gt.awake);
4406 
4407 	mutex_lock(&dev->struct_mutex);
4408 	i915_gem_restore_gtt_mappings(dev_priv);
4409 
4410 	/* As we didn't flush the kernel context before suspend, we cannot
4411 	 * guarantee that the context image is complete. So let's just reset
4412 	 * it and start again.
4413 	 */
4414 	dev_priv->gt.resume(dev_priv);
4415 
4416 	mutex_unlock(&dev->struct_mutex);
4417 }
4418 
4419 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4420 {
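	/* Nothing to do on pre-gen5 parts or when bit-6 swizzling is unused. */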
4421 	if (INTEL_GEN(dev_priv) < 5 ||
4422 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4423 		return;
4424 
4425 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4426 				 DISP_TILE_SURFACE_SWIZZLING);
4427 
4428 	if (IS_GEN5(dev_priv))
4429 		return;
4430 
4431 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4432 	if (IS_GEN6(dev_priv))
4433 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4434 	else if (IS_GEN7(dev_priv))
4435 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4436 	else if (IS_GEN8(dev_priv))
4437 		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4438 	else
4439 		BUG();
4440 }
4441 
4442 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4443 {
4444 	I915_WRITE(RING_CTL(base), 0);
4445 	I915_WRITE(RING_HEAD(base), 0);
4446 	I915_WRITE(RING_TAIL(base), 0);
4447 	I915_WRITE(RING_START(base), 0);
4448 }
4449 
4450 static void init_unused_rings(struct drm_i915_private *dev_priv)
4451 {
4452 	if (IS_I830(dev_priv)) {
4453 		init_unused_ring(dev_priv, PRB1_BASE);
4454 		init_unused_ring(dev_priv, SRB0_BASE);
4455 		init_unused_ring(dev_priv, SRB1_BASE);
4456 		init_unused_ring(dev_priv, SRB2_BASE);
4457 		init_unused_ring(dev_priv, SRB3_BASE);
4458 	} else if (IS_GEN2(dev_priv)) {
4459 		init_unused_ring(dev_priv, SRB0_BASE);
4460 		init_unused_ring(dev_priv, SRB1_BASE);
4461 	} else if (IS_GEN3(dev_priv)) {
4462 		init_unused_ring(dev_priv, PRB1_BASE);
4463 		init_unused_ring(dev_priv, PRB2_BASE);
4464 	}
4465 }
4466 
4467 int
4468 i915_gem_init_hw(struct drm_i915_private *dev_priv)
4469 {
4470 	struct intel_engine_cs *engine;
4471 	enum intel_engine_id id;
4472 	int ret;
4473 
4474 	dev_priv->gt.last_init_time = ktime_get();
4475 
4476 	/* Double layer security blanket, see i915_gem_init() */
4477 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4478 
4479 	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4480 		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4481 
4482 	if (IS_HASWELL(dev_priv))
4483 		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4484 			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4485 
4486 	if (HAS_PCH_NOP(dev_priv)) {
4487 		if (IS_IVYBRIDGE(dev_priv)) {
4488 			u32 temp = I915_READ(GEN7_MSG_CTL);
4489 			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4490 			I915_WRITE(GEN7_MSG_CTL, temp);
4491 		} else if (INTEL_GEN(dev_priv) >= 7) {
4492 			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4493 			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4494 			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4495 		}
4496 	}
4497 
4498 	i915_gem_init_swizzling(dev_priv);
4499 
4500 	/*
4501 	 * At least 830 can leave some of the unused rings
4502 	 * "active" (i.e. head != tail) after resume, which
4503 	 * will prevent c3 entry. Make sure all unused rings
4504 	 * are totally idle.
4505 	 */
4506 	init_unused_rings(dev_priv);
4507 
4508 	BUG_ON(!dev_priv->kernel_context);
4509 
4510 	ret = i915_ppgtt_init_hw(dev_priv);
4511 	if (ret) {
4512 		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4513 		goto out;
4514 	}
4515 
4516 	/* Need to do basic initialisation of all rings first: */
4517 	for_each_engine(engine, dev_priv, id) {
4518 		ret = engine->init_hw(engine);
4519 		if (ret)
4520 			goto out;
4521 	}
4522 
4523 	intel_mocs_init_l3cc_table(dev_priv);
4524 
4525 	/* We can't enable contexts until all firmware is loaded */
4526 	ret = intel_guc_setup(dev_priv);
4527 	if (ret)
4528 		goto out;
4529 
4530 out:
4531 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4532 	return ret;
4533 }
4534 
4535 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4536 {
4537 	if (INTEL_INFO(dev_priv)->gen < 6)
4538 		return false;
4539 
4540 	/* TODO: make semaphores and Execlists play nicely together */
4541 	if (i915.enable_execlists)
4542 		return false;
4543 
4544 	if (value >= 0)
4545 		return value;
4546 
4547 #ifdef CONFIG_INTEL_IOMMU
4548 	/* Enable semaphores on SNB when IO remapping is off */
4549 	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4550 		return false;
4551 #endif
4552 
4553 	return true;
4554 }
4555 
4556 int i915_gem_init(struct drm_i915_private *dev_priv)
4557 {
4558 	int ret;
4559 
4560 	mutex_lock(&dev_priv->drm.struct_mutex);
4561 
4562 	if (!i915.enable_execlists) {
4563 		dev_priv->gt.resume = intel_legacy_submission_resume;
4564 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4565 	} else {
4566 		dev_priv->gt.resume = intel_lr_context_resume;
4567 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4568 	}
4569 
4570 	/* This is just a security blanket to placate dragons.
4571 	 * On some systems, we very sporadically observe that the first TLBs
4572 	 * used by the CS may be stale, despite us poking the TLB reset. If
4573 	 * we hold the forcewake during initialisation these problems
4574 	 * just magically go away.
4575 	 */
4576 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4577 
4578 	i915_gem_init_userptr(dev_priv);
4579 
4580 	ret = i915_gem_init_ggtt(dev_priv);
4581 	if (ret)
4582 		goto out_unlock;
4583 
4584 	ret = i915_gem_context_init(dev_priv);
4585 	if (ret)
4586 		goto out_unlock;
4587 
4588 	ret = intel_engines_init(dev_priv);
4589 	if (ret)
4590 		goto out_unlock;
4591 
4592 	ret = i915_gem_init_hw(dev_priv);
4593 	if (ret == -EIO) {
4594 		/* Allow engine initialisation to fail by marking the GPU as
4595 		 * wedged. But we only want to do this where the GPU is angry;
4596 		 * for all other failures, such as an allocation failure, we bail.
4597 		 */
4598 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4599 		i915_gem_set_wedged(dev_priv);
4600 		ret = 0;
4601 	}
4602 
4603 out_unlock:
4604 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4605 	mutex_unlock(&dev_priv->drm.struct_mutex);
4606 
4607 	return ret;
4608 }
4609 
4610 void
4611 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
4612 {
4613 	struct intel_engine_cs *engine;
4614 	enum intel_engine_id id;
4615 
4616 	for_each_engine(engine, dev_priv, id)
4617 		dev_priv->gt.cleanup_engine(engine);
4618 }
4619 
4620 void
4621 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4622 {
4623 	int i;
4624 
4625 	if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4626 	    !IS_CHERRYVIEW(dev_priv))
4627 		dev_priv->num_fence_regs = 32;
4628 	else if (INTEL_INFO(dev_priv)->gen >= 4 ||
4629 		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
4630 		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
4631 		dev_priv->num_fence_regs = 16;
4632 	else
4633 		dev_priv->num_fence_regs = 8;
4634 
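	/* When running as a vGPU guest, the host dictates how many fences we own. */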
4635 	if (intel_vgpu_active(dev_priv))
4636 		dev_priv->num_fence_regs =
4637 				I915_READ(vgtif_reg(avail_rs.fence_num));
4638 
4639 	/* Initialize fence registers to zero */
4640 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
4641 		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4642 
4643 		fence->i915 = dev_priv;
4644 		fence->id = i;
4645 		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4646 	}
4647 	i915_gem_restore_fences(dev_priv);
4648 
4649 	i915_gem_detect_bit_6_swizzle(dev_priv);
4650 }
4651 
4652 int
4653 i915_gem_load_init(struct drm_i915_private *dev_priv)
4654 {
4655 	int err = -ENOMEM;
4656 
4657 	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
4658 	if (!dev_priv->objects)
4659 		goto err_out;
4660 
4661 	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
4662 	if (!dev_priv->vmas)
4663 		goto err_objects;
4664 
4665 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
4666 					SLAB_HWCACHE_ALIGN |
4667 					SLAB_RECLAIM_ACCOUNT |
4668 					SLAB_DESTROY_BY_RCU);
4669 	if (!dev_priv->requests)
4670 		goto err_vmas;
4671 
4672 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
4673 					    SLAB_HWCACHE_ALIGN |
4674 					    SLAB_RECLAIM_ACCOUNT);
4675 	if (!dev_priv->dependencies)
4676 		goto err_requests;
4677 
4678 	mutex_lock(&dev_priv->drm.struct_mutex);
4679 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
4680 	err = i915_gem_timeline_init__global(dev_priv);
4681 	mutex_unlock(&dev_priv->drm.struct_mutex);
4682 	if (err)
4683 		goto err_dependencies;
4684 
4685 	INIT_LIST_HEAD(&dev_priv->context_list);
4686 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
4687 	init_llist_head(&dev_priv->mm.free_list);
4688 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4689 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4690 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4691 	INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
4692 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4693 			  i915_gem_retire_work_handler);
4694 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4695 			  i915_gem_idle_work_handler);
4696 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4697 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4698 
4699 	init_waitqueue_head(&dev_priv->pending_flip_queue);
4700 
4701 	dev_priv->mm.interruptible = true;
4702 
4703 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4704 
4705 	spin_lock_init(&dev_priv->fb_tracking.lock);
4706 
4707 	return 0;
4708 
4709 err_dependencies:
4710 	kmem_cache_destroy(dev_priv->dependencies);
4711 err_requests:
4712 	kmem_cache_destroy(dev_priv->requests);
4713 err_vmas:
4714 	kmem_cache_destroy(dev_priv->vmas);
4715 err_objects:
4716 	kmem_cache_destroy(dev_priv->objects);
4717 err_out:
4718 	return err;
4719 }
4720 
4721 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
4722 {
4723 	WARN_ON(!llist_empty(&dev_priv->mm.free_list));
4724 
4725 	mutex_lock(&dev_priv->drm.struct_mutex);
4726 	i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
4727 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
4728 	mutex_unlock(&dev_priv->drm.struct_mutex);
4729 
4730 	kmem_cache_destroy(dev_priv->dependencies);
4731 	kmem_cache_destroy(dev_priv->requests);
4732 	kmem_cache_destroy(dev_priv->vmas);
4733 	kmem_cache_destroy(dev_priv->objects);
4734 
4735 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4736 	rcu_barrier();
4737 }
4738 
4739 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4740 {
4741 	intel_runtime_pm_get(dev_priv);
4742 
4743 	mutex_lock(&dev_priv->drm.struct_mutex);
4744 	i915_gem_shrink_all(dev_priv);
4745 	mutex_unlock(&dev_priv->drm.struct_mutex);
4746 
4747 	intel_runtime_pm_put(dev_priv);
4748 
4749 	return 0;
4750 }
4751 
4752 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4753 {
4754 	struct drm_i915_gem_object *obj;
4755 	struct list_head *phases[] = {
4756 		&dev_priv->mm.unbound_list,
4757 		&dev_priv->mm.bound_list,
4758 		NULL
4759 	}, **p;
4760 
4761 	/* Called just before we write the hibernation image.
4762 	 *
4763 	 * We need to update the domain tracking to reflect that the CPU
4764 	 * will be accessing all the pages to create and restore from the
4765 	 * hibernation, and so upon restoration those pages will be in the
4766 	 * CPU domain.
4767 	 *
4768 	 * To make sure the hibernation image contains the latest state,
4769 	 * we update that state just before writing out the image.
4770 	 *
4771 	 * To try and reduce the hibernation image, we manually shrink
4772 	 * the objects as well.
4773 	 */
4774 
4775 	mutex_lock(&dev_priv->drm.struct_mutex);
4776 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4777 
4778 	for (p = phases; *p; p++) {
4779 		list_for_each_entry(obj, *p, global_link) {
4780 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4781 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4782 		}
4783 	}
4784 	mutex_unlock(&dev_priv->drm.struct_mutex);
4785 
4786 	return 0;
4787 }
4788 
4789 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4790 {
4791 	struct drm_i915_file_private *file_priv = file->driver_priv;
4792 	struct drm_i915_gem_request *request;
4793 
4794 	/* Clean up our request list when the client is going away, so that
4795 	 * later retire_requests won't dereference our soon-to-be-gone
4796 	 * file_priv.
4797 	 */
4798 	spin_lock(&file_priv->mm.lock);
4799 	list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4800 		request->file_priv = NULL;
4801 	spin_unlock(&file_priv->mm.lock);
4802 
4803 	if (!list_empty(&file_priv->rps.link)) {
4804 		spin_lock(&to_i915(dev)->rps.client_lock);
4805 		list_del(&file_priv->rps.link);
4806 		spin_unlock(&to_i915(dev)->rps.client_lock);
4807 	}
4808 }
4809 
4810 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4811 {
4812 	struct drm_i915_file_private *file_priv;
4813 	int ret;
4814 
4815 	DRM_DEBUG("\n");
4816 
4817 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4818 	if (!file_priv)
4819 		return -ENOMEM;
4820 
4821 	file->driver_priv = file_priv;
4822 	file_priv->dev_priv = to_i915(dev);
4823 	file_priv->file = file;
4824 	INIT_LIST_HEAD(&file_priv->rps.link);
4825 
4826 	spin_lock_init(&file_priv->mm.lock);
4827 	INIT_LIST_HEAD(&file_priv->mm.request_list);
4828 
4829 	file_priv->bsd_engine = -1;
4830 
4831 	ret = i915_gem_context_open(dev, file);
4832 	if (ret)
4833 		kfree(file_priv);
4834 
4835 	return ret;
4836 }
4837 
4838 /**
4839  * i915_gem_track_fb - update frontbuffer tracking
4840  * @old: current GEM buffer for the frontbuffer slots
4841  * @new: new GEM buffer for the frontbuffer slots
4842  * @frontbuffer_bits: bitmask of frontbuffer slots
4843  *
4844  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4845  * from @old and setting them in @new. Both @old and @new can be NULL.
4846  */
4847 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4848 		       struct drm_i915_gem_object *new,
4849 		       unsigned frontbuffer_bits)
4850 {
4851 	/* Control of individual bits within the mask is guarded by
4852 	 * the owning plane->mutex, i.e. we can never see concurrent
4853 	 * manipulation of individual bits. But since the bitfield as a whole
4854 	 * is updated using RMW, we need to use atomics in order to update
4855 	 * the bits.
4856 	 */
4857 	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4858 		     sizeof(atomic_t) * BITS_PER_BYTE);
4859 
4860 	if (old) {
4861 		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4862 		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4863 	}
4864 
4865 	if (new) {
4866 		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4867 		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4868 	}
4869 }
4870 
4871 /* Allocate a new GEM object and fill it with the supplied data */
4872 struct drm_i915_gem_object *
4873 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
4874 			         const void *data, size_t size)
4875 {
4876 	struct drm_i915_gem_object *obj;
4877 	struct sg_table *sg;
4878 	size_t bytes;
4879 	int ret;
4880 
4881 	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
4882 	if (IS_ERR(obj))
4883 		return obj;
4884 
4885 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4886 	if (ret)
4887 		goto fail;
4888 
4889 	ret = i915_gem_object_pin_pages(obj);
4890 	if (ret)
4891 		goto fail;
4892 
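	/* Copy the supplied data into the freshly pinned backing pages. */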
4893 	sg = obj->mm.pages;
4894 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4895 	obj->mm.dirty = true; /* Backing store is now out of date */
4896 	i915_gem_object_unpin_pages(obj);
4897 
4898 	if (WARN_ON(bytes != size)) {
4899 		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4900 		ret = -EFAULT;
4901 		goto fail;
4902 	}
4903 
4904 	return obj;
4905 
4906 fail:
4907 	i915_gem_object_put(obj);
4908 	return ERR_PTR(ret);
4909 }
4910 
4911 struct scatterlist *
4912 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4913 		       unsigned int n,
4914 		       unsigned int *offset)
4915 {
4916 	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4917 	struct scatterlist *sg;
4918 	unsigned int idx, count;
4919 
4920 	might_sleep();
4921 	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4922 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4923 
4924 	/* As we iterate forward through the sg, we record each entry in a
4925 	 * radixtree for quick repeated (backwards) lookups. If we have seen
4926 	 * this index previously, we will have an entry for it.
4927 	 *
4928 	 * Initial lookup is O(N), but this is amortized to O(1) for
4929 	 * sequential page access (where each new request is consecutive
4930 	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4931 	 * i.e. O(1) with a large constant!
4932 	 */
4933 	if (n < READ_ONCE(iter->sg_idx))
4934 		goto lookup;
4935 
4936 	mutex_lock(&iter->lock);
4937 
4938 	/* We prefer to reuse the last sg so that repeated lookup of this
4939 	/* We prefer to reuse the last sg so that repeated lookups of this
4940 	 * sg is faster than going through the radixtree.
4941 	 */
4942 
4943 	sg = iter->sg_pos;
4944 	idx = iter->sg_idx;
4945 	count = __sg_page_count(sg);
4946 
4947 	while (idx + count <= n) {
4948 		unsigned long exception, i;
4949 		int ret;
4950 
4951 		/* If we cannot allocate and insert this entry, or the
4952 		 * individual pages from this range, cancel updating the
4953 		 * sg_idx so that on this lookup we are forced to linearly
4954 		 * scan onwards, but on future lookups we will try the
4955 		 * insertion again (in which case we need to be careful of
4956 		 * the error return reporting that we have already inserted
4957 		 * this index).
4958 		 */
4959 		ret = radix_tree_insert(&iter->radix, idx, sg);
4960 		if (ret && ret != -EEXIST)
4961 			goto scan;
4962 
4963 		exception =
4964 			RADIX_TREE_EXCEPTIONAL_ENTRY |
4965 			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
4966 		for (i = 1; i < count; i++) {
4967 			ret = radix_tree_insert(&iter->radix, idx + i,
4968 						(void *)exception);
4969 			if (ret && ret != -EEXIST)
4970 				goto scan;
4971 		}
4972 
4973 		idx += count;
4974 		sg = ____sg_next(sg);
4975 		count = __sg_page_count(sg);
4976 	}
4977 
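	/*
	 * Reached either after walking forward to the target sg or after a
	 * failed radix-tree insertion; record our progress before unlocking.
	 */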
4978 scan:
4979 	iter->sg_pos = sg;
4980 	iter->sg_idx = idx;
4981 
4982 	mutex_unlock(&iter->lock);
4983 
4984 	if (unlikely(n < idx)) /* insertion completed by another thread */
4985 		goto lookup;
4986 
4987 	/* In case we failed to insert the entry into the radixtree, we need
4988 	 * to look beyond the current sg.
4989 	 */
4990 	while (idx + count <= n) {
4991 		idx += count;
4992 		sg = ____sg_next(sg);
4993 		count = __sg_page_count(sg);
4994 	}
4995 
4996 	*offset = n - idx;
4997 	return sg;
4998 
4999 lookup:
5000 	rcu_read_lock();
5001 
5002 	sg = radix_tree_lookup(&iter->radix, n);
5003 	GEM_BUG_ON(!sg);
5004 
5005 	/* If this index is in the middle of a multi-page sg entry,
5006 	 * the radixtree will contain an exceptional entry that points
5007 	 * to the start of that range. We will return the pointer to
5008 	 * the base page and the offset of this page within the
5009 	 * sg entry's range.
5010 	 */
5011 	*offset = 0;
5012 	if (unlikely(radix_tree_exception(sg))) {
5013 		unsigned long base =
5014 			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
5015 
5016 		sg = radix_tree_lookup(&iter->radix, base);
5017 		GEM_BUG_ON(!sg);
5018 
5019 		*offset = n - base;
5020 	}
5021 
5022 	rcu_read_unlock();
5023 
5024 	return sg;
5025 }
5026 
5027 struct page *
5028 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5029 {
5030 	struct scatterlist *sg;
5031 	unsigned int offset;
5032 
5033 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5034 
5035 	sg = i915_gem_object_get_sg(obj, n, &offset);
5036 	return nth_page(sg_page(sg), offset);
5037 }
5038 
5039 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5040 struct page *
5041 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5042 			       unsigned int n)
5043 {
5044 	struct page *page;
5045 
5046 	page = i915_gem_object_get_page(obj, n);
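	/*
	 * Only mark the page dirty here if the object as a whole is not
	 * already tracked as dirty.
	 */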
5047 	if (!obj->mm.dirty)
5048 		set_page_dirty(page);
5049 
5050 	return page;
5051 }
5052 
5053 dma_addr_t
5054 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5055 				unsigned long n)
5056 {
5057 	struct scatterlist *sg;
5058 	unsigned int offset;
5059 
5060 	sg = i915_gem_object_get_sg(obj, n, &offset);
5061 	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5062 }
5063