xref: /linux/drivers/gpu/drm/i915/gem/i915_gem_mman.c (revision bca5cfbb694d66a1c482d0c347eee80f6afbc870)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2016 Intel Corporation
4  */
5 
6 #include <linux/anon_inodes.h>
7 #include <linux/mman.h>
8 #include <linux/pfn_t.h>
9 #include <linux/sizes.h>
10 
11 #include <drm/drm_cache.h>
12 
13 #include "gt/intel_gt.h"
14 #include "gt/intel_gt_requests.h"
15 
16 #include "i915_drv.h"
17 #include "i915_gem_evict.h"
18 #include "i915_gem_gtt.h"
19 #include "i915_gem_ioctls.h"
20 #include "i915_gem_object.h"
21 #include "i915_gem_mman.h"
22 #include "i915_mm.h"
23 #include "i915_trace.h"
24 #include "i915_user_extensions.h"
25 #include "i915_gem_ttm.h"
26 #include "i915_vma.h"
27 
28 static inline bool
29 __vma_matches(struct vm_area_struct *vma, struct file *filp,
30 	      unsigned long addr, unsigned long size)
31 {
32 	if (vma->vm_file != filp)
33 		return false;
34 
35 	return vma->vm_start == addr &&
36 	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
37 }
38 
39 /**
40  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
41  *			 it is mapped to.
42  * @dev: drm device
43  * @data: ioctl data blob
44  * @file: drm file
45  *
46  * While the mapping holds a reference on the contents of the object, it doesn't
47  * imply a ref on the object itself.
48  *
49  * IMPORTANT:
50  *
51  * DRM driver writers who look at this function as an example for how to do GEM
52  * mmap support: please don't implement mmap support like this. The modern way
53  * to implement DRM mmap support is with an mmap offset ioctl (like
54  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
55  * That way debug tooling like valgrind will understand what's going on; hiding
56  * the mmap call in a driver private ioctl will break that. The i915 driver only
57  * does cpu mmaps this way because we didn't know better; see the sketch below.
58  */
59 int
60 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
61 		    struct drm_file *file)
62 {
63 	struct drm_i915_private *i915 = to_i915(dev);
64 	struct drm_i915_gem_mmap *args = data;
65 	struct drm_i915_gem_object *obj;
66 	unsigned long addr;
67 
68 	/*
69 	 * mmap ioctl is disallowed for all discrete platforms,
70 	 * and for all platforms with GRAPHICS_VER > 12.
71 	 */
72 	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
73 		return -EOPNOTSUPP;
74 
75 	if (args->flags & ~(I915_MMAP_WC))
76 		return -EINVAL;
77 
78 	if (args->flags & I915_MMAP_WC && !pat_enabled())
79 		return -ENODEV;
80 
81 	obj = i915_gem_object_lookup(file, args->handle);
82 	if (!obj)
83 		return -ENOENT;
84 
85 	/* prime objects have no backing filp to GEM mmap
86 	 * pages from.
87 	 */
88 	if (!obj->base.filp) {
89 		addr = -ENXIO;
90 		goto err;
91 	}
92 
93 	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
94 		addr = -EINVAL;
95 		goto err;
96 	}
97 
98 	addr = vm_mmap(obj->base.filp, 0, args->size,
99 		       PROT_READ | PROT_WRITE, MAP_SHARED,
100 		       args->offset);
101 	if (IS_ERR_VALUE(addr))
102 		goto err;
103 
104 	if (args->flags & I915_MMAP_WC) {
105 		struct mm_struct *mm = current->mm;
106 		struct vm_area_struct *vma;
107 
108 		if (mmap_write_lock_killable(mm)) {
109 			addr = -EINTR;
110 			goto err;
111 		}
112 		vma = find_vma(mm, addr);
113 		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
114 			vma->vm_page_prot =
115 				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
116 		else
117 			addr = -ENOMEM;
118 		mmap_write_unlock(mm);
119 		if (IS_ERR_VALUE(addr))
120 			goto err;
121 	}
122 	i915_gem_object_put(obj);
123 
124 	args->addr_ptr = (u64)addr;
125 	return 0;
126 
127 err:
128 	i915_gem_object_put(obj);
129 	return addr;
130 }
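
/*
 * A minimal userspace sketch of the modern approach referred to above: ask
 * the kernel for a fake offset via the MMAP_OFFSET ioctl and then mmap() the
 * DRM fd itself. The ioctl and struct below are the real i915 uAPI, but
 * 'drm_fd', 'handle', 'size' and all error handling are assumed/omitted:
 *
 *	struct drm_i915_gem_mmap_offset arg = {
 *		.handle = handle,
 *		.flags = I915_MMAP_OFFSET_WC,
 *	};
 *	void *ptr = MAP_FAILED;
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, drm_fd, arg.offset);
 */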
131 
132 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
133 {
134 	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
135 }
136 
137 /**
138  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
139  *
140  * A history of the GTT mmap interface:
141  *
142  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
143  *     aligned and suitable for fencing, and still fit into the available
144  *     mappable space left by the pinned display objects. A classic problem,
145  *     which we called the page-fault-of-doom, arose when we would ping-pong
146  *     between two objects that could not both fit inside the GTT: the memcpy
147  *     would page one object in at the expense of the other between every
148  *     single byte.
149  *
150  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
151  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
152  *     object is too large for the available space (or simply too large
153  *     for the mappable aperture!), a view is created instead and faulted
154  *     into userspace. (This view is aligned and sized appropriately for
155  *     fenced access.)
156  *
157  * 2 - Recognise WC as a separate cache domain so that we can flush the
158  *     delayed writes via GTT before performing direct access via WC.
159  *
160  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
161  *     pagefault; swapin remains transparent.
162  *
163  * 4 - Support multiple fault handlers per object depending on object's
164  *     backing storage (a.k.a. MMAP_OFFSET).
165  *
166  * 5 - Support multiple partial mmaps (mmap part of a BO + unmap an offset,
167  *     multiple times with different sizes and offsets).
168  *
169  * Restrictions:
170  *
171  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause
172  *    machine hangs on some architectures and corruption on others. An attempt
173  *    to service a GTT page fault from a snoopable object will generate a SIGBUS.
174  *
175  *  * the object must be able to fit into RAM (physical memory, though not
176  *    limited to the mappable aperture).
177  *
178  *
179  * Caveats:
180  *
181  *  * a new GTT page fault will synchronize rendering from the GPU and flush
182  *    all data to system memory. Subsequent access will not be synchronized.
183  *
184  *  * all mappings are revoked on runtime device suspend.
185  *
186  *  * there are only 8, 16 or 32 fence registers to share between all users
187  *    (older machines require a fence register for display and blitter access
188  *    as well). Contention for the fence registers will cause the previous users
189  *    to be unmapped, and any new access will generate new page faults.
190  *
191  *  * running out of memory while servicing a fault may generate a SIGBUS,
192  *    rather than the expected SIGSEGV.
193  */
194 int i915_gem_mmap_gtt_version(void)
195 {
196 	return 5;
197 }
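
/*
 * Userspace discovers the feature level reported above through GETPARAM.
 * A minimal sketch (the param and ioctl are the real i915 uAPI; 'drm_fd'
 * and error handling are assumed):
 *
 *	int version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
 *		version = 0;
 *
 * A result of 4 or more means MMAP_OFFSET and its per-object mmap types
 * are available.
 */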
198 
199 static inline struct i915_gtt_view
200 compute_partial_view(const struct drm_i915_gem_object *obj,
201 		     pgoff_t page_offset,
202 		     unsigned int chunk)
203 {
204 	struct i915_gtt_view view;
205 
206 	if (i915_gem_object_is_tiled(obj))
207 		chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
208 
209 	view.type = I915_GTT_VIEW_PARTIAL;
210 	view.partial.offset = rounddown(page_offset, chunk);
211 	view.partial.size =
212 		min_t(unsigned int, chunk,
213 		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
214 
215 	/* If the partial covers the entire object, just create a normal VMA. */
216 	if (chunk >= obj->base.size >> PAGE_SHIFT)
217 		view.type = I915_GTT_VIEW_NORMAL;
218 
219 	return view;
220 }
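
/*
 * A worked example of the helper above, assuming 4K pages and an untiled
 * 16MiB (4096 page) object faulting at page_offset 1000 with a chunk of
 * 256 pages (MIN_CHUNK_PAGES below): the partial view starts at
 * rounddown(1000, 256) = page 768 and spans min(256, 4096 - 768) = 256
 * pages, i.e. a 1MiB window around the faulting page. Only if the chunk
 * covered the whole object (chunk >= 4096 here) would the view fall back
 * to I915_GTT_VIEW_NORMAL.
 */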
221 
222 static vm_fault_t i915_error_to_vmf_fault(int err)
223 {
224 	switch (err) {
225 	default:
226 		WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err);
227 		fallthrough;
228 	case -EIO: /* shmemfs failure from swap device */
229 	case -EFAULT: /* purged object */
230 	case -ENODEV: /* bad object, how did you get here! */
231 	case -ENXIO: /* unable to access backing store (on device) */
232 		return VM_FAULT_SIGBUS;
233 
234 	case -ENOMEM: /* our allocation failure */
235 		return VM_FAULT_OOM;
236 
237 	case 0:
238 	case -EAGAIN:
239 	case -ENOSPC: /* transient failure to evict? */
240 	case -ENOBUFS: /* temporarily out of fences? */
241 	case -ERESTARTSYS:
242 	case -EINTR:
243 	case -EBUSY:
244 		/*
245 		 * EBUSY is ok: this just means that another thread
246 		 * already did the job.
247 		 */
248 		return VM_FAULT_NOPAGE;
249 	}
250 }
251 
252 static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
253 {
254 	struct vm_area_struct *area = vmf->vma;
255 	struct i915_mmap_offset *mmo = area->vm_private_data;
256 	struct drm_i915_gem_object *obj = mmo->obj;
257 	unsigned long obj_offset;
258 	resource_size_t iomap;
259 	int err;
260 
261 	/* Sanity check that we allow writing into this object */
262 	if (unlikely(i915_gem_object_is_readonly(obj) &&
263 		     area->vm_flags & VM_WRITE))
264 		return VM_FAULT_SIGBUS;
265 
266 	if (i915_gem_object_lock_interruptible(obj, NULL))
267 		return VM_FAULT_NOPAGE;
268 
269 	err = i915_gem_object_pin_pages(obj);
270 	if (err)
271 		goto out;
272 
273 	iomap = -1;
274 	if (!i915_gem_object_has_struct_page(obj)) {
275 		iomap = obj->mm.region->iomap.base;
276 		iomap -= obj->mm.region->region.start;
277 	}
278 
279 	obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
280 	/* PTEs are revoked in obj->ops->put_pages() */
281 	err = remap_io_sg(area,
282 			  area->vm_start, area->vm_end - area->vm_start,
283 			  obj->mm.pages->sgl, obj_offset, iomap);
284 
285 	if (area->vm_flags & VM_WRITE) {
286 		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
287 		obj->mm.dirty = true;
288 	}
289 
290 	i915_gem_object_unpin_pages(obj);
291 
292 out:
293 	i915_gem_object_unlock(obj);
294 	return i915_error_to_vmf_fault(err);
295 }
296 
297 static void set_address_limits(struct vm_area_struct *area,
298 			       struct i915_vma *vma,
299 			       unsigned long obj_offset,
300 			       resource_size_t gmadr_start,
301 			       unsigned long *start_vaddr,
302 			       unsigned long *end_vaddr,
303 			       unsigned long *pfn)
304 {
305 	unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */
306 	long start, end; /* memory boundaries */
307 
308 	/*
309 	 * Let's move into the ">> PAGE_SHIFT"
310 	 * domain to be sure not to lose bits
311 	 */
312 	vm_start = area->vm_start >> PAGE_SHIFT;
313 	vm_end = area->vm_end >> PAGE_SHIFT;
314 	vma_size = vma->size >> PAGE_SHIFT;
315 
316 	/*
317 	 * Calculate the memory boundaries by considering the offset
318 	 * provided by the user during memory mapping and the offset
319 	 * provided for the partial mapping.
320 	 */
321 	start = vm_start;
322 	start -= obj_offset;
323 	start += vma->gtt_view.partial.offset;
324 	end = start + vma_size;
325 
326 	start = max_t(long, start, vm_start);
327 	end = min_t(long, end, vm_end);
328 
329 	/* Let's move back into the "<< PAGE_SHIFT" domain */
330 	*start_vaddr = (unsigned long)start << PAGE_SHIFT;
331 	*end_vaddr = (unsigned long)end << PAGE_SHIFT;
332 
333 	*pfn = (gmadr_start + i915_ggtt_offset(vma)) >> PAGE_SHIFT;
334 	*pfn += (*start_vaddr - area->vm_start) >> PAGE_SHIFT;
335 	*pfn += obj_offset - vma->gtt_view.partial.offset;
336 }
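
/*
 * A worked example of the arithmetic above, with hypothetical page numbers
 * (4K pages). Suppose userspace mapped object pages 4..11 at user pages
 * 1000..1007 (obj_offset = 4), while the view bound into the GGTT covers
 * object pages 8..11 (partial.offset = 8, vma->size = 4 pages). Then
 * start = 1000 - 4 + 8 = 1004 and end = 1004 + 4 = 1008, clamped to
 * [1000, 1008), so only user pages 1004..1007 get remapped. The pfn is the
 * view's base in the aperture plus (1004 - 1000) + (4 - 8) = 0 pages, i.e.
 * user page 1004 lands on the first page of the bound view.
 */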
337 
338 static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
339 {
340 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
341 	struct vm_area_struct *area = vmf->vma;
342 	struct i915_mmap_offset *mmo = area->vm_private_data;
343 	struct drm_i915_gem_object *obj = mmo->obj;
344 	struct drm_device *dev = obj->base.dev;
345 	struct drm_i915_private *i915 = to_i915(dev);
346 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
347 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
348 	bool write = area->vm_flags & VM_WRITE;
349 	struct i915_gem_ww_ctx ww;
350 	unsigned long obj_offset;
351 	unsigned long start, end; /* memory boundaries */
352 	intel_wakeref_t wakeref;
353 	struct i915_vma *vma;
354 	pgoff_t page_offset;
355 	unsigned long pfn;
356 	int srcu;
357 	int ret;
358 
359 	obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
360 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
361 	page_offset += obj_offset;
362 
363 	trace_i915_gem_object_fault(obj, page_offset, true, write);
364 
365 	wakeref = intel_runtime_pm_get(rpm);
366 
367 	i915_gem_ww_ctx_init(&ww, true);
368 retry:
369 	ret = i915_gem_object_lock(obj, &ww);
370 	if (ret)
371 		goto err_rpm;
372 
373 	/* Sanity check that we allow writing into this object */
374 	if (i915_gem_object_is_readonly(obj) && write) {
375 		ret = -EFAULT;
376 		goto err_rpm;
377 	}
378 
379 	ret = i915_gem_object_pin_pages(obj);
380 	if (ret)
381 		goto err_rpm;
382 
383 	ret = intel_gt_reset_lock_interruptible(ggtt->vm.gt, &srcu);
384 	if (ret)
385 		goto err_pages;
386 
387 	/* Now pin it into the GTT as needed */
388 	vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
389 					  PIN_MAPPABLE |
390 					  PIN_NONBLOCK /* NOWARN */ |
391 					  PIN_NOEVICT);
392 	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
393 		/* Use a partial view if it is bigger than available space */
394 		struct i915_gtt_view view =
395 			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
396 		unsigned int flags;
397 
398 		flags = PIN_MAPPABLE | PIN_NOSEARCH;
399 		if (view.type == I915_GTT_VIEW_NORMAL)
400 			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
401 
402 		/*
403 		 * Userspace is now writing through an untracked VMA; abandon
404 		 * all hope that the hardware is able to track future writes.
405 		 */
406 
407 		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
408 		if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
409 			flags = PIN_MAPPABLE;
410 			view.type = I915_GTT_VIEW_PARTIAL;
411 			vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
412 		}
413 
414 		/*
415 		 * The entire mappable GGTT is pinned? Unexpected!
416 		 * Try to evict the object we locked too, as normally we skip it
417 		 * due to lack of short term pinning inside execbuf.
418 		 */
419 		if (vma == ERR_PTR(-ENOSPC)) {
420 			ret = mutex_lock_interruptible(&ggtt->vm.mutex);
421 			if (!ret) {
422 				ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
423 				mutex_unlock(&ggtt->vm.mutex);
424 			}
425 			if (ret)
426 				goto err_reset;
427 			vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
428 		}
429 	}
430 	if (IS_ERR(vma)) {
431 		ret = PTR_ERR(vma);
432 		goto err_reset;
433 	}
434 
435 	/* Access to snoopable pages through the GTT is incoherent. */
436 	/*
437 	 * For objects created by userspace through GEM_CREATE with pat_index
438 	 * set by the set_pat extension, coherency is managed by userspace; make
439 	 * sure we don't fail handling the vm fault by calling
440 	 * i915_gem_object_has_cache_level(), which always returns true for such
441 	 * objects. Otherwise this helper function would fall back to checking
442 	 * whether the object is un-cached.
443 	 */
444 	if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
445 	      HAS_LLC(i915))) {
446 		ret = -EFAULT;
447 		goto err_unpin;
448 	}
449 
450 	ret = i915_vma_pin_fence(vma);
451 	if (ret)
452 		goto err_unpin;
453 
454 	/*
455 	 * Hand all the necessary parameters to this helper, which performs the
456 	 * arithmetic to calculate the virtual address start and end and
457 	 * the PFN (Page Frame Number).
458 	 */
459 	set_address_limits(area, vma, obj_offset, ggtt->gmadr.start,
460 			   &start, &end, &pfn);
461 
462 	/* Finally, remap it using the new GTT offset */
463 	ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap);
464 	if (ret)
465 		goto err_fence;
466 
467 	assert_rpm_wakelock_held(rpm);
468 
469 	/* Mark as being mmapped into userspace for later revocation */
470 	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
471 	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
472 		list_add(&obj->userfault_link, &to_gt(i915)->ggtt->userfault_list);
473 	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
474 
475 	/* Track the mmo associated with the fenced vma */
476 	vma->mmo = mmo;
477 
478 	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
479 		intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
480 				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
481 
482 	if (write) {
483 		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
484 		i915_vma_set_ggtt_write(vma);
485 		obj->mm.dirty = true;
486 	}
487 
488 err_fence:
489 	i915_vma_unpin_fence(vma);
490 err_unpin:
491 	__i915_vma_unpin(vma);
492 err_reset:
493 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
494 err_pages:
495 	i915_gem_object_unpin_pages(obj);
496 err_rpm:
497 	if (ret == -EDEADLK) {
498 		ret = i915_gem_ww_ctx_backoff(&ww);
499 		if (!ret)
500 			goto retry;
501 	}
502 	i915_gem_ww_ctx_fini(&ww);
503 	intel_runtime_pm_put(rpm, wakeref);
504 	return i915_error_to_vmf_fault(ret);
505 }
506 
507 static int
508 vm_access(struct vm_area_struct *area, unsigned long addr,
509 	  void *buf, int len, int write)
510 {
511 	struct i915_mmap_offset *mmo = area->vm_private_data;
512 	struct drm_i915_gem_object *obj = mmo->obj;
513 	struct i915_gem_ww_ctx ww;
514 	void *vaddr;
515 	int err = 0;
516 
517 	if (i915_gem_object_is_readonly(obj) && write)
518 		return -EACCES;
519 
520 	addr -= area->vm_start;
521 	if (range_overflows_t(u64, addr, len, obj->base.size))
522 		return -EINVAL;
523 
524 	i915_gem_ww_ctx_init(&ww, true);
525 retry:
526 	err = i915_gem_object_lock(obj, &ww);
527 	if (err)
528 		goto out;
529 
530 	/* As this is primarily for debugging, let's focus on simplicity */
531 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
532 	if (IS_ERR(vaddr)) {
533 		err = PTR_ERR(vaddr);
534 		goto out;
535 	}
536 
537 	if (write) {
538 		memcpy(vaddr + addr, buf, len);
539 		__i915_gem_object_flush_map(obj, addr, len);
540 	} else {
541 		memcpy(buf, vaddr + addr, len);
542 	}
543 
544 	i915_gem_object_unpin_map(obj);
545 out:
546 	if (err == -EDEADLK) {
547 		err = i915_gem_ww_ctx_backoff(&ww);
548 		if (!err)
549 			goto retry;
550 	}
551 	i915_gem_ww_ctx_fini(&ww);
552 
553 	if (err)
554 		return err;
555 
556 	return len;
557 }
558 
559 void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
560 {
561 	struct i915_vma *vma;
562 
563 	GEM_BUG_ON(!obj->userfault_count);
564 
565 	for_each_ggtt_vma(vma, obj)
566 		i915_vma_revoke_mmap(vma);
567 
568 	GEM_BUG_ON(obj->userfault_count);
569 }
570 
571 /*
572  * It is vital that we remove the page mapping if we have mapped a tiled
573  * object through the GTT and then lose the fence register due to
574  * resource pressure. Similarly, if the object has been moved out of the
575  * aperture, then pages mapped into userspace must be revoked. Removing the
576  * mapping will then trigger a page fault on the next user access, allowing
577  * fixup by vm_fault_gtt().
578  */
579 void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
580 {
581 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
582 	intel_wakeref_t wakeref;
583 
584 	/*
585 	 * Serialisation between user GTT access and our code depends upon
586 	 * revoking the CPU's PTE whilst the mutex is held. The next user
587 	 * pagefault then has to wait until we release the mutex.
588 	 *
589 	 * Note that RPM complicates this somewhat by adding an additional
590 	 * requirement that operations to the GGTT be made holding the RPM
591 	 * wakeref.
592 	 */
593 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
594 	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
595 
596 	if (!obj->userfault_count)
597 		goto out;
598 
599 	__i915_gem_object_release_mmap_gtt(obj);
600 
601 	/*
602 	 * Ensure that the CPU's PTEs are revoked and there are no outstanding
603 	 * memory transactions from userspace before we return. The TLB
604 	 * flushing implied by changing the PTEs above *should* be
605 	 * sufficient; an extra barrier here just provides us with a bit
606 	 * of paranoid documentation about our requirement to serialise
607 	 * memory writes before touching registers / GSM.
608 	 */
609 	wmb();
610 
611 out:
612 	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
613 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
614 }
615 
616 void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj)
617 {
618 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
619 	struct ttm_device *bdev = bo->bdev;
620 
621 	drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
622 
623 	/*
624 	 * We have exclusive access here via runtime suspend. All other callers
625 	 * must first grab the rpm wakeref.
626 	 */
627 	GEM_BUG_ON(!obj->userfault_count);
628 	list_del(&obj->userfault_link);
629 	obj->userfault_count = 0;
630 }
631 
632 void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
633 {
634 	struct i915_mmap_offset *mmo, *mn;
635 
636 	if (obj->ops->unmap_virtual)
637 		obj->ops->unmap_virtual(obj);
638 
639 	spin_lock(&obj->mmo.lock);
640 	rbtree_postorder_for_each_entry_safe(mmo, mn,
641 					     &obj->mmo.offsets, offset) {
642 		/*
643 		 * vma_node_unmap for GTT mmaps is already handled in
644 		 * __i915_gem_object_release_mmap_gtt
645 		 */
646 		if (mmo->mmap_type == I915_MMAP_TYPE_GTT)
647 			continue;
648 
649 		spin_unlock(&obj->mmo.lock);
650 		drm_vma_node_unmap(&mmo->vma_node,
651 				   obj->base.dev->anon_inode->i_mapping);
652 		spin_lock(&obj->mmo.lock);
653 	}
654 	spin_unlock(&obj->mmo.lock);
655 }
656 
657 static struct i915_mmap_offset *
658 lookup_mmo(struct drm_i915_gem_object *obj,
659 	   enum i915_mmap_type mmap_type)
660 {
661 	struct rb_node *rb;
662 
663 	spin_lock(&obj->mmo.lock);
664 	rb = obj->mmo.offsets.rb_node;
665 	while (rb) {
666 		struct i915_mmap_offset *mmo =
667 			rb_entry(rb, typeof(*mmo), offset);
668 
669 		if (mmo->mmap_type == mmap_type) {
670 			spin_unlock(&obj->mmo.lock);
671 			return mmo;
672 		}
673 
674 		if (mmo->mmap_type < mmap_type)
675 			rb = rb->rb_right;
676 		else
677 			rb = rb->rb_left;
678 	}
679 	spin_unlock(&obj->mmo.lock);
680 
681 	return NULL;
682 }
683 
684 static struct i915_mmap_offset *
685 insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
686 {
687 	struct rb_node *rb, **p;
688 
689 	spin_lock(&obj->mmo.lock);
690 	rb = NULL;
691 	p = &obj->mmo.offsets.rb_node;
692 	while (*p) {
693 		struct i915_mmap_offset *pos;
694 
695 		rb = *p;
696 		pos = rb_entry(rb, typeof(*pos), offset);
697 
698 		if (pos->mmap_type == mmo->mmap_type) {
699 			spin_unlock(&obj->mmo.lock);
700 			drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
701 					      &mmo->vma_node);
702 			kfree(mmo);
703 			return pos;
704 		}
705 
706 		if (pos->mmap_type < mmo->mmap_type)
707 			p = &rb->rb_right;
708 		else
709 			p = &rb->rb_left;
710 	}
711 	rb_link_node(&mmo->offset, rb, p);
712 	rb_insert_color(&mmo->offset, &obj->mmo.offsets);
713 	spin_unlock(&obj->mmo.lock);
714 
715 	return mmo;
716 }
717 
718 static struct i915_mmap_offset *
719 mmap_offset_attach(struct drm_i915_gem_object *obj,
720 		   enum i915_mmap_type mmap_type,
721 		   struct drm_file *file)
722 {
723 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
724 	struct i915_mmap_offset *mmo;
725 	int err;
726 
727 	GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops);
728 
729 	mmo = lookup_mmo(obj, mmap_type);
730 	if (mmo)
731 		goto out;
732 
733 	mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
734 	if (!mmo)
735 		return ERR_PTR(-ENOMEM);
736 
737 	mmo->obj = obj;
738 	mmo->mmap_type = mmap_type;
739 	drm_vma_node_reset(&mmo->vma_node);
740 
741 	err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
742 				 &mmo->vma_node, obj->base.size / PAGE_SIZE);
743 	if (likely(!err))
744 		goto insert;
745 
746 	/* Attempt to reap some mmap space from dead objects */
747 	err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT,
748 					       NULL);
749 	if (err)
750 		goto err;
751 
752 	i915_gem_drain_freed_objects(i915);
753 	err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
754 				 &mmo->vma_node, obj->base.size / PAGE_SIZE);
755 	if (err)
756 		goto err;
757 
758 insert:
759 	mmo = insert_mmo(obj, mmo);
760 	GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
761 out:
762 	if (file)
763 		drm_vma_node_allow_once(&mmo->vma_node, file);
764 	return mmo;
765 
766 err:
767 	kfree(mmo);
768 	return ERR_PTR(err);
769 }
770 
771 static int
772 __assign_mmap_offset(struct drm_i915_gem_object *obj,
773 		     enum i915_mmap_type mmap_type,
774 		     u64 *offset, struct drm_file *file)
775 {
776 	struct i915_mmap_offset *mmo;
777 
778 	if (i915_gem_object_never_mmap(obj))
779 		return -ENODEV;
780 
781 	if (obj->ops->mmap_offset) {
782 		if (mmap_type != I915_MMAP_TYPE_FIXED)
783 			return -ENODEV;
784 
785 		*offset = obj->ops->mmap_offset(obj);
786 		return 0;
787 	}
788 
789 	if (mmap_type == I915_MMAP_TYPE_FIXED)
790 		return -ENODEV;
791 
792 	if (mmap_type != I915_MMAP_TYPE_GTT &&
793 	    !i915_gem_object_has_struct_page(obj) &&
794 	    !i915_gem_object_has_iomem(obj))
795 		return -ENODEV;
796 
797 	mmo = mmap_offset_attach(obj, mmap_type, file);
798 	if (IS_ERR(mmo))
799 		return PTR_ERR(mmo);
800 
801 	*offset = drm_vma_node_offset_addr(&mmo->vma_node);
802 	return 0;
803 }
804 
805 static int
806 __assign_mmap_offset_handle(struct drm_file *file,
807 			    u32 handle,
808 			    enum i915_mmap_type mmap_type,
809 			    u64 *offset)
810 {
811 	struct drm_i915_gem_object *obj;
812 	int err;
813 
814 	obj = i915_gem_object_lookup(file, handle);
815 	if (!obj)
816 		return -ENOENT;
817 
818 	err = i915_gem_object_lock_interruptible(obj, NULL);
819 	if (err)
820 		goto out_put;
821 	err = __assign_mmap_offset(obj, mmap_type, offset, file);
822 	i915_gem_object_unlock(obj);
823 out_put:
824 	i915_gem_object_put(obj);
825 	return err;
826 }
827 
828 int
829 i915_gem_dumb_mmap_offset(struct drm_file *file,
830 			  struct drm_device *dev,
831 			  u32 handle,
832 			  u64 *offset)
833 {
834 	struct drm_i915_private *i915 = to_i915(dev);
835 	enum i915_mmap_type mmap_type;
836 
837 	if (HAS_LMEM(i915))
838 		mmap_type = I915_MMAP_TYPE_FIXED;
839 	else if (pat_enabled())
840 		mmap_type = I915_MMAP_TYPE_WC;
841 	else if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
842 		return -ENODEV;
843 	else
844 		mmap_type = I915_MMAP_TYPE_GTT;
845 
846 	return __assign_mmap_offset_handle(file, handle, mmap_type, offset);
847 }
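
/*
 * This is the backend for the generic dumb-buffer mapping path, reached via
 * the core DRM ioctls rather than any i915-specific one. A minimal sketch
 * (the ioctls and structs are core DRM uAPI; 'drm_fd', 'width', 'height'
 * and error handling are assumed):
 *
 *	struct drm_mode_create_dumb create = {
 *		.width = width, .height = height, .bpp = 32,
 *	};
 *	struct drm_mode_map_dumb map = { 0 };
 *	void *ptr;
 *
 *	ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
 *	map.handle = create.handle;
 *	ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
 *	ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE,
 *		   MAP_SHARED, drm_fd, map.offset);
 */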
848 
849 /**
850  * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing
851  * @dev: DRM device
852  * @data: GTT mapping ioctl data
853  * @file: GEM object info
854  *
855  * Simply returns the fake offset to userspace so it can mmap it.
856  * The mmap call will end up in drm_gem_mmap(), which will set things
857  * up so we can get faults in the handler above.
858  *
859  * The fault handler will take care of binding the object into the GTT
860  * (since it may have been evicted to make room for something), allocating
861  * a fence register, and mapping the appropriate aperture address into
862  * userspace.
863  */
864 int
865 i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
866 			   struct drm_file *file)
867 {
868 	struct drm_i915_private *i915 = to_i915(dev);
869 	struct drm_i915_gem_mmap_offset *args = data;
870 	enum i915_mmap_type type;
871 	int err;
872 
873 	/*
874 	 * Historically we failed to check args.pad and args.offset,
875 	 * and so we cannot use those fields for user input, nor can we
876 	 * add -EINVAL checks for them, as the ABI is fixed, i.e. old userspace
877 	 * may be feeding garbage into those fields.
878 	 *
879 	 * if (args->pad) return -EINVAL; is verboten!
880 	 */
881 
882 	err = i915_user_extensions(u64_to_user_ptr(args->extensions),
883 				   NULL, 0, NULL);
884 	if (err)
885 		return err;
886 
887 	switch (args->flags) {
888 	case I915_MMAP_OFFSET_GTT:
889 		if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
890 			return -ENODEV;
891 		type = I915_MMAP_TYPE_GTT;
892 		break;
893 
894 	case I915_MMAP_OFFSET_WC:
895 		if (!pat_enabled())
896 			return -ENODEV;
897 		type = I915_MMAP_TYPE_WC;
898 		break;
899 
900 	case I915_MMAP_OFFSET_WB:
901 		type = I915_MMAP_TYPE_WB;
902 		break;
903 
904 	case I915_MMAP_OFFSET_UC:
905 		if (!pat_enabled())
906 			return -ENODEV;
907 		type = I915_MMAP_TYPE_UC;
908 		break;
909 
910 	case I915_MMAP_OFFSET_FIXED:
911 		type = I915_MMAP_TYPE_FIXED;
912 		break;
913 
914 	default:
915 		return -EINVAL;
916 	}
917 
918 	return __assign_mmap_offset_handle(file, args->handle, type, &args->offset);
919 }
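
/*
 * Because I915_MMAP_OFFSET_FIXED is mandatory for objects providing
 * obj->ops->mmap_offset (the discrete/TTM path) and rejected otherwise,
 * userspace that does not know the platform can try FIXED first and fall
 * back on -ENODEV. A minimal sketch (the ioctl and flags are the real
 * uAPI; the fallback policy and error handling are an assumption of this
 * example):
 *
 *	struct drm_i915_gem_mmap_offset arg = { .handle = handle };
 *
 *	arg.flags = I915_MMAP_OFFSET_FIXED;
 *	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg) &&
 *	    errno == ENODEV) {
 *		arg.flags = I915_MMAP_OFFSET_WC;
 *		if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
 *			return -errno;
 *	}
 *	addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, drm_fd, arg.offset);
 */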
920 
921 static void vm_open(struct vm_area_struct *vma)
922 {
923 	struct i915_mmap_offset *mmo = vma->vm_private_data;
924 	struct drm_i915_gem_object *obj = mmo->obj;
925 
926 	GEM_BUG_ON(!obj);
927 	i915_gem_object_get(obj);
928 }
929 
930 static void vm_close(struct vm_area_struct *vma)
931 {
932 	struct i915_mmap_offset *mmo = vma->vm_private_data;
933 	struct drm_i915_gem_object *obj = mmo->obj;
934 
935 	GEM_BUG_ON(!obj);
936 	i915_gem_object_put(obj);
937 }
938 
939 static const struct vm_operations_struct vm_ops_gtt = {
940 	.fault = vm_fault_gtt,
941 	.access = vm_access,
942 	.open = vm_open,
943 	.close = vm_close,
944 };
945 
946 static const struct vm_operations_struct vm_ops_cpu = {
947 	.fault = vm_fault_cpu,
948 	.access = vm_access,
949 	.open = vm_open,
950 	.close = vm_close,
951 };
952 
953 static int singleton_release(struct inode *inode, struct file *file)
954 {
955 	struct drm_i915_private *i915 = file->private_data;
956 
957 	cmpxchg(&i915->gem.mmap_singleton, file, NULL);
958 	drm_dev_put(&i915->drm);
959 
960 	return 0;
961 }
962 
963 static const struct file_operations singleton_fops = {
964 	.owner = THIS_MODULE,
965 	.release = singleton_release,
966 };
967 
968 static struct file *mmap_singleton(struct drm_i915_private *i915)
969 {
970 	struct file *file;
971 
972 	file = get_file_active(&i915->gem.mmap_singleton);
973 	if (file)
974 		return file;
975 
976 	file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR);
977 	if (IS_ERR(file))
978 		return file;
979 
980 	/* Everyone shares a single global address space */
981 	file->f_mapping = i915->drm.anon_inode->i_mapping;
982 
983 	smp_store_mb(i915->gem.mmap_singleton, file);
984 	drm_dev_get(&i915->drm);
985 
986 	return file;
987 }
988 
989 static int
990 i915_gem_object_mmap(struct drm_i915_gem_object *obj,
991 		     struct i915_mmap_offset *mmo,
992 		     struct vm_area_struct *vma)
993 {
994 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
995 	struct drm_device *dev = &i915->drm;
996 	struct file *anon;
997 
998 	if (i915_gem_object_is_readonly(obj)) {
999 		if (vma->vm_flags & VM_WRITE) {
1000 			i915_gem_object_put(obj);
1001 			return -EINVAL;
1002 		}
1003 		vm_flags_clear(vma, VM_MAYWRITE);
1004 	}
1005 
1006 	anon = mmap_singleton(to_i915(dev));
1007 	if (IS_ERR(anon)) {
1008 		i915_gem_object_put(obj);
1009 		return PTR_ERR(anon);
1010 	}
1011 
1012 	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
1013 
1014 	/*
1015 	 * We keep the ref on mmo->obj, not vm_file, but we require
1016 	 * vma->vm_file->f_mapping, see vma_link(), for later revocation.
1017 	 * Our userspace is accustomed to having per-file resource cleanup
1018 	 * (i.e. contexts, objects and requests) on their close(fd), which
1019 	 * requires avoiding extraneous references to their filp, hence
1020 	 * we prefer to use an anonymous file for their mmaps.
1021 	 */
1022 	vma_set_file(vma, anon);
1023 	/* Drop the initial creation reference, the vma is now holding one. */
1024 	fput(anon);
1025 
1026 	if (obj->ops->mmap_ops) {
1027 		vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
1028 		vma->vm_ops = obj->ops->mmap_ops;
1029 		vma->vm_private_data = obj->base.vma_node.driver_private;
1030 		return 0;
1031 	}
1032 
1033 	vma->vm_private_data = mmo;
1034 
1035 	switch (mmo->mmap_type) {
1036 	case I915_MMAP_TYPE_WC:
1037 		vma->vm_page_prot =
1038 			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1039 		vma->vm_ops = &vm_ops_cpu;
1040 		break;
1041 
1042 	case I915_MMAP_TYPE_FIXED:
1043 		GEM_WARN_ON(1);
1044 		fallthrough;
1045 	case I915_MMAP_TYPE_WB:
1046 		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
1047 		vma->vm_ops = &vm_ops_cpu;
1048 		break;
1049 
1050 	case I915_MMAP_TYPE_UC:
1051 		vma->vm_page_prot =
1052 			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
1053 		vma->vm_ops = &vm_ops_cpu;
1054 		break;
1055 
1056 	case I915_MMAP_TYPE_GTT:
1057 		vma->vm_page_prot =
1058 			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1059 		vma->vm_ops = &vm_ops_gtt;
1060 		break;
1061 	}
1062 	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
1063 
1064 	return 0;
1065 }
1066 
1067 /*
1068  * This overcomes the limitation in drm_gem_mmap's assignment of a
1069  * drm_gem_object as the vma->vm_private_data, since we need to
1070  * be able to resolve multiple mmap offsets which could be tied
1071  * to a single gem object.
1072  */
1073 int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
1074 {
1075 	struct drm_vma_offset_node *node;
1076 	struct drm_file *priv = filp->private_data;
1077 	struct drm_device *dev = priv->minor->dev;
1078 	struct drm_i915_gem_object *obj = NULL;
1079 	struct i915_mmap_offset *mmo = NULL;
1080 
1081 	if (drm_dev_is_unplugged(dev))
1082 		return -ENODEV;
1083 
1084 	rcu_read_lock();
1085 	drm_vma_offset_lock_lookup(dev->vma_offset_manager);
1086 	node = drm_vma_offset_lookup_locked(dev->vma_offset_manager,
1087 					    vma->vm_pgoff,
1088 					    vma_pages(vma));
1089 	if (node && drm_vma_node_is_allowed(node, priv)) {
1090 		/*
1091 		 * Skip 0-refcnted objects as they are in the process of being
1092 		 * destroyed and will be invalid when the vma manager lock
1093 		 * is released.
1094 		 */
1095 		if (!node->driver_private) {
1096 			mmo = container_of(node, struct i915_mmap_offset, vma_node);
1097 			obj = i915_gem_object_get_rcu(mmo->obj);
1098 
1099 			GEM_BUG_ON(obj && obj->ops->mmap_ops);
1100 		} else {
1101 			obj = i915_gem_object_get_rcu
1102 				(container_of(node, struct drm_i915_gem_object,
1103 					      base.vma_node));
1104 
1105 			GEM_BUG_ON(obj && !obj->ops->mmap_ops);
1106 		}
1107 	}
1108 	drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
1109 	rcu_read_unlock();
1110 	if (!obj)
1111 		return node ? -EACCES : -EINVAL;
1112 
1113 	return i915_gem_object_mmap(obj, mmo, vma);
1114 }
1115 
1116 int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma)
1117 {
1118 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
1119 	struct drm_device *dev = &i915->drm;
1120 	struct i915_mmap_offset *mmo = NULL;
1121 	enum i915_mmap_type mmap_type;
1122 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1123 
1124 	if (drm_dev_is_unplugged(dev))
1125 		return -ENODEV;
1126 
1127 	/* handle ttm object */
1128 	if (obj->ops->mmap_ops) {
1129 		/*
1130 		 * The ttm fault handler, ttm_bo_vm_fault_reserved(), uses the fake
1131 		 * offset to calculate the page offset, so set that up.
1132 		 */
1133 		vma->vm_pgoff += drm_vma_node_start(&obj->base.vma_node);
1134 	} else {
1135 		/* handle stolen and smem objects */
1136 		mmap_type = i915_ggtt_has_aperture(ggtt) ? I915_MMAP_TYPE_GTT : I915_MMAP_TYPE_WC;
1137 		mmo = mmap_offset_attach(obj, mmap_type, NULL);
1138 		if (IS_ERR(mmo))
1139 			return PTR_ERR(mmo);
1140 
1141 		vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node);
1142 	}
1143 
1144 	/*
1145 	 * When we install vm_ops for mmap we are too late for
1146 	 * the vm_ops->open(), which would have increased the ref_count of
1147 	 * this obj; the ref_count will still be decreased by vm_ops->close().
1148 	 * To balance this, increase the obj ref_count here.
1149 	 */
1150 	obj = i915_gem_object_get(obj);
1151 	return i915_gem_object_mmap(obj, mmo, vma);
1152 }
1153 
1154 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1155 #include "selftests/i915_gem_mman.c"
1156 #endif
1157