xref: /linux/drivers/gpu/drm/msm/msm_gem_vma.c (revision bed29492d413349e5b13f21936655064cdb63c91)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2016 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  */
6 
7 #include "drm/drm_file.h"
8 #include "drm/msm_drm.h"
9 #include "linux/file.h"
10 #include "linux/sync_file.h"
11 
12 #include "msm_drv.h"
13 #include "msm_gem.h"
14 #include "msm_gpu.h"
15 #include "msm_mmu.h"
16 #include "msm_syncobj.h"
17 
/* Debug trace helper: prefixes the message with the calling function and line. */
#define vm_dbg(fmt, ...) pr_debug("%s:%d: "fmt"\n", __func__, __LINE__, ##__VA_ARGS__)

/*
 * log2 of the number of entries in the per-VM op log.  Sampled (and clamped
 * to at most 8) by msm_gem_vm_create() for user managed VMs; a value of zero
 * means no log is allocated.
 */
static uint vm_log_shift = 0;
MODULE_PARM_DESC(vm_log_shift, "Length of VM op log");
module_param_named(vm_log_shift, vm_log_shift, uint, 0600);
23 
/**
 * struct msm_vm_map_op - create new pgtable mapping
 *
 * Parameter block consumed by vm_map_op(), which logs the operation and
 * forwards @iova, @sgt, @offset, @range and @prot to the MMU map callback.
 */
struct msm_vm_map_op {
	/** @iova: start address for mapping */
	uint64_t iova;
	/** @range: size of the region to map */
	uint64_t range;
	/** @offset: offset into @sgt to map */
	uint64_t offset;
	/** @sgt: pages to map, or NULL for a PRR mapping */
	struct sg_table *sgt;
	/** @prot: the mapping protection flags */
	int prot;

	/**
	 * @queue_id: The id of the submitqueue the operation is performed
	 * on, or zero for (in particular) UNMAP ops triggered outside of
	 * a submitqueue (ie. process cleanup).  Only recorded in the VM
	 * op log.
	 */
	int queue_id;
};
46 
/**
 * struct msm_vm_unmap_op - unmap a range of pages from pgtable
 *
 * Parameter block consumed by vm_unmap_op(), which logs the operation and
 * forwards @iova and @range to the MMU unmap callback.
 */
struct msm_vm_unmap_op {
	/** @iova: start address for unmap */
	uint64_t iova;
	/** @range: size of region to unmap */
	uint64_t range;

	/**
	 * @reason: The reason for the unmap, used as the op name in the VM
	 * op log.  When NULL, vm_unmap_op() logs "unmap" instead.
	 */
	const char *reason;

	/**
	 * @queue_id: The id of the submitqueue the operation is performed
	 * on, or zero for (in particular) UNMAP ops triggered outside of
	 * a submitqueue (ie. process cleanup)
	 */
	int queue_id;
};
66 
/**
 * struct msm_vm_op - A MAP or UNMAP operation
 *
 * Queued on msm_vm_bind_job::vm_ops by vm_op_enqueue() and consumed (and
 * freed) when the job runs.
 */
struct msm_vm_op {
	/** @op: The operation type */
	enum {
		MSM_VM_OP_MAP = 1,
		MSM_VM_OP_UNMAP,
	} op;
	union {
		/** @map: Parameters used if op == MSM_VM_OP_MAP */
		struct msm_vm_map_op map;
		/** @unmap: Parameters used if op == MSM_VM_OP_UNMAP */
		struct msm_vm_unmap_op unmap;
	};
	/** @node: list head in msm_vm_bind_job::vm_ops */
	struct list_head node;

	/**
	 * @obj: backing object for pages to be mapped/unmapped
	 *
	 * Async unmap ops, in particular, must hold a reference to the
	 * original GEM object backing the mapping that will be unmapped.
	 * But the same can be required in the map path, for example if
	 * there is not a corresponding unmap op, such as process exit.
	 *
	 * This ensures that the pages backing the mapping are not freed
	 * before the mapping is torn down.
	 *
	 * May be NULL (eg. for mappings with no backing object).  When
	 * non-NULL, vm_op_enqueue() takes the reference on behalf of
	 * the op.
	 */
	struct drm_gem_object *obj;
};
98 
/**
 * struct msm_vm_bind_job - Tracking for a VM_BIND ioctl
 *
 * A table of userspace requested VM updates (MSM_VM_BIND_OP_UNMAP/MAP/MAP_NULL)
 * gets applied to the vm, generating a list of VM ops (MSM_VM_OP_MAP/UNMAP)
 * which are applied to the pgtables asynchronously.  For example a userspace
 * requested MSM_VM_BIND_OP_MAP could end up generating both an MSM_VM_OP_UNMAP
 * to unmap an existing mapping, and a MSM_VM_OP_MAP to apply the new mapping.
 */
struct msm_vm_bind_job {
	/** @base: base class for drm_sched jobs */
	struct drm_sched_job base;
	/** @vm: The VM being operated on */
	struct drm_gpuvm *vm;
	/** @fence: The fence that is signaled when job completes */
	struct dma_fence *fence;
	/** @queue: The queue that the job runs on */
	struct msm_gpu_submitqueue *queue;
	/** @prealloc: Tracking for pre-allocated MMU pgtable pages */
	struct msm_mmu_prealloc prealloc;
	/** @vm_ops: a list of struct msm_vm_op */
	struct list_head vm_ops;
	/** @bos_pinned: are the GEM objects being bound pinned? */
	bool bos_pinned;
	/**
	 * @nr_ops: the number of userspace requested ops
	 *
	 * Incremented by lookup_op() as each entry of @ops is filled in.
	 */
	unsigned int nr_ops;
	/**
	 * @ops: the userspace requested ops
	 *
	 * The userspace requested ops are copied/parsed and validated
	 * before we start applying the updates to try to do as much up-
	 * front error checking as possible, to avoid the VM being in an
	 * undefined state due to partially executed VM_BIND.
	 *
	 * This table also serves to hold a reference to the backing GEM
	 * objects.
	 */
	struct msm_vm_bind_op {
		/* The requested MSM_VM_BIND_OP_x, copied from userspace */
		uint32_t op;
		/* Op flags, copied from userspace */
		uint32_t flags;
		/*
		 * Initially holds the userspace GEM handle; once the handle
		 * has been looked up it is replaced, in the same storage, by
		 * the GEM object pointer (or NULL if the handle was zero).
		 */
		union {
			struct drm_gem_object *obj;
			uint32_t handle;
		};
		/* Offset into the GEM object at which the mapping starts */
		uint64_t obj_offset;
		/* Start of the GPU virtual address range */
		uint64_t iova;
		/* Size of the GPU virtual address range */
		uint64_t range;
	} ops[];
};
148 
/*
 * Iterate over the non-NULL GEM objects in (_job)->ops[], assigning each in
 * turn to obj.  Entries whose obj is NULL are skipped.  Note that the
 * expansion ends in an unbraced if, so the statement following the macro
 * becomes that if's body.
 */
#define job_foreach_bo(obj, _job) \
	for (unsigned i = 0; i < (_job)->nr_ops; i++) \
		if ((obj = (_job)->ops[i].obj))
152 
153 static inline struct msm_vm_bind_job *to_msm_vm_bind_job(struct drm_sched_job *job)
154 {
155 	return container_of(job, struct msm_vm_bind_job, base);
156 }
157 
158 static void
159 msm_gem_vm_free(struct drm_gpuvm *gpuvm)
160 {
161 	struct msm_gem_vm *vm = container_of(gpuvm, struct msm_gem_vm, base);
162 
163 	drm_mm_takedown(&vm->mm);
164 	if (vm->mmu)
165 		vm->mmu->funcs->destroy(vm->mmu);
166 	dma_fence_put(vm->last_fence);
167 	put_pid(vm->pid);
168 	kfree(vm->log);
169 	kfree(vm);
170 }
171 
172 /**
173  * msm_gem_vm_unusable() - Mark a VM as unusable
174  * @gpuvm: the VM to mark unusable
175  */
176 void
177 msm_gem_vm_unusable(struct drm_gpuvm *gpuvm)
178 {
179 	struct msm_gem_vm *vm = to_msm_vm(gpuvm);
180 	uint32_t vm_log_len = (1 << vm->log_shift);
181 	uint32_t vm_log_mask = vm_log_len - 1;
182 	uint32_t nr_vm_logs;
183 	int first;
184 
185 	vm->unusable = true;
186 
187 	/* Bail if no log, or empty log: */
188 	if (!vm->log || !vm->log[0].op)
189 		return;
190 
191 	mutex_lock(&vm->mmu_lock);
192 
193 	/*
194 	 * log_idx is the next entry to overwrite, meaning it is the oldest, or
195 	 * first, entry (other than the special case handled below where the
196 	 * log hasn't wrapped around yet)
197 	 */
198 	first = vm->log_idx;
199 
200 	if (!vm->log[first].op) {
201 		/*
202 		 * If the next log entry has not been written yet, then only
203 		 * entries 0 to idx-1 are valid (ie. we haven't wrapped around
204 		 * yet)
205 		 */
206 		nr_vm_logs = MAX(0, first - 1);
207 		first = 0;
208 	} else {
209 		nr_vm_logs = vm_log_len;
210 	}
211 
212 	pr_err("vm-log:\n");
213 	for (int i = 0; i < nr_vm_logs; i++) {
214 		int idx = (i + first) & vm_log_mask;
215 		struct msm_gem_vm_log_entry *e = &vm->log[idx];
216 		pr_err("  - %s:%d: 0x%016llx-0x%016llx\n",
217 		       e->op, e->queue_id, e->iova,
218 		       e->iova + e->range);
219 	}
220 
221 	mutex_unlock(&vm->mmu_lock);
222 }
223 
224 static void
225 vm_log(struct msm_gem_vm *vm, const char *op, uint64_t iova, uint64_t range, int queue_id)
226 {
227 	int idx;
228 
229 	if (!vm->managed)
230 		lockdep_assert_held(&vm->mmu_lock);
231 
232 	vm_dbg("%s:%p:%d: %016llx %016llx", op, vm, queue_id, iova, iova + range);
233 
234 	if (!vm->log)
235 		return;
236 
237 	idx = vm->log_idx;
238 	vm->log[idx].op = op;
239 	vm->log[idx].iova = iova;
240 	vm->log[idx].range = range;
241 	vm->log[idx].queue_id = queue_id;
242 	vm->log_idx = (vm->log_idx + 1) & ((1 << vm->log_shift) - 1);
243 }
244 
245 static void
246 vm_unmap_op(struct msm_gem_vm *vm, const struct msm_vm_unmap_op *op)
247 {
248 	const char *reason = op->reason;
249 
250 	if (!reason)
251 		reason = "unmap";
252 
253 	vm_log(vm, reason, op->iova, op->range, op->queue_id);
254 
255 	vm->mmu->funcs->unmap(vm->mmu, op->iova, op->range);
256 }
257 
258 static int
259 vm_map_op(struct msm_gem_vm *vm, const struct msm_vm_map_op *op)
260 {
261 	vm_log(vm, "map", op->iova, op->range, op->queue_id);
262 
263 	return vm->mmu->funcs->map(vm->mmu, op->iova, op->sgt, op->offset,
264 				   op->range, op->prot);
265 }
266 
267 /* Actually unmap memory for the vma */
268 void msm_gem_vma_unmap(struct drm_gpuva *vma, const char *reason)
269 {
270 	struct msm_gem_vm *vm = to_msm_vm(vma->vm);
271 	struct msm_gem_vma *msm_vma = to_msm_vma(vma);
272 
273 	/* Don't do anything if the memory isn't mapped */
274 	if (!msm_vma->mapped)
275 		return;
276 
277 	/*
278 	 * The mmu_lock is only needed when preallocation is used.  But
279 	 * in that case we don't need to worry about recursion into
280 	 * shrinker
281 	 */
282 	if (!vm->managed)
283 		 mutex_lock(&vm->mmu_lock);
284 
285 	vm_unmap_op(vm, &(struct msm_vm_unmap_op){
286 		.iova = vma->va.addr,
287 		.range = vma->va.range,
288 		.reason = reason,
289 	});
290 
291 	if (!vm->managed)
292 		mutex_unlock(&vm->mmu_lock);
293 
294 	msm_vma->mapped = false;
295 }
296 
297 /* Map and pin vma: */
298 int
299 msm_gem_vma_map(struct drm_gpuva *vma, int prot, struct sg_table *sgt)
300 {
301 	struct msm_gem_vm *vm = to_msm_vm(vma->vm);
302 	struct msm_gem_vma *msm_vma = to_msm_vma(vma);
303 	int ret;
304 
305 	if (GEM_WARN_ON(!vma->va.addr))
306 		return -EINVAL;
307 
308 	if (msm_vma->mapped)
309 		return 0;
310 
311 	msm_vma->mapped = true;
312 
313 	/*
314 	 * The mmu_lock is only needed when preallocation is used.  But
315 	 * in that case we don't need to worry about recursion into
316 	 * shrinker
317 	 */
318 	if (!vm->managed)
319 		mutex_lock(&vm->mmu_lock);
320 
321 	/*
322 	 * NOTE: if not using pgtable preallocation, we cannot hold
323 	 * a lock across map/unmap which is also used in the job_run()
324 	 * path, as this can cause deadlock in job_run() vs shrinker/
325 	 * reclaim.
326 	 */
327 	ret = vm_map_op(vm, &(struct msm_vm_map_op){
328 		.iova = vma->va.addr,
329 		.range = vma->va.range,
330 		.offset = vma->gem.offset,
331 		.sgt = sgt,
332 		.prot = prot,
333 	});
334 
335 	if (!vm->managed)
336 		mutex_unlock(&vm->mmu_lock);
337 
338 	if (ret)
339 		msm_vma->mapped = false;
340 
341 	return ret;
342 }
343 
344 /* Close an iova.  Warn if it is still in use */
345 void msm_gem_vma_close(struct drm_gpuva *vma)
346 {
347 	struct msm_gem_vm *vm = to_msm_vm(vma->vm);
348 	struct msm_gem_vma *msm_vma = to_msm_vma(vma);
349 
350 	GEM_WARN_ON(msm_vma->mapped);
351 
352 	drm_gpuvm_resv_assert_held(&vm->base);
353 
354 	if (vma->gem.obj)
355 		msm_gem_assert_locked(vma->gem.obj);
356 
357 	if (vma->va.addr && vm->managed)
358 		drm_mm_remove_node(&msm_vma->node);
359 
360 	drm_gpuva_remove(vma);
361 	drm_gpuva_unlink(vma);
362 
363 	kfree(vma);
364 }
365 
/*
 * Create a new vma and allocate an iova for it.
 *
 * @gpuvm:       the VM to create the vma in
 * @obj:         the backing GEM object, or NULL for a mapping with no
 *               backing object (not valid for kernel managed VMs)
 * @offset:      offset into @obj (must be zero for kernel managed VMs)
 * @range_start: for kernel managed VMs, the lower bound of the range to
 *               allocate an address from; otherwise the start address
 * @range_end:   for kernel managed VMs, the upper bound of the range to
 *               allocate an address from; otherwise the end address
 *
 * The VM's resv must be held by the caller.  The new vma starts out in
 * the unmapped state.
 *
 * Returns the new vma, or an ERR_PTR() on failure.
 */
struct drm_gpuva *
msm_gem_vma_new(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj,
		u64 offset, u64 range_start, u64 range_end)
{
	struct msm_gem_vm *vm = to_msm_vm(gpuvm);
	struct drm_gpuvm_bo *vm_bo;
	struct msm_gem_vma *vma;
	int ret;

	/* _NO_SHARE objs cannot be mapped outside of their "host" vm: */
	if (obj && (to_msm_bo(obj)->flags & MSM_BO_NO_SHARE) &&
	    GEM_WARN_ON(obj->resv != drm_gpuvm_resv(gpuvm))) {
		return ERR_PTR(-EINVAL);
	}

	drm_gpuvm_resv_assert_held(&vm->base);

	vma = kzalloc_obj(*vma);
	if (!vma)
		return ERR_PTR(-ENOMEM);

	if (vm->managed) {
		BUG_ON(offset != 0);
		BUG_ON(!obj);  /* NULL mappings not valid for kernel managed VM */
		/* Kernel picks the address: allocate obj->size from vm->mm */
		ret = drm_mm_insert_node_in_range(&vm->mm, &vma->node,
						obj->size, PAGE_SIZE, 0,
						range_start, range_end, 0);

		if (ret)
			goto err_free_vma;

		/* From here on the range is the one that was allocated: */
		range_start = vma->node.start;
		range_end   = range_start + obj->size;
	}

	/* The mapping should never be larger than the object backing it: */
	if (obj)
		GEM_WARN_ON((range_end - range_start) > obj->size);

	struct drm_gpuva_op_map op_map = {
		.va.addr = range_start,
		.va.range = range_end - range_start,
		.gem.obj = obj,
		.gem.offset = offset,
	};

	drm_gpuva_init_from_op(&vma->base, &op_map);
	vma->mapped = false;

	ret = drm_gpuva_insert(&vm->base, &vma->base);
	if (ret)
		goto err_free_range;

	/* Without a backing object there is no vm_bo to link against: */
	if (!obj)
		return &vma->base;

	vm_bo = drm_gpuvm_bo_obtain_locked(&vm->base, obj);
	if (IS_ERR(vm_bo)) {
		ret = PTR_ERR(vm_bo);
		goto err_va_remove;
	}

	drm_gpuvm_bo_extobj_add(vm_bo);
	drm_gpuva_link(&vma->base, vm_bo);
	/* Drop the local ref; warn if that unexpectedly was the last one: */
	GEM_WARN_ON(drm_gpuvm_bo_put(vm_bo));

	return &vma->base;

	/* Unwind in the reverse order of setup: */
err_va_remove:
	drm_gpuva_remove(&vma->base);
err_free_range:
	if (vm->managed)
		drm_mm_remove_node(&vma->node);
err_free_vma:
	kfree(vma);
	return ERR_PTR(ret);
}
443 
444 static int
445 msm_gem_vm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
446 {
447 	struct drm_gem_object *obj = vm_bo->obj;
448 	struct drm_gpuva *vma;
449 	int ret;
450 
451 	vm_dbg("validate: %p", obj);
452 
453 	msm_gem_assert_locked(obj);
454 
455 	drm_gpuvm_bo_for_each_va (vma, vm_bo) {
456 		ret = msm_gem_pin_vma_locked(obj, vma);
457 		if (ret)
458 			return ret;
459 	}
460 
461 	return 0;
462 }
463 
/* Private argument passed through to the gpuvm sm_step_* callbacks */
struct op_arg {
	/* vma flags to apply to the vma created (or kept) for this op */
	unsigned flags;
	/* the job which the generated msm_vm_op's are queued on */
	struct msm_vm_bind_job *job;
	/* the userspace requested op currently being applied */
	const struct msm_vm_bind_op *op;
	/*
	 * Set by the unmap callback when it detects an in-place remap and
	 * leaves the existing vma alone; tells the map callback to do nothing.
	 */
	bool kept;
};
470 
471 static int
472 vm_op_enqueue(struct op_arg *arg, struct msm_vm_op _op)
473 {
474 	struct msm_vm_op *op = kmalloc_obj(*op);
475 	if (!op)
476 		return -ENOMEM;
477 
478 	*op = _op;
479 	list_add_tail(&op->node, &arg->job->vm_ops);
480 
481 	if (op->obj)
482 		drm_gem_object_get(op->obj);
483 
484 	return 0;
485 }
486 
487 static struct drm_gpuva *
488 vma_from_op(struct op_arg *arg, struct drm_gpuva_op_map *op)
489 {
490 	return msm_gem_vma_new(arg->job->vm, op->gem.obj, op->gem.offset,
491 			       op->va.addr, op->va.addr + op->va.range);
492 }
493 
494 static int
495 msm_gem_vm_sm_step_map(struct drm_gpuva_op *op, void *_arg)
496 {
497 	struct op_arg *arg = _arg;
498 	struct msm_vm_bind_job *job = arg->job;
499 	struct drm_gem_object *obj = op->map.gem.obj;
500 	struct drm_gpuva *vma;
501 	struct sg_table *sgt;
502 	unsigned prot;
503 	int ret;
504 
505 	if (arg->kept)
506 		return 0;
507 
508 	vma = vma_from_op(arg, &op->map);
509 	if (WARN_ON(IS_ERR(vma)))
510 		return PTR_ERR(vma);
511 
512 	vm_dbg("%p:%p:%p: %016llx %016llx", vma->vm, vma, vma->gem.obj,
513 	       vma->va.addr, vma->va.range);
514 
515 	if (obj) {
516 		sgt = to_msm_bo(obj)->sgt;
517 		prot = msm_gem_prot(obj);
518 	} else {
519 		sgt = NULL;
520 		prot = IOMMU_READ | IOMMU_WRITE;
521 	}
522 
523 	ret = vm_op_enqueue(arg, (struct msm_vm_op){
524 		.op = MSM_VM_OP_MAP,
525 		.map = {
526 			.sgt = sgt,
527 			.iova = vma->va.addr,
528 			.range = vma->va.range,
529 			.offset = vma->gem.offset,
530 			.prot = prot,
531 			.queue_id = job->queue->id,
532 		},
533 		.obj = vma->gem.obj,
534 	});
535 
536 	if (ret)
537 		return ret;
538 
539 	vma->flags = ((struct op_arg *)arg)->flags;
540 	to_msm_vma(vma)->mapped = true;
541 
542 	return 0;
543 }
544 
545 static int
546 msm_gem_vm_sm_step_remap(struct drm_gpuva_op *op, void *arg)
547 {
548 	struct msm_vm_bind_job *job = ((struct op_arg *)arg)->job;
549 	struct drm_gpuvm *vm = job->vm;
550 	struct drm_gpuva *orig_vma = op->remap.unmap->va;
551 	struct drm_gpuva *prev_vma = NULL, *next_vma = NULL;
552 	struct drm_gpuvm_bo *vm_bo = orig_vma->vm_bo;
553 	bool mapped = to_msm_vma(orig_vma)->mapped;
554 	unsigned flags;
555 	int ret;
556 
557 	vm_dbg("orig_vma: %p:%p:%p: %016llx %016llx", vm, orig_vma,
558 	       orig_vma->gem.obj, orig_vma->va.addr, orig_vma->va.range);
559 
560 	if (mapped) {
561 		uint64_t unmap_start, unmap_range;
562 
563 		drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range);
564 
565 		ret = vm_op_enqueue(arg, (struct msm_vm_op){
566 			.op = MSM_VM_OP_UNMAP,
567 			.unmap = {
568 				.iova = unmap_start,
569 				.range = unmap_range,
570 				.queue_id = job->queue->id,
571 			},
572 			.obj = orig_vma->gem.obj,
573 		});
574 
575 		if (ret)
576 			return ret;
577 
578 		/*
579 		 * Part of this GEM obj is still mapped, but we're going to kill the
580 		 * existing VMA and replace it with one or two new ones (ie. two if
581 		 * the unmapped range is in the middle of the existing (unmap) VMA).
582 		 * So just set the state to unmapped:
583 		 */
584 		to_msm_vma(orig_vma)->mapped = false;
585 	}
586 
587 	/*
588 	 * Hold a ref to the vm_bo between the msm_gem_vma_close() and the
589 	 * creation of the new prev/next vma's, in case the vm_bo is tracked
590 	 * in the VM's evict list:
591 	 */
592 	if (vm_bo)
593 		drm_gpuvm_bo_get(vm_bo);
594 
595 	/*
596 	 * The prev_vma and/or next_vma are replacing the unmapped vma, and
597 	 * therefore should preserve it's flags:
598 	 */
599 	flags = orig_vma->flags;
600 
601 	msm_gem_vma_close(orig_vma);
602 
603 	if (op->remap.prev) {
604 		prev_vma = vma_from_op(arg, op->remap.prev);
605 		if (WARN_ON(IS_ERR(prev_vma)))
606 			return PTR_ERR(prev_vma);
607 
608 		vm_dbg("prev_vma: %p:%p: %016llx %016llx", vm, prev_vma, prev_vma->va.addr, prev_vma->va.range);
609 		to_msm_vma(prev_vma)->mapped = mapped;
610 		prev_vma->flags = flags;
611 	}
612 
613 	if (op->remap.next) {
614 		next_vma = vma_from_op(arg, op->remap.next);
615 		if (WARN_ON(IS_ERR(next_vma)))
616 			return PTR_ERR(next_vma);
617 
618 		vm_dbg("next_vma: %p:%p: %016llx %016llx", vm, next_vma, next_vma->va.addr, next_vma->va.range);
619 		to_msm_vma(next_vma)->mapped = mapped;
620 		next_vma->flags = flags;
621 	}
622 
623 	if (!mapped)
624 		drm_gpuvm_bo_evict(vm_bo, true);
625 
626 	/* Drop the previous ref: */
627 	drm_gpuvm_bo_put(vm_bo);
628 
629 	return 0;
630 }
631 
632 static int
633 msm_gem_vm_sm_step_unmap(struct drm_gpuva_op *op, void *_arg)
634 {
635 	struct op_arg *arg = _arg;
636 	struct msm_vm_bind_job *job = arg->job;
637 	struct drm_gpuva *vma = op->unmap.va;
638 	struct msm_gem_vma *msm_vma = to_msm_vma(vma);
639 	int ret;
640 
641 	vm_dbg("%p:%p:%p: %016llx %016llx", vma->vm, vma, vma->gem.obj,
642 	       vma->va.addr, vma->va.range);
643 
644 	/*
645 	 * Detect in-place remap.  Turnip does this to change the vma flags,
646 	 * in particular MSM_VMA_DUMP.  In this case we want to avoid actually
647 	 * touching the page tables, as that would require synchronization
648 	 * against SUBMIT jobs running on the GPU.
649 	 */
650 	if (op->unmap.keep &&
651 	    (arg->op->op == MSM_VM_BIND_OP_MAP) &&
652 	    (vma->gem.obj == arg->op->obj) &&
653 	    (vma->gem.offset == arg->op->obj_offset) &&
654 	    (vma->va.addr == arg->op->iova) &&
655 	    (vma->va.range == arg->op->range)) {
656 		/* We are only expecting a single in-place unmap+map cb pair: */
657 		WARN_ON(arg->kept);
658 
659 		/* Leave the existing VMA in place, but signal that to the map cb: */
660 		arg->kept = true;
661 
662 		/* Only flags are changing, so update that in-place: */
663 		unsigned orig_flags = vma->flags & (DRM_GPUVA_USERBITS - 1);
664 		vma->flags = orig_flags | arg->flags;
665 
666 		return 0;
667 	}
668 
669 	if (!msm_vma->mapped)
670 		goto out_close;
671 
672 	ret = vm_op_enqueue(arg, (struct msm_vm_op){
673 		.op = MSM_VM_OP_UNMAP,
674 		.unmap = {
675 			.iova = vma->va.addr,
676 			.range = vma->va.range,
677 			.queue_id = job->queue->id,
678 		},
679 		.obj = vma->gem.obj,
680 	});
681 
682 	if (ret)
683 		return ret;
684 
685 	msm_vma->mapped = false;
686 
687 out_close:
688 	msm_gem_vma_close(vma);
689 
690 	return 0;
691 }
692 
/* drm_gpuvm callbacks, registered via drm_gpuvm_init() in msm_gem_vm_create() */
static const struct drm_gpuvm_ops msm_gpuvm_ops = {
	.vm_free = msm_gem_vm_free,
	.vm_bo_validate = msm_gem_vm_bo_validate,
	.sm_step_map = msm_gem_vm_sm_step_map,
	.sm_step_remap = msm_gem_vm_sm_step_remap,
	.sm_step_unmap = msm_gem_vm_sm_step_unmap,
};
700 
701 static struct dma_fence *
702 msm_vma_job_run(struct drm_sched_job *_job)
703 {
704 	struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job);
705 	struct drm_device *dev = job->vm->drm;
706 	struct msm_gem_vm *vm = to_msm_vm(job->vm);
707 	struct drm_gem_object *obj;
708 	int ret = vm->unusable ? -EINVAL : 0;
709 
710 	vm_dbg("");
711 
712 	mutex_lock(&vm->mmu_lock);
713 	vm->mmu->prealloc = &job->prealloc;
714 
715 	while (!list_empty(&job->vm_ops)) {
716 		struct msm_vm_op *op =
717 			list_first_entry(&job->vm_ops, struct msm_vm_op, node);
718 
719 		switch (op->op) {
720 		case MSM_VM_OP_MAP:
721 			/*
722 			 * On error, stop trying to map new things.. but we
723 			 * still want to process the unmaps (or in particular,
724 			 * the drm_gem_object_put()s)
725 			 */
726 			if (!ret)
727 				ret = vm_map_op(vm, &op->map);
728 			break;
729 		case MSM_VM_OP_UNMAP:
730 			vm_unmap_op(vm, &op->unmap);
731 			break;
732 		}
733 		drm_gem_object_put(op->obj);
734 		list_del(&op->node);
735 		kfree(op);
736 	}
737 
738 	vm->mmu->prealloc = NULL;
739 	mutex_unlock(&vm->mmu_lock);
740 
741 	/*
742 	 * We failed to perform at least _some_ of the pgtable updates, so
743 	 * now the VM is in an undefined state.  Game over!
744 	 */
745 	if (ret)
746 		msm_gem_vm_unusable(job->vm);
747 
748 	mutex_lock(&dev->gem_lru_mutex);
749 
750 	job_foreach_bo (obj, job) {
751 		msm_gem_unpin_active(obj);
752 	}
753 
754 	mutex_unlock(&dev->gem_lru_mutex);
755 
756 	/* VM_BIND ops are synchronous, so no fence to wait on: */
757 	return NULL;
758 }
759 
760 static void
761 msm_vma_job_free(struct drm_sched_job *_job)
762 {
763 	struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job);
764 	struct msm_gem_vm *vm = to_msm_vm(job->vm);
765 	struct drm_gem_object *obj;
766 
767 	vm->mmu->funcs->prealloc_cleanup(vm->mmu, &job->prealloc);
768 
769 	atomic_sub(job->prealloc.count, &vm->prealloc_throttle.in_flight);
770 
771 	drm_sched_job_cleanup(_job);
772 
773 	job_foreach_bo (obj, job)
774 		drm_gem_object_put(obj);
775 
776 	msm_submitqueue_put(job->queue);
777 	dma_fence_put(job->fence);
778 
779 	/* In error paths, we could have unexecuted ops: */
780 	while (!list_empty(&job->vm_ops)) {
781 		struct msm_vm_op *op =
782 			list_first_entry(&job->vm_ops, struct msm_vm_op, node);
783 		list_del(&op->node);
784 		kfree(op);
785 	}
786 
787 	wake_up(&vm->prealloc_throttle.wait);
788 
789 	kfree(job);
790 }
791 
/* Scheduler backend for VM_BIND jobs, used by the per-VM scheduler of user managed VMs */
static const struct drm_sched_backend_ops msm_vm_bind_ops = {
	.run_job = msm_vma_job_run,
	.free_job = msm_vma_job_free
};
796 
/**
 * msm_gem_vm_create() - Create and initialize a &msm_gem_vm
 * @drm: the drm device
 * @mmu: the backing MMU objects handling mapping/unmapping
 * @name: the name of the VM
 * @va_start: the start offset of the VA space
 * @va_size: the size of the VA space
 * @managed: is it a kernel managed VM?
 *
 * In a kernel managed VM, the kernel handles address allocation, and only
 * synchronous operations are supported.  In a user managed VM, userspace
 * handles virtual address allocation, and both async and sync operations
 * are supported.
 *
 * If @mmu is an ERR_PTR, the error is passed straight through to the
 * caller.
 *
 * Returns: pointer to the created &struct drm_gpuvm on success
 * or an ERR_PTR(-errno) on failure.
 */
struct drm_gpuvm *
msm_gem_vm_create(struct drm_device *drm, struct msm_mmu *mmu, const char *name,
		  u64 va_start, u64 va_size, bool managed)
{
	/*
	 * We mostly want to use DRM_GPUVM_RESV_PROTECTED, except that
	 * makes drm_gpuvm_bo_evict() a no-op for extobjs (ie. we lose
	 * tracking that an extobj is evicted) :facepalm:
	 */
	enum drm_gpuvm_flags flags = 0;
	struct msm_gem_vm *vm;
	struct drm_gem_object *dummy_gem;
	int ret = 0;

	if (IS_ERR(mmu))
		return ERR_CAST(mmu);

	vm = kzalloc_obj(*vm);
	if (!vm)
		return ERR_PTR(-ENOMEM);

	/* Dummy GEM object providing the VM's resv: */
	dummy_gem = drm_gpuvm_resv_object_alloc(drm);
	if (!dummy_gem) {
		ret = -ENOMEM;
		goto err_free_vm;
	}

	/* Only user managed VMs get a scheduler for VM_BIND jobs: */
	if (!managed) {
		struct drm_sched_init_args args = {
			.ops = &msm_vm_bind_ops,
			.credit_limit = 1,
			.timeout = MAX_SCHEDULE_TIMEOUT,
			.name = "msm-vm-bind",
			.dev = drm->dev,
		};

		ret = drm_sched_init(&vm->sched, &args);
		if (ret)
			goto err_free_dummy;

		init_waitqueue_head(&vm->prealloc_throttle.wait);
	}

	drm_gpuvm_init(&vm->base, name, flags, drm, dummy_gem,
		       va_start, va_size, 0, 0, &msm_gpuvm_ops);
	/* Drop the local reference to the dummy object: */
	drm_gem_object_put(dummy_gem);

	vm->mmu = mmu;
	mutex_init(&vm->mmu_lock);
	vm->managed = managed;

	drm_mm_init(&vm->mm, va_start, va_size);

	/*
	 * We don't really need vm log for kernel managed VMs, as the kernel
	 * is responsible for ensuring that GEM objs are mapped if they are
	 * used by a submit.  Furthermore we piggyback on mmu_lock to serialize
	 * access to the log.
	 *
	 * Limit the max log_shift to 8 to prevent userspace from asking us
	 * for an unreasonable log size.
	 */
	if (!managed)
		vm->log_shift = MIN(vm_log_shift, 8);

	/*
	 * The allocation result is deliberately not checked here: with a
	 * NULL log, vm_log() and msm_gem_vm_unusable() simply skip logging.
	 */
	if (vm->log_shift) {
		vm->log = kmalloc_objs(vm->log[0], 1 << vm->log_shift,
				       GFP_KERNEL | __GFP_ZERO);
	}

	return &vm->base;

err_free_dummy:
	drm_gem_object_put(dummy_gem);

err_free_vm:
	kfree(vm);
	return ERR_PTR(ret);
}
893 
/**
 * msm_gem_vm_close() - Close a VM
 * @gpuvm: The VM to close
 *
 * Called when the drm device file is closed, to tear down VM related resources
 * (which will drop refcounts to GEM objects that were still mapped into the
 * VM at the time).
 *
 * For user managed VMs this waits for the last fence (if any), shuts down
 * the VM_BIND scheduler, and then unmaps and closes every remaining vma.
 */
void
msm_gem_vm_close(struct drm_gpuvm *gpuvm)
{
	struct msm_gem_vm *vm = to_msm_vm(gpuvm);
	struct drm_gpuva *vma, *tmp;
	struct drm_exec exec;

	/*
	 * For kernel managed VMs, the VMAs are torn down when the handle is
	 * closed, so nothing more to do.
	 */
	if (vm->managed)
		return;

	if (vm->last_fence)
		dma_fence_wait(vm->last_fence, false);

	/* Kill the scheduler now, so we aren't racing with it for cleanup: */
	drm_sched_stop(&vm->sched, NULL);
	drm_sched_fini(&vm->sched);

	/* Tear down any remaining mappings: */
	drm_exec_init(&exec, 0, 2);
	drm_exec_until_all_locked (&exec) {
		/* The VM's resv must be held to close vmas: */
		drm_exec_lock_obj(&exec, drm_gpuvm_resv_obj(gpuvm));
		drm_exec_retry_on_contention(&exec);

		/* _safe variant, since each vma is freed as we go: */
		drm_gpuvm_for_each_va_safe (vma, tmp, gpuvm) {
			struct drm_gem_object *obj = vma->gem.obj;

			/*
			 * MSM_BO_NO_SHARE objects share the same resv as the
			 * VM, in which case the obj is already locked:
			 */
			if (obj && (obj->resv == drm_gpuvm_resv(gpuvm)))
				obj = NULL;

			if (obj) {
				drm_exec_lock_obj(&exec, obj);
				drm_exec_retry_on_contention(&exec);
			}

			msm_gem_vma_unmap(vma, "close");
			msm_gem_vma_close(vma);

			/* Done with this object, so release its lock early: */
			if (obj) {
				drm_exec_unlock_obj(&exec, obj);
			}
		}
	}
	drm_exec_fini(&exec);
}
954 
955 
956 static struct msm_vm_bind_job *
957 vm_bind_job_create(struct drm_device *dev, struct drm_file *file,
958 		   struct msm_gpu_submitqueue *queue, uint32_t nr_ops)
959 {
960 	struct msm_vm_bind_job *job;
961 	int ret;
962 
963 	job = kzalloc_flex(*job, ops, nr_ops, GFP_KERNEL | __GFP_NOWARN);
964 	if (!job)
965 		return ERR_PTR(-ENOMEM);
966 
967 	ret = drm_sched_job_init(&job->base, queue->entity, 1, queue,
968 				 file->client_id);
969 	if (ret) {
970 		kfree(job);
971 		return ERR_PTR(ret);
972 	}
973 
974 	job->vm = msm_context_vm(dev, queue->ctx);
975 	job->queue = queue;
976 	INIT_LIST_HEAD(&job->vm_ops);
977 
978 	return job;
979 }
980 
981 static bool invalid_alignment(uint64_t addr)
982 {
983 	/*
984 	 * Technically this is about GPU alignment, not CPU alignment.  But
985 	 * I've not seen any qcom SoC where the SMMU does not support the
986 	 * CPU's smallest page size.
987 	 */
988 	return !PAGE_ALIGNED(addr);
989 }
990 
991 static int
992 lookup_op(struct msm_vm_bind_job *job, const struct drm_msm_vm_bind_op *op)
993 {
994 	struct drm_device *dev = job->vm->drm;
995 	struct msm_drm_private *priv = dev->dev_private;
996 	int i = job->nr_ops++;
997 	int ret = 0;
998 
999 	job->ops[i].op = op->op;
1000 	job->ops[i].handle = op->handle;
1001 	job->ops[i].obj_offset = op->obj_offset;
1002 	job->ops[i].iova = op->iova;
1003 	job->ops[i].range = op->range;
1004 	job->ops[i].flags = op->flags;
1005 
1006 	if (op->flags & ~MSM_VM_BIND_OP_FLAGS)
1007 		ret = UERR(EINVAL, dev, "invalid flags: %x\n", op->flags);
1008 
1009 	if (invalid_alignment(op->iova))
1010 		ret = UERR(EINVAL, dev, "invalid address: %016llx\n", op->iova);
1011 
1012 	if (invalid_alignment(op->obj_offset))
1013 		ret = UERR(EINVAL, dev, "invalid bo_offset: %016llx\n", op->obj_offset);
1014 
1015 	if (invalid_alignment(op->range))
1016 		ret = UERR(EINVAL, dev, "invalid range: %016llx\n", op->range);
1017 
1018 	if (!drm_gpuvm_range_valid(job->vm, op->iova, op->range))
1019 		ret = UERR(EINVAL, dev, "invalid range: %016llx, %016llx\n", op->iova, op->range);
1020 
1021 	/*
1022 	 * MAP must specify a valid handle.  But the handle MBZ for
1023 	 * UNMAP or MAP_NULL.
1024 	 */
1025 	if (op->op == MSM_VM_BIND_OP_MAP) {
1026 		if (!op->handle)
1027 			ret = UERR(EINVAL, dev, "invalid handle\n");
1028 	} else if (op->handle) {
1029 		ret = UERR(EINVAL, dev, "handle must be zero\n");
1030 	}
1031 
1032 	switch (op->op) {
1033 	case MSM_VM_BIND_OP_MAP:
1034 	case MSM_VM_BIND_OP_MAP_NULL:
1035 	case MSM_VM_BIND_OP_UNMAP:
1036 		break;
1037 	default:
1038 		ret = UERR(EINVAL, dev, "invalid op: %u\n", op->op);
1039 		break;
1040 	}
1041 
1042 	if ((op->op == MSM_VM_BIND_OP_MAP_NULL) &&
1043 	    !adreno_smmu_has_prr(priv->gpu)) {
1044 		ret = UERR(EINVAL, dev, "PRR not supported\n");
1045 	}
1046 
1047 	return ret;
1048 }
1049 
1050 /*
1051  * ioctl parsing, parameter validation, and GEM handle lookup
1052  */
1053 static int
1054 vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args,
1055 		       struct drm_file *file, int *nr_bos)
1056 {
1057 	struct drm_device *dev = job->vm->drm;
1058 	int ret = 0;
1059 	int cnt = 0;
1060 	int i = -1;
1061 
1062 	if (args->nr_ops == 1) {
1063 		/* Single op case, the op is inlined: */
1064 		ret = lookup_op(job, &args->op);
1065 	} else {
1066 		for (unsigned i = 0; i < args->nr_ops; i++) {
1067 			struct drm_msm_vm_bind_op op;
1068 			void __user *userptr =
1069 				u64_to_user_ptr(args->ops + (i * sizeof(op)));
1070 
1071 			/* make sure we don't have garbage flags, in case we hit
1072 			 * error path before flags is initialized:
1073 			 */
1074 			job->ops[i].flags = 0;
1075 
1076 			if (copy_from_user(&op, userptr, sizeof(op))) {
1077 				ret = -EFAULT;
1078 				break;
1079 			}
1080 
1081 			ret = lookup_op(job, &op);
1082 			if (ret)
1083 				break;
1084 		}
1085 	}
1086 
1087 	if (ret) {
1088 		job->nr_ops = 0;
1089 		goto out;
1090 	}
1091 
1092 	spin_lock(&file->table_lock);
1093 
1094 	for (i = 0; i < args->nr_ops; i++) {
1095 		struct msm_vm_bind_op *op = &job->ops[i];
1096 		struct drm_gem_object *obj;
1097 
1098 		if (!op->handle) {
1099 			op->obj = NULL;
1100 			continue;
1101 		}
1102 
1103 		/*
1104 		 * normally use drm_gem_object_lookup(), but for bulk lookup
1105 		 * all under single table_lock just hit object_idr directly:
1106 		 */
1107 		obj = idr_find(&file->object_idr, op->handle);
1108 		if (!obj) {
1109 			ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", op->handle, i);
1110 			goto out_unlock;
1111 		}
1112 
1113 		drm_gem_object_get(obj);
1114 
1115 		op->obj = obj;
1116 		cnt++;
1117 
1118 		if ((op->range + op->obj_offset) > obj->size) {
1119 			ret = UERR(EINVAL, dev, "invalid range: %016llx + %016llx > %016zx\n",
1120 				   op->range, op->obj_offset, obj->size);
1121 			goto out_unlock;
1122 		}
1123 	}
1124 
1125 	*nr_bos = cnt;
1126 
1127 out_unlock:
1128 	spin_unlock(&file->table_lock);
1129 
1130 	if (ret) {
1131 		for (; i >= 0; i--) {
1132 			struct msm_vm_bind_op *op = &job->ops[i];
1133 
1134 			if (!op->obj)
1135 				continue;
1136 
1137 			drm_gem_object_put(op->obj);
1138 			op->obj = NULL;
1139 		}
1140 	}
1141 out:
1142 	return ret;
1143 }
1144 
1145 static void
1146 prealloc_count(struct msm_vm_bind_job *job,
1147 	       struct msm_vm_bind_op *first,
1148 	       struct msm_vm_bind_op *last)
1149 {
1150 	struct msm_mmu *mmu = to_msm_vm(job->vm)->mmu;
1151 
1152 	if (!first)
1153 		return;
1154 
1155 	uint64_t start_iova = first->iova;
1156 	uint64_t end_iova = last->iova + last->range;
1157 
1158 	mmu->funcs->prealloc_count(mmu, &job->prealloc, start_iova, end_iova - start_iova);
1159 }
1160 
1161 static bool
1162 ops_are_same_pte(struct msm_vm_bind_op *first, struct msm_vm_bind_op *next)
1163 {
1164 	/*
1165 	 * Last level pte covers 2MB.. so we should merge two ops, from
1166 	 * the PoV of figuring out how much pgtable pages to pre-allocate
1167 	 * if they land in the same 2MB range:
1168 	 */
1169 	uint64_t pte_mask = ~(SZ_2M - 1);
1170 	return ((first->iova + first->range) & pte_mask) == (next->iova & pte_mask);
1171 }
1172 
1173 /*
1174  * Determine the amount of memory to prealloc for pgtables.  For sparse images,
1175  * in particular, userspace plays some tricks with the order of page mappings
1176  * to get the desired swizzle pattern, resulting in a large # of tiny MAP ops.
1177  * So detect when multiple MAP operations are physically contiguous, and count
1178  * them as a single mapping.  Otherwise the prealloc_count() will not realize
1179  * they can share pagetable pages and vastly overcount.
1180  */
1181 static int
1182 vm_bind_prealloc_count(struct msm_vm_bind_job *job)
1183 {
1184 	struct msm_vm_bind_op *first = NULL, *last = NULL;
1185 	struct msm_gem_vm *vm = to_msm_vm(job->vm);
1186 	int ret;
1187 
1188 	for (int i = 0; i < job->nr_ops; i++) {
1189 		struct msm_vm_bind_op *op = &job->ops[i];
1190 
1191 		/* We only care about MAP/MAP_NULL: */
1192 		if (op->op == MSM_VM_BIND_OP_UNMAP)
1193 			continue;
1194 
1195 		/*
1196 		 * If op is contiguous with last in the current range, then
1197 		 * it becomes the new last in the range and we continue
1198 		 * looping:
1199 		 */
1200 		if (last && ops_are_same_pte(last, op)) {
1201 			last = op;
1202 			continue;
1203 		}
1204 
1205 		/*
1206 		 * If op is not contiguous with the current range, flush
1207 		 * the current range and start anew:
1208 		 */
1209 		prealloc_count(job, first, last);
1210 		first = last = op;
1211 	}
1212 
1213 	/* Flush the remaining range: */
1214 	prealloc_count(job, first, last);
1215 
1216 	/*
1217 	 * Now that we know the needed amount to pre-alloc, throttle on pending
1218 	 * VM_BIND jobs if we already have too much pre-alloc memory in flight
1219 	 */
1220 	ret = wait_event_interruptible(
1221 			vm->prealloc_throttle.wait,
1222 			atomic_read(&vm->prealloc_throttle.in_flight) <= 1024);
1223 	if (ret)
1224 		return ret;
1225 
1226 	atomic_add(job->prealloc.count, &vm->prealloc_throttle.in_flight);
1227 
1228 	return 0;
1229 }
1230 
1231 /*
1232  * Lock VM and GEM objects
1233  */
1234 static int
1235 vm_bind_job_lock_objects(struct msm_vm_bind_job *job, struct drm_exec *exec)
1236 {
1237 	int ret;
1238 
1239 	/* Lock VM and objects: */
1240 	drm_exec_until_all_locked (exec) {
1241 		ret = drm_exec_lock_obj(exec, drm_gpuvm_resv_obj(job->vm));
1242 		drm_exec_retry_on_contention(exec);
1243 		if (ret)
1244 			return ret;
1245 
1246 		for (unsigned i = 0; i < job->nr_ops; i++) {
1247 			const struct msm_vm_bind_op *op = &job->ops[i];
1248 
1249 			switch (op->op) {
1250 			case MSM_VM_BIND_OP_UNMAP:
1251 				ret = drm_gpuvm_sm_unmap_exec_lock(job->vm, exec,
1252 							      op->iova,
1253 							      op->range);
1254 				break;
1255 			case MSM_VM_BIND_OP_MAP:
1256 			case MSM_VM_BIND_OP_MAP_NULL: {
1257 				struct drm_gpuvm_map_req map_req = {
1258 					.map.va.addr = op->iova,
1259 					.map.va.range = op->range,
1260 					.map.gem.obj = op->obj,
1261 					.map.gem.offset = op->obj_offset,
1262 				};
1263 
1264 				ret = drm_gpuvm_sm_map_exec_lock(job->vm, exec, 1, &map_req);
1265 				break;
1266 			}
1267 			default:
1268 				/*
1269 				 * lookup_op() should have already thrown an error for
1270 				 * invalid ops
1271 				 */
1272 				WARN_ON("unreachable");
1273 			}
1274 
1275 			drm_exec_retry_on_contention(exec);
1276 			if (ret)
1277 				return ret;
1278 		}
1279 	}
1280 
1281 	return 0;
1282 }
1283 
1284 /*
1285  * Pin GEM objects, ensuring that we have backing pages.  Pinning will move
1286  * the object to the pinned LRU so that the shrinker knows to first consider
1287  * other objects for evicting.
1288  */
1289 static int
1290 vm_bind_job_pin_objects(struct msm_vm_bind_job *job)
1291 {
1292 	struct drm_gem_object *obj;
1293 
1294 	/*
1295 	 * First loop, before holding the LRU lock, avoids holding the
1296 	 * LRU lock while calling msm_gem_pin_vma_locked (which could
1297 	 * trigger get_pages())
1298 	 */
1299 	job_foreach_bo (obj, job) {
1300 		struct page **pages;
1301 
1302 		pages = msm_gem_get_pages_locked(obj, MSM_MADV_WILLNEED);
1303 		if (IS_ERR(pages))
1304 			return PTR_ERR(pages);
1305 	}
1306 
1307 	struct drm_device *dev = job->vm->drm;
1308 
1309 	/*
1310 	 * A second loop while holding the LRU lock (a) avoids acquiring/dropping
1311 	 * the LRU lock for each individual bo, while (b) avoiding holding the
1312 	 * LRU lock while calling msm_gem_pin_vma_locked() (which could trigger
1313 	 * get_pages() which could trigger reclaim.. and if we held the LRU lock
1314 	 * could trigger deadlock with the shrinker).
1315 	 */
1316 	mutex_lock(&dev->gem_lru_mutex);
1317 	job_foreach_bo (obj, job)
1318 		msm_gem_pin_obj_locked(obj);
1319 	mutex_unlock(&dev->gem_lru_mutex);
1320 
1321 	job->bos_pinned = true;
1322 
1323 	return 0;
1324 }
1325 
1326 /*
1327  * Unpin GEM objects.  Normally this is done after the bind job is run.
1328  */
1329 static void
1330 vm_bind_job_unpin_objects(struct msm_vm_bind_job *job)
1331 {
1332 	struct drm_gem_object *obj;
1333 
1334 	if (!job->bos_pinned)
1335 		return;
1336 
1337 	job_foreach_bo (obj, job)
1338 		msm_gem_unpin_locked(obj);
1339 
1340 	job->bos_pinned = false;
1341 }
1342 
1343 /*
1344  * Pre-allocate pgtable memory, and translate the VM bind requests into a
1345  * sequence of pgtable updates to be applied asynchronously.
1346  */
1347 static int
1348 vm_bind_job_prepare(struct msm_vm_bind_job *job)
1349 {
1350 	struct msm_gem_vm *vm = to_msm_vm(job->vm);
1351 	struct msm_mmu *mmu = vm->mmu;
1352 	int ret;
1353 
1354 	ret = mmu->funcs->prealloc_allocate(mmu, &job->prealloc);
1355 	if (ret)
1356 		return ret;
1357 
1358 	for (unsigned i = 0; i < job->nr_ops; i++) {
1359 		const struct msm_vm_bind_op *op = &job->ops[i];
1360 		struct op_arg arg = {
1361 			.job = job,
1362 			.op = op,
1363 		};
1364 
1365 		switch (op->op) {
1366 		case MSM_VM_BIND_OP_UNMAP:
1367 			ret = drm_gpuvm_sm_unmap(job->vm, &arg, op->iova,
1368 						 op->range);
1369 			break;
1370 		case MSM_VM_BIND_OP_MAP:
1371 			if (op->flags & MSM_VM_BIND_OP_DUMP)
1372 				arg.flags |= MSM_VMA_DUMP;
1373 			fallthrough;
1374 		case MSM_VM_BIND_OP_MAP_NULL: {
1375 			struct drm_gpuvm_map_req map_req = {
1376 				.map.va.addr = op->iova,
1377 				.map.va.range = op->range,
1378 				.map.gem.obj = op->obj,
1379 				.map.gem.offset = op->obj_offset,
1380 			};
1381 
1382 			ret = drm_gpuvm_sm_map(job->vm, &arg, &map_req);
1383 			break;
1384 		}
1385 		default:
1386 			/*
1387 			 * lookup_op() should have already thrown an error for
1388 			 * invalid ops
1389 			 */
1390 			BUG_ON("unreachable");
1391 		}
1392 
1393 		if (ret) {
1394 			/*
1395 			 * If we've already started modifying the vm, we can't
1396 			 * adequetly describe to userspace the intermediate
1397 			 * state the vm is in.  So throw up our hands!
1398 			 */
1399 			if (i > 0)
1400 				msm_gem_vm_unusable(job->vm);
1401 			return ret;
1402 		}
1403 	}
1404 
1405 	return 0;
1406 }
1407 
1408 /*
1409  * Attach fences to the GEM objects being bound.  This will signify to
1410  * the shrinker that they are busy even after dropping the locks (ie.
1411  * drm_exec_fini())
1412  */
1413 static void
1414 vm_bind_job_attach_fences(struct msm_vm_bind_job *job)
1415 {
1416 	for (unsigned i = 0; i < job->nr_ops; i++) {
1417 		struct drm_gem_object *obj = job->ops[i].obj;
1418 
1419 		if (!obj)
1420 			continue;
1421 
1422 		dma_resv_add_fence(obj->resv, job->fence,
1423 				   DMA_RESV_USAGE_KERNEL);
1424 	}
1425 }
1426 
/*
 * VM_BIND ioctl handler.  @data is the struct drm_msm_vm_bind passed in by
 * userspace, @file the drm_file it was issued on.
 *
 * In order, this:
 *  - checks that there is a GPU, that the context's VM is still usable,
 *    that the context is a VM_BIND context, and that the flags are valid
 *  - looks up the submitqueue (args->queue_id) and checks that it is a
 *    VM_BIND queue
 *  - reserves an fd if MSM_VM_BIND_FENCE_FD_OUT was requested
 *  - creates the job and then, with queue->lock held, adds the in-fence
 *    and in-syncobj dependencies and parses the out-syncobjs
 *  - looks up the ops and counts (and throttles on) pgtable prealloc
 *  - locks, pins and prepares, then arms the job and pushes it to the
 *    scheduler
 *
 * Returns 0 on success or a negative error code.  On success with
 * MSM_VM_BIND_FENCE_FD_OUT set, the out-fence fd is stored in
 * args->fence_fd.
 */
int
msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct drm_msm_vm_bind *args = data;
	struct msm_context *ctx = file->driver_priv;
	struct msm_vm_bind_job *job = NULL;
	struct msm_gpu *gpu = priv->gpu;
	struct msm_gpu_submitqueue *queue;
	struct msm_syncobj_post_dep *post_deps = NULL;
	struct drm_syncobj **syncobjs_to_reset = NULL;
	struct sync_file *sync_file = NULL;
	struct dma_fence *fence;
	int out_fence_fd = -1;
	int ret, nr_bos = 0;
	unsigned i;

	if (!gpu)
		return -ENXIO;

	/*
	 * Maybe we could allow just UNMAP ops?  OTOH userspace should just
	 * immediately close the device file and all will be torn down.
	 */
	if (to_msm_vm(msm_context_vm(dev, ctx))->unusable)
		return UERR(EPIPE, dev, "context is unusable");

	/*
	 * Technically, you cannot create a VM_BIND submitqueue in the first
	 * place, if you haven't opted in to VM_BIND context.  But it is
	 * cleaner / less confusing, to check this case directly.
	 */
	if (!msm_context_is_vmbind(ctx))
		return UERR(EINVAL, dev, "context does not support vmbind");

	if (args->flags & ~MSM_VM_BIND_FLAGS)
		return UERR(EINVAL, dev, "invalid flags");

	/*
	 * From here on a queue reference is held, so every exit has to go
	 * through the cleanup labels at the bottom rather than returning
	 * directly.
	 */
	queue = msm_submitqueue_get(ctx, args->queue_id);
	if (!queue)
		return -ENOENT;

	if (!(queue->flags & MSM_SUBMITQUEUE_VM_BIND)) {
		ret = UERR(EINVAL, dev, "Invalid queue type");
		goto out_post_unlock;
	}

	/*
	 * Only reserve the fd number here.  It is installed (or released
	 * again) at out_post_unlock, once the outcome is known.
	 */
	if (args->flags & MSM_VM_BIND_FENCE_FD_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0) {
			ret = out_fence_fd;
			goto out_post_unlock;
		}
	}

	job = vm_bind_job_create(dev, file, queue, args->nr_ops);
	if (IS_ERR(job)) {
		ret = PTR_ERR(job);
		goto out_post_unlock;
	}

	ret = mutex_lock_interruptible(&queue->lock);
	if (ret)
		goto out_post_unlock;

	if (args->flags & MSM_VM_BIND_FENCE_FD_IN) {
		struct dma_fence *in_fence;

		in_fence = sync_file_get_fence(args->fence_fd);

		if (!in_fence) {
			ret = UERR(EINVAL, dev, "invalid in-fence");
			goto out_unlock;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			goto out_unlock;
	}

	if (args->in_syncobjs > 0) {
		syncobjs_to_reset = msm_syncobj_parse_deps(dev, &job->base,
							   file, args->in_syncobjs,
							   args->nr_in_syncobjs,
							   args->syncobj_stride);
		if (IS_ERR(syncobjs_to_reset)) {
			ret = PTR_ERR(syncobjs_to_reset);
			goto out_unlock;
		}
	}

	if (args->out_syncobjs > 0) {
		post_deps = msm_syncobj_parse_post_deps(dev, file,
							args->out_syncobjs,
							args->nr_out_syncobjs,
							args->syncobj_stride);
		if (IS_ERR(post_deps)) {
			ret = PTR_ERR(post_deps);
			goto out_unlock;
		}
	}

	ret = vm_bind_job_lookup_ops(job, args, file, &nr_bos);
	if (ret)
		goto out_unlock;

	ret = vm_bind_prealloc_count(job);
	if (ret)
		goto out_unlock;

	/*
	 * exec is only initialized from this point on, which is why the
	 * failures above jump to out_unlock (skipping drm_exec_fini()) and
	 * the ones below jump to out.
	 *
	 * NOTE(review): nr_bos + 1 is presumably one slot per looked-up BO
	 * plus one for the VM's resv object -- confirm.
	 */
	struct drm_exec exec;
	unsigned flags = DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT;
	drm_exec_init(&exec, flags, nr_bos + 1);

	ret = vm_bind_job_lock_objects(job, &exec);
	if (ret)
		goto out;

	ret = vm_bind_job_pin_objects(job);
	if (ret)
		goto out;

	ret = vm_bind_job_prepare(job);
	if (ret)
		goto out;

	drm_sched_job_arm(&job->base);

	job->fence = dma_fence_get(&job->base.s_fence->finished);

	/*
	 * NOTE(review): if sync_file_create() fails here, the job has
	 * already been armed but is never pushed; it is released through
	 * msm_vma_job_free() below.  Confirm that this is fine from the
	 * scheduler's PoV.
	 */
	if (args->flags & MSM_VM_BIND_FENCE_FD_OUT) {
		sync_file = sync_file_create(job->fence);
		if (!sync_file)
			ret = -ENOMEM;
	}

	if (ret)
		goto out;

	vm_bind_job_attach_fences(job);

	/*
	 * The job can be free'd (and fence unref'd) at any point after
	 * drm_sched_entity_push_job(), so we need to hold our own ref
	 */
	fence = dma_fence_get(job->fence);

	drm_sched_entity_push_job(&job->base);

	msm_syncobj_reset(syncobjs_to_reset, args->nr_in_syncobjs);
	msm_syncobj_process_post_deps(post_deps, args->nr_out_syncobjs, fence);

	dma_fence_put(fence);

out:
	/* Objects stay pinned on success, they are only unpinned on error: */
	if (ret)
		vm_bind_job_unpin_objects(job);

	drm_exec_fini(&exec);
out_unlock:
	mutex_unlock(&queue->lock);
out_post_unlock:
	/*
	 * Settle the out-fence: on error give back the reserved fd and drop
	 * the sync_file (if it got created); on success install the
	 * sync_file into the fd and report the fd to userspace.
	 */
	if (ret) {
		if (out_fence_fd >= 0)
			put_unused_fd(out_fence_fd);
		if (sync_file)
			fput(sync_file->file);
	} else if (sync_file) {
		fd_install(out_fence_fd, sync_file->file);
		args->fence_fd = out_fence_fd;
	}

	/*
	 * If a job was created it is only free'd here on error.  On success
	 * it has been pushed to the scheduler and must not be touched.
	 */
	if (!IS_ERR_OR_NULL(job)) {
		if (ret)
			msm_vma_job_free(&job->base);
	} else {
		/*
		 * If the submit hasn't yet taken ownership of the queue
		 * then we need to drop the reference ourself:
		 */
		msm_submitqueue_put(queue);
	}

	/*
	 * The parsed syncobj arrays are released on both the success and
	 * the error path.  The IS_ERR_OR_NULL() checks cover "never parsed"
	 * (NULL) as well as "parsing failed" (ERR_PTR).
	 */
	if (!IS_ERR_OR_NULL(post_deps)) {
		for (i = 0; i < args->nr_out_syncobjs; ++i) {
			kfree(post_deps[i].chain);
			drm_syncobj_put(post_deps[i].syncobj);
		}
		kfree(post_deps);
	}

	if (!IS_ERR_OR_NULL(syncobjs_to_reset)) {
		for (i = 0; i < args->nr_in_syncobjs; ++i) {
			/* Entries may be NULL, only put the ones that are set: */
			if (syncobjs_to_reset[i])
				drm_syncobj_put(syncobjs_to_reset[i]);
		}
		kfree(syncobjs_to_reset);
	}

	return ret;
}
1628