// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_exec.h"

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_engine.h"
#include "xe_macros.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_vm.h"

/**
 * DOC: Execbuf (User GPU command submission)
 *
 * Execs have historically been rather complicated in DRM drivers (at least in
 * the i915) because of a few things:
 *
 * - Passing in a list of BOs which are read / written to, creating implicit syncs
 * - Binding at exec time
 * - Flow controlling the ring at exec time
 *
 * In XE we avoid all of this complication by not allowing a BO list to be
 * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
 * separate operations, and using the DRM scheduler to flow control the ring.
 * Let's dive deeper into each of these.
 *
 * We can get away from a BO list by forcing the user to use in / out fences on
 * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
 * user knows an exec writes to a BO and reads from the BO in the next exec, it
 * is the user's responsibility to pass an in / out fence between the two
 * execs).
 * Implicit dependencies for external BOs are handled by using the dma-buf
 * implicit dependency uAPI (TODO: add link). To make this work, each exec must
 * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
 * BO mapped in the VM.
 *
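 * A minimal sketch of what that amounts to per external BO (the loop over the
 * VM's external BOs is elided and the bo field path is illustrative; the call
 * and usage value are the ones xe_vm_fence_all_extobjs() uses below):
 *
 * .. code-block:: c
 *
 *	// Install the job's finished fence in the implicit-sync write slot
 *	dma_resv_add_fence(bo->ttm.base.resv, &job->drm.s_fence->finished,
 *			   DMA_RESV_USAGE_WRITE);
 *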
 * We do not allow a user to trigger a bind at exec time; rather, we have a VM
 * bind IOCTL which uses the same in / out fence interface as exec. In that
 * sense, a VM bind is basically the same operation as an exec from the user's
 * perspective, e.g. if an exec depends on a VM bind, use the in / out fence
 * interface (struct drm_xe_sync) to synchronize, just like when syncing
 * between two dependent execs.
 *
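 * A hedged sketch of that pattern (the drm_xe_sync field and flag names are
 * assumptions about xe_drm.h at this point and should be checked against the
 * header; drmSyncobjCreate() is plain libdrm):
 *
 * .. code-block:: c
 *
 *	uint32_t syncobj;
 *
 *	drmSyncobjCreate(fd, 0, &syncobj);
 *
 *	// The VM bind signals the syncobj when the bind completes ...
 *	struct drm_xe_sync bind_out = {
 *		.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,
 *		.handle = syncobj,
 *	};
 *
 *	// ... and the exec waits on the same syncobj before running
 *	struct drm_xe_sync exec_in = {
 *		.flags = DRM_XE_SYNC_SYNCOBJ,
 *		.handle = syncobj,
 *	};
 *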
 * Although a user cannot trigger a bind, we still have to rebind userptrs in
 * the VM that have been invalidated since the last exec; likewise we also have
 * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
 * behind any pending kernel operations on any external BOs in the VM or any
 * BOs private to the VM. This is accomplished by the rebinds waiting on the
 * BOs' DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all
 * BO slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for private
 * BOs and in the DMA_RESV_USAGE_WRITE slot for external BOs).
 *
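 * On the exec side that ordering is expressed through the dma-resv API; a
 * minimal sketch (these calls and usage values are the ones used later in
 * this file, everything around them is elided):
 *
 * .. code-block:: c
 *
 *	// Exec jobs wait behind pending kernel ops on the VM ...
 *	drm_sched_job_add_resv_dependencies(&job->drm, &vm->resv,
 *					    DMA_RESV_USAGE_KERNEL);
 *
 *	// ... and advertise themselves so kernel ops / rebinds wait on them
 *	dma_resv_add_fence(&vm->resv, &job->drm.s_fence->finished,
 *			   DMA_RESV_USAGE_BOOKKEEP);
 *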
 * Rebinds / dma-resv usage applies to non-compute mode VMs only; for compute
 * mode VMs we use preempt fences and a rebind worker (TODO: add link).
 *
 * There is no need to flow control the ring in the exec as we write the ring at
 * submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
 * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
 * ring is available.
 *
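 * In other words, the submission backend sizes the scheduler so it can never
 * emit more than fits in the ring. A hedged sketch of that setup (the
 * drm_sched_init() signature is the one expected for kernels of this vintage,
 * and ring_size / MAX_JOB_SIZE / the other arguments are placeholders, not
 * symbols from this file):
 *
 * .. code-block:: c
 *
 *	// Each job consumes at most MAX_JOB_SIZE bytes of ring space, so
 *	// capping in-flight jobs at ring_size / MAX_JOB_SIZE means the ring
 *	// can never overflow and execs never block on ring space.
 *	drm_sched_init(sched, &sched_ops, ring_size / MAX_JOB_SIZE,
 *		       hang_limit, timeout, NULL, NULL, "xe-engine", dev);
 *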
 * All of this results in a rather simple exec implementation.
 *
 * Flow
 * ~~~~
 *
 * .. code-block::
 *
 *	Parse input arguments
 *	Wait for any async VM bind passed as in-fences to start
 *	<----------------------------------------------------------------------|
 *	Lock global VM lock in read mode                                       |
 *	Pin userptrs (also finds userptrs invalidated since last exec)         |
 *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
 *	Validate BOs that have been evicted                                    |
 *	Create job                                                             |
 *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
 *	Add rebind fence dependency to job                                     |
 *	Add job to VM dma-resv bookkeeping slot (non-compute mode)             |
 *	Add job to external BOs dma-resv write slots (non-compute mode)        |
 *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
 *	Install in / out fences for job
 *	Submit job
 *	Unlock all
 */

#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000

static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
			 struct ttm_validate_buffer tv_onstack[],
			 struct ttm_validate_buffer **tv,
			 struct list_head *objs)
{
	struct xe_vm *vm = e->vm;
	struct xe_vma *vma;
	LIST_HEAD(dups);
	ktime_t end = 0;
	int err = 0;

	*tv = NULL;
	if (xe_vm_no_dma_fences(e->vm))
		return 0;

retry:
	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
	if (err)
		return err;

	/*
	 * Validate BOs that have been evicted (i.e. make sure the
	 * BOs have valid placements, possibly moving an evicted BO back
	 * to a location where the GPU can access it).
	 */
	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
		if (xe_vma_is_userptr(vma))
			continue;

		err = xe_bo_validate(vma->bo, vm, false);
		if (err) {
			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
			*tv = NULL;
			break;
		}
	}

	/*
	 * With multiple active VMs, under memory pressure, it is possible that
	 * ttm_bo_validate() runs into -EDEADLK and in such a case returns
	 * -ENOMEM. Until TTM properly handles locking in such scenarios, the
	 * best thing the driver can do is retry with a timeout.
	 */
	if (err == -ENOMEM) {
		ktime_t cur = ktime_get();

		end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS);
		if (ktime_before(cur, end)) {
			msleep(20);
			goto retry;
		}
	}

	return err;
}

static void xe_exec_end(struct xe_engine *e,
			struct ttm_validate_buffer *tv_onstack,
			struct ttm_validate_buffer *tv,
			struct ww_acquire_ctx *ww,
			struct list_head *objs)
{
	if (!xe_vm_no_dma_fences(e->vm))
		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
}

int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec *args = data;
	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
	u64 __user *addresses_user = u64_to_user_ptr(args->address);
	struct xe_engine *engine;
	struct xe_sync_entry *syncs = NULL;
	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
	struct ttm_validate_buffer *tv = NULL;
	u32 i, num_syncs = 0;
	struct xe_sched_job *job;
	struct dma_fence *rebind_fence;
	struct xe_vm *vm;
	struct ww_acquire_ctx ww;
	struct list_head objs;
	bool write_locked;
	int err = 0;

	if (XE_IOCTL_ERR(xe, args->extensions) ||
	    XE_IOCTL_ERR(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
	    XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	engine = xe_engine_lookup(xef, args->engine_id);
	if (XE_IOCTL_ERR(xe, !engine))
		return -ENOENT;

	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer))
		return -EINVAL;

	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
		err = -ECANCELED;
		goto err_engine;
	}

	if (args->num_syncs) {
		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
		if (!syncs) {
			err = -ENOMEM;
			goto err_engine;
		}
	}

	vm = engine->vm;

	for (i = 0; i < args->num_syncs; i++) {
		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
					  &syncs_user[i], true,
					  xe_vm_no_dma_fences(vm));
		if (err)
			goto err_syncs;
	}

	if (xe_engine_is_parallel(engine)) {
		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
				       engine->width);
		if (err) {
			err = -EFAULT;
			goto err_syncs;
		}
	}

	/*
	 * We can't install a job into the VM dma-resv shared slot before an
	 * async VM bind passed in as a fence without risking a deadlock, as
	 * the bind can trigger an eviction which in turn depends on anything
	 * in the VM dma-resv shared slots. Not an ideal solution, but we wait
	 * for all dependent async VM binds to start (install the correct
	 * fences into the dma-resv slots) before moving forward.
	 */
	if (!xe_vm_no_dma_fences(vm) &&
	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
		for (i = 0; i < args->num_syncs; i++) {
			struct dma_fence *fence = syncs[i].fence;

			if (fence) {
				err = xe_vm_async_fence_wait_start(fence);
				if (err)
					goto err_syncs;
			}
		}
	}

retry:
	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
		/* The VM lock must be held in write mode to repin userptrs */
		err = down_write_killable(&vm->lock);
		write_locked = true;
	} else {
		err = down_read_interruptible(&vm->lock);
		write_locked = false;
	}
	if (err)
		goto err_syncs;

	/* We don't allow execs while the VM is in error state */
	if (vm->async_ops.error) {
		err = vm->async_ops.error;
		goto err_unlock_list;
	}

	/*
	 * Extreme corner case where we exit a VM error state with a munmap
	 * style VM unbind inflight which requires a rebind. In this case the
	 * rebind needs to install some fences into the dma-resv slots. The
	 * worker to do this is queued; let that worker make progress by
	 * dropping vm->lock, flushing the worker and retrying the exec.
	 */
	if (vm->async_ops.munmap_rebind_inflight) {
		if (write_locked)
			up_write(&vm->lock);
		else
			up_read(&vm->lock);
		flush_work(&vm->async_ops.work);
		goto retry;
	}

	if (write_locked) {
		err = xe_vm_userptr_pin(vm);
		downgrade_write(&vm->lock);
		write_locked = false;
		if (err)
			goto err_unlock_list;
	}

	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
	if (err)
		goto err_unlock_list;

	if (xe_vm_is_closed(engine->vm)) {
		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
		err = -EIO;
		goto err_engine_end;
	}

	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
				  addresses : &args->address);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto err_engine_end;
	}

	/*
	 * Rebind any invalidated userptrs or evicted BOs in the VM, non-compute
	 * VM mode only.
	 */
	rebind_fence = xe_vm_rebind(vm, false);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto err_put_job;
	}

	/*
	 * We store the rebind_fence in the VM so subsequent execs don't get
	 * scheduled before the rebinds of userptrs / evicted BOs are complete.
	 */
	if (rebind_fence) {
		dma_fence_put(vm->rebind_fence);
		vm->rebind_fence = rebind_fence;
	}
	if (vm->rebind_fence) {
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			     &vm->rebind_fence->flags)) {
			dma_fence_put(vm->rebind_fence);
			vm->rebind_fence = NULL;
		} else {
			dma_fence_get(vm->rebind_fence);
			err = drm_sched_job_add_dependency(&job->drm,
							   vm->rebind_fence);
			if (err)
				goto err_put_job;
		}
	}

	/* Wait behind munmap style rebinds */
	if (!xe_vm_no_dma_fences(vm)) {
		err = drm_sched_job_add_resv_dependencies(&job->drm,
							  &vm->resv,
							  DMA_RESV_USAGE_KERNEL);
		if (err)
			goto err_put_job;
	}

	for (i = 0; i < num_syncs && !err; i++)
		err = xe_sync_entry_add_deps(&syncs[i], job);
	if (err)
		goto err_put_job;

	if (!xe_vm_no_dma_fences(vm)) {
		err = down_read_interruptible(&vm->userptr.notifier_lock);
		if (err)
			goto err_put_job;

		err = __xe_vm_userptr_needs_repin(vm);
		if (err)
			goto err_repin;
	}

	/*
	 * Point of no return: if we error after this point, just set an error
	 * on the job and let the DRM scheduler / backend clean up the job.
	 */
	xe_sched_job_arm(job);
	if (!xe_vm_no_dma_fences(vm)) {
		/* Block userptr invalidations / BO eviction */
		dma_resv_add_fence(&vm->resv,
				   &job->drm.s_fence->finished,
				   DMA_RESV_USAGE_BOOKKEEP);

		/*
		 * Make implicit sync work across drivers, assuming all external
		 * BOs are written as we don't pass in a read / write list.
		 */
		xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
					DMA_RESV_USAGE_WRITE);
	}

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], job,
				     &job->drm.s_fence->finished);

	xe_sched_job_push(job);
	xe_vm_reactivate_rebind(vm);

err_repin:
	if (!xe_vm_no_dma_fences(vm))
		up_read(&vm->userptr.notifier_lock);
err_put_job:
	if (err)
		xe_sched_job_put(job);
err_engine_end:
	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
err_unlock_list:
	if (write_locked)
		up_write(&vm->lock);
	else
		up_read(&vm->lock);
	if (err == -EAGAIN)
		goto retry;
err_syncs:
	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_cleanup(&syncs[i]);
	kfree(syncs);
err_engine:
	xe_engine_put(engine);

	return err;
}
416