xref: /linux/drivers/gpu/drm/nouveau/nouveau_sched.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037)
1 // SPDX-License-Identifier: MIT
2 
3 #include <linux/slab.h>
4 #include <drm/gpu_scheduler.h>
5 #include <drm/drm_syncobj.h>
6 
7 #include "nouveau_drv.h"
8 #include "nouveau_gem.h"
9 #include "nouveau_mem.h"
10 #include "nouveau_dma.h"
11 #include "nouveau_exec.h"
12 #include "nouveau_abi16.h"
13 #include "nouveau_sched.h"
14 
15 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000
16 
/* Priorities used by the nouveau scheduler.
 *
 * Starts at 0, since the DRM scheduler interprets those parameters as (initial)
 * index to the run-queue array.
 *
 * Only a single priority is defined; NOUVEAU_SCHED_PRIORITY_SINGLE aliases
 * DRM_SCHED_PRIORITY_KERNEL and NOUVEAU_SCHED_PRIORITY_COUNT is the number of
 * priorities declared before it.
 */
enum nouveau_sched_priority {
	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
	NOUVEAU_SCHED_PRIORITY_COUNT,
};
24 
25 int
nouveau_job_init(struct nouveau_job * job,struct nouveau_job_args * args)26 nouveau_job_init(struct nouveau_job *job,
27 		 struct nouveau_job_args *args)
28 {
29 	struct nouveau_sched *sched = args->sched;
30 	int ret;
31 
32 	INIT_LIST_HEAD(&job->entry);
33 
34 	job->file_priv = args->file_priv;
35 	job->cli = nouveau_cli(args->file_priv);
36 	job->sched = sched;
37 
38 	job->sync = args->sync;
39 	job->resv_usage = args->resv_usage;
40 
41 	job->ops = args->ops;
42 
43 	job->in_sync.count = args->in_sync.count;
44 	if (job->in_sync.count) {
45 		if (job->sync)
46 			return -EINVAL;
47 
48 		job->in_sync.data = kmemdup(args->in_sync.s,
49 					 sizeof(*args->in_sync.s) *
50 					 args->in_sync.count,
51 					 GFP_KERNEL);
52 		if (!job->in_sync.data)
53 			return -ENOMEM;
54 	}
55 
56 	job->out_sync.count = args->out_sync.count;
57 	if (job->out_sync.count) {
58 		if (job->sync) {
59 			ret = -EINVAL;
60 			goto err_free_in_sync;
61 		}
62 
63 		job->out_sync.data = kmemdup(args->out_sync.s,
64 					  sizeof(*args->out_sync.s) *
65 					  args->out_sync.count,
66 					  GFP_KERNEL);
67 		if (!job->out_sync.data) {
68 			ret = -ENOMEM;
69 			goto err_free_in_sync;
70 		}
71 
72 		job->out_sync.objs = kzalloc_objs(*job->out_sync.objs,
73 						  job->out_sync.count);
74 		if (!job->out_sync.objs) {
75 			ret = -ENOMEM;
76 			goto err_free_out_sync;
77 		}
78 
79 		job->out_sync.chains = kzalloc_objs(*job->out_sync.chains,
80 						    job->out_sync.count);
81 		if (!job->out_sync.chains) {
82 			ret = -ENOMEM;
83 			goto err_free_objs;
84 		}
85 	}
86 
87 	ret = drm_sched_job_init(&job->base, &sched->entity,
88 				 args->credits, NULL,
89 				 job->file_priv->client_id);
90 	if (ret)
91 		goto err_free_chains;
92 
93 	job->state = NOUVEAU_JOB_INITIALIZED;
94 
95 	return 0;
96 
97 err_free_chains:
98 	kfree(job->out_sync.chains);
99 err_free_objs:
100 	kfree(job->out_sync.objs);
101 err_free_out_sync:
102 	kfree(job->out_sync.data);
103 err_free_in_sync:
104 	kfree(job->in_sync.data);
105 return ret;
106 }
107 
108 void
nouveau_job_fini(struct nouveau_job * job)109 nouveau_job_fini(struct nouveau_job *job)
110 {
111 	dma_fence_put(job->done_fence);
112 	drm_sched_job_cleanup(&job->base);
113 
114 	job->ops->free(job);
115 }
116 
117 void
nouveau_job_done(struct nouveau_job * job)118 nouveau_job_done(struct nouveau_job *job)
119 {
120 	struct nouveau_sched *sched = job->sched;
121 
122 	spin_lock(&sched->job.list.lock);
123 	list_del(&job->entry);
124 	spin_unlock(&sched->job.list.lock);
125 
126 	wake_up(&sched->job.wq);
127 }
128 
129 void
nouveau_job_free(struct nouveau_job * job)130 nouveau_job_free(struct nouveau_job *job)
131 {
132 	kfree(job->in_sync.data);
133 	kfree(job->out_sync.data);
134 	kfree(job->out_sync.objs);
135 	kfree(job->out_sync.chains);
136 }
137 
138 static int
sync_find_fence(struct nouveau_job * job,struct drm_nouveau_sync * sync,struct dma_fence ** fence)139 sync_find_fence(struct nouveau_job *job,
140 		struct drm_nouveau_sync *sync,
141 		struct dma_fence **fence)
142 {
143 	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
144 	u64 point = 0;
145 	int ret;
146 
147 	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
148 	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
149 		return -EOPNOTSUPP;
150 
151 	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
152 		point = sync->timeline_value;
153 
154 	ret = drm_syncobj_find_fence(job->file_priv,
155 				     sync->handle, point,
156 				     0 /* flags */, fence);
157 	if (ret)
158 		return ret;
159 
160 	return 0;
161 }
162 
163 static int
nouveau_job_add_deps(struct nouveau_job * job)164 nouveau_job_add_deps(struct nouveau_job *job)
165 {
166 	struct dma_fence *in_fence = NULL;
167 	int ret, i;
168 
169 	for (i = 0; i < job->in_sync.count; i++) {
170 		struct drm_nouveau_sync *sync = &job->in_sync.data[i];
171 
172 		ret = sync_find_fence(job, sync, &in_fence);
173 		if (ret) {
174 			NV_PRINTK(warn, job->cli,
175 				  "Failed to find syncobj (-> in): handle=%d\n",
176 				  sync->handle);
177 			return ret;
178 		}
179 
180 		ret = drm_sched_job_add_dependency(&job->base, in_fence);
181 		if (ret)
182 			return ret;
183 	}
184 
185 	return 0;
186 }
187 
188 static void
nouveau_job_fence_attach_cleanup(struct nouveau_job * job)189 nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
190 {
191 	int i;
192 
193 	for (i = 0; i < job->out_sync.count; i++) {
194 		struct drm_syncobj *obj = job->out_sync.objs[i];
195 		struct dma_fence_chain *chain = job->out_sync.chains[i];
196 
197 		if (obj)
198 			drm_syncobj_put(obj);
199 
200 		if (chain)
201 			dma_fence_chain_free(chain);
202 	}
203 }
204 
205 static int
nouveau_job_fence_attach_prepare(struct nouveau_job * job)206 nouveau_job_fence_attach_prepare(struct nouveau_job *job)
207 {
208 	int i, ret;
209 
210 	for (i = 0; i < job->out_sync.count; i++) {
211 		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
212 		struct drm_syncobj **pobj = &job->out_sync.objs[i];
213 		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
214 		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
215 
216 		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
217 		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
218 			ret = -EINVAL;
219 			goto err_sync_cleanup;
220 		}
221 
222 		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
223 		if (!*pobj) {
224 			NV_PRINTK(warn, job->cli,
225 				  "Failed to find syncobj (-> out): handle=%d\n",
226 				  sync->handle);
227 			ret = -ENOENT;
228 			goto err_sync_cleanup;
229 		}
230 
231 		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
232 			*pchain = dma_fence_chain_alloc();
233 			if (!*pchain) {
234 				ret = -ENOMEM;
235 				goto err_sync_cleanup;
236 			}
237 		}
238 	}
239 
240 	return 0;
241 
242 err_sync_cleanup:
243 	nouveau_job_fence_attach_cleanup(job);
244 	return ret;
245 }
246 
247 static void
nouveau_job_fence_attach(struct nouveau_job * job)248 nouveau_job_fence_attach(struct nouveau_job *job)
249 {
250 	struct dma_fence *fence = job->done_fence;
251 	int i;
252 
253 	for (i = 0; i < job->out_sync.count; i++) {
254 		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
255 		struct drm_syncobj **pobj = &job->out_sync.objs[i];
256 		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
257 		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
258 
259 		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
260 			drm_syncobj_add_point(*pobj, *pchain, fence,
261 					      sync->timeline_value);
262 		} else {
263 			drm_syncobj_replace_fence(*pobj, fence);
264 		}
265 
266 		drm_syncobj_put(*pobj);
267 		*pobj = NULL;
268 		*pchain = NULL;
269 	}
270 }
271 
272 int
nouveau_job_submit(struct nouveau_job * job)273 nouveau_job_submit(struct nouveau_job *job)
274 {
275 	struct nouveau_sched *sched = job->sched;
276 	struct dma_fence *done_fence = NULL;
277 	struct drm_gpuvm_exec vm_exec = {
278 		.vm = &nouveau_cli_uvmm(job->cli)->base,
279 		.flags = DRM_EXEC_IGNORE_DUPLICATES,
280 		.num_fences = 1,
281 	};
282 	int ret;
283 
284 	ret = nouveau_job_add_deps(job);
285 	if (ret)
286 		goto err;
287 
288 	ret = nouveau_job_fence_attach_prepare(job);
289 	if (ret)
290 		goto err;
291 
292 	/* Make sure the job appears on the sched_entity's queue in the same
293 	 * order as it was submitted.
294 	 */
295 	mutex_lock(&sched->mutex);
296 
297 	/* Guarantee we won't fail after the submit() callback returned
298 	 * successfully.
299 	 */
300 	if (job->ops->submit) {
301 		ret = job->ops->submit(job, &vm_exec);
302 		if (ret)
303 			goto err_cleanup;
304 	}
305 
306 	/* Submit was successful; add the job to the schedulers job list. */
307 	spin_lock(&sched->job.list.lock);
308 	list_add(&job->entry, &sched->job.list.head);
309 	spin_unlock(&sched->job.list.lock);
310 
311 	drm_sched_job_arm(&job->base);
312 	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
313 	if (job->sync)
314 		done_fence = dma_fence_get(job->done_fence);
315 
316 	if (job->ops->armed_submit)
317 		job->ops->armed_submit(job, &vm_exec);
318 
319 	nouveau_job_fence_attach(job);
320 
321 	/* Set job state before pushing the job to the scheduler,
322 	 * such that we do not overwrite the job state set in run().
323 	 */
324 	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;
325 
326 	drm_sched_entity_push_job(&job->base);
327 
328 	mutex_unlock(&sched->mutex);
329 
330 	if (done_fence) {
331 		dma_fence_wait(done_fence, true);
332 		dma_fence_put(done_fence);
333 	}
334 
335 	return 0;
336 
337 err_cleanup:
338 	mutex_unlock(&sched->mutex);
339 	nouveau_job_fence_attach_cleanup(job);
340 err:
341 	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
342 	return ret;
343 }
344 
345 static struct dma_fence *
nouveau_job_run(struct nouveau_job * job)346 nouveau_job_run(struct nouveau_job *job)
347 {
348 	struct dma_fence *fence;
349 
350 	fence = job->ops->run(job);
351 	if (IS_ERR(fence))
352 		job->state = NOUVEAU_JOB_RUN_FAILED;
353 	else
354 		job->state = NOUVEAU_JOB_RUN_SUCCESS;
355 
356 	return fence;
357 }
358 
/* drm_sched_backend_ops::run_job - run the nouveau job wrapping @sched_job. */
static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	return nouveau_job_run(to_nouveau_job(sched_job));
}
366 
367 static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job * sched_job)368 nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
369 {
370 	struct drm_gpu_scheduler *sched = sched_job->sched;
371 	struct nouveau_job *job = to_nouveau_job(sched_job);
372 	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET;
373 
374 	drm_sched_stop(sched, sched_job);
375 
376 	if (job->ops->timeout)
377 		stat = job->ops->timeout(job);
378 	else
379 		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");
380 
381 	drm_sched_start(sched, 0);
382 
383 	return stat;
384 }
385 
/* drm_sched_backend_ops::free_job - finalize the nouveau job wrapping
 * @sched_job.
 */
static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	nouveau_job_fini(to_nouveau_job(sched_job));
}
393 
394 static const struct drm_sched_backend_ops nouveau_sched_ops = {
395 	.run_job = nouveau_sched_run_job,
396 	.timedout_job = nouveau_sched_timedout_job,
397 	.free_job = nouveau_sched_free_job,
398 };
399 
400 static int
nouveau_sched_init(struct nouveau_sched * sched,struct nouveau_drm * drm,struct workqueue_struct * wq,u32 credit_limit)401 nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
402 		   struct workqueue_struct *wq, u32 credit_limit)
403 {
404 	struct drm_gpu_scheduler *drm_sched = &sched->base;
405 	struct drm_sched_entity *entity = &sched->entity;
406 	struct drm_sched_init_args args = {
407 		.ops = &nouveau_sched_ops,
408 		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
409 		.credit_limit = credit_limit,
410 		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
411 		.name = "nouveau_sched",
412 		.dev = drm->dev->dev
413 	};
414 	int ret;
415 
416 	if (!wq) {
417 		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
418 				     current->pid);
419 		if (!wq)
420 			return -ENOMEM;
421 
422 		sched->wq = wq;
423 	}
424 
425 	args.submit_wq = wq,
426 
427 	ret = drm_sched_init(drm_sched, &args);
428 	if (ret)
429 		goto fail_wq;
430 
431 	/* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
432 	 * when we want to have a single run-queue only.
433 	 *
434 	 * It's not documented, but one will find out when trying to use any
435 	 * other priority running into faults, because the scheduler uses the
436 	 * priority as array index.
437 	 *
438 	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
439 	 * matching the enum type used in drm_sched_entity_init().
440 	 */
441 	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
442 				    &drm_sched, 1, NULL);
443 	if (ret)
444 		goto fail_sched;
445 
446 	mutex_init(&sched->mutex);
447 	spin_lock_init(&sched->job.list.lock);
448 	INIT_LIST_HEAD(&sched->job.list.head);
449 	init_waitqueue_head(&sched->job.wq);
450 
451 	return 0;
452 
453 fail_sched:
454 	drm_sched_fini(drm_sched);
455 fail_wq:
456 	if (sched->wq)
457 		destroy_workqueue(sched->wq);
458 	return ret;
459 }
460 
461 int
nouveau_sched_create(struct nouveau_sched ** psched,struct nouveau_drm * drm,struct workqueue_struct * wq,u32 credit_limit)462 nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
463 		     struct workqueue_struct *wq, u32 credit_limit)
464 {
465 	struct nouveau_sched *sched;
466 	int ret;
467 
468 	sched = kzalloc_obj(*sched);
469 	if (!sched)
470 		return -ENOMEM;
471 
472 	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
473 	if (ret) {
474 		kfree(sched);
475 		return ret;
476 	}
477 
478 	*psched = sched;
479 
480 	return 0;
481 }
482 
483 static bool
nouveau_sched_job_list_empty(struct nouveau_sched * sched)484 nouveau_sched_job_list_empty(struct nouveau_sched *sched)
485 {
486 	bool empty;
487 
488 	spin_lock(&sched->job.list.lock);
489 	empty = list_empty(&sched->job.list.head);
490 	spin_unlock(&sched->job.list.lock);
491 
492 	return empty;
493 }
494 
495 static void
nouveau_sched_fini(struct nouveau_sched * sched)496 nouveau_sched_fini(struct nouveau_sched *sched)
497 {
498 	struct drm_gpu_scheduler *drm_sched = &sched->base;
499 	struct drm_sched_entity *entity = &sched->entity;
500 
501 	wait_event(sched->job.wq, nouveau_sched_job_list_empty(sched));
502 
503 	drm_sched_entity_fini(entity);
504 	drm_sched_fini(drm_sched);
505 
506 	/* Destroy workqueue after scheduler tear down, otherwise it might still
507 	 * be in use.
508 	 */
509 	if (sched->wq)
510 		destroy_workqueue(sched->wq);
511 }
512 
513 void
nouveau_sched_destroy(struct nouveau_sched ** psched)514 nouveau_sched_destroy(struct nouveau_sched **psched)
515 {
516 	struct nouveau_sched *sched = *psched;
517 
518 	nouveau_sched_fini(sched);
519 	kfree(sched);
520 
521 	*psched = NULL;
522 }
523