// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

#define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000

/* Starts at 0, since the DRM scheduler interprets those parameters as an
 * (initial) index into the run-queue array.
 */
enum nouveau_sched_priority {
	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
	NOUVEAU_SCHED_PRIORITY_COUNT,
};

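/* Initialize a generic nouveau job: duplicate the userspace in/out sync
 * arrays, pre-allocate the out-sync bookkeeping arrays and register the
 * job with the DRM scheduler entity. A synchronous job (args->sync) must
 * not carry any in- or out-syncs, hence -EINVAL. On failure, everything
 * allocated so far is unwound through the err_free_* chain in reverse
 * order of allocation.
 */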
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched *sched = args->sched;
	int ret;

	INIT_LIST_HEAD(&job->entry);

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->sched = sched;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					 sizeof(*args->in_sync.s) *
					 args->in_sync.count,
					 GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					  sizeof(*args->out_sync.s) *
					  args->out_sync.count,
					  GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &sched->entity,
				 args->credits, NULL,
				 job->file_priv->client_id);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

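/* Release the job's resources once the scheduler is done with it: drop
 * the done_fence reference, detach the job from the DRM scheduler and
 * hand the final cleanup to the job implementation via ops->free().
 */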
void
nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);

	job->ops->free(job);
}

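/* Remove a completed job from the scheduler's job list and wake up
 * waiters; nouveau_sched_fini() relies on this wake-up to know when the
 * list has drained.
 */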
void
nouveau_job_done(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;

	spin_lock(&sched->job.list.lock);
	list_del(&job->entry);
	spin_unlock(&sched->job.list.lock);

	wake_up(&sched->job.wq);
}

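/* Counterpart to the allocations done in nouveau_job_init(); typically
 * called from a job implementation's ops->free() callback.
 */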
void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

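/* Resolve a struct drm_nouveau_sync to the dma_fence it currently refers
 * to. Binary syncobjs are looked up at point 0, timeline syncobjs at the
 * requested timeline value; any other sync type is rejected.
 */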
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

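/* Register every in-sync fence as a dependency of the scheduler job, so
 * the job won't be run before all of them have signalled. Note that
 * drm_sched_job_add_dependency() consumes the fence reference in both
 * the success and the error case.
 */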
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

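/* Drop the syncobj references and fence chains acquired by
 * nouveau_job_fence_attach_prepare(), for the error paths where the
 * fences never get attached.
 */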
static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

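/* Acquire everything that nouveau_job_fence_attach() will need, i.e. the
 * syncobj references and, for timeline syncobjs, the pre-allocated fence
 * chains. Doing all fallible work up front keeps the actual fence attach
 * infallible, which matters since it runs after drm_sched_job_arm().
 */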
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

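/* Attach the job's done_fence to every out-sync object; this must not
 * fail, hence all lookups and allocations already happened in
 * nouveau_job_fence_attach_prepare().
 */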
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
		*pobj = NULL;
		*pchain = NULL;
	}
}

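/* Submit a job to the scheduler entity. sched->mutex guarantees jobs
 * appear on the entity's queue in submission order, and splitting the
 * out-sync handling into a fallible prepare and an infallible attach
 * step guarantees that submission cannot fail anymore once the job
 * implementation's submit() callback has returned successfully.
 *
 * For synchronous jobs (job->sync), this call blocks until the job's
 * done_fence has signalled. Illustrative caller pattern (sketch only;
 * concrete callers live in nouveau_exec.c and nouveau_uvmm.c):
 *
 *	ret = nouveau_job_submit(job);
 *	if (ret)
 *		nouveau_job_fini(job);
 *
 * On success the scheduler owns the job; nouveau_sched_free_job() calls
 * nouveau_job_fini() once the job is done.
 */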
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;
	struct dma_fence *done_fence = NULL;
	struct drm_gpuvm_exec vm_exec = {
		.vm = &nouveau_cli_uvmm(job->cli)->base,
		.flags = DRM_EXEC_IGNORE_DUPLICATES,
		.num_fences = 1,
	};
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&sched->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job, &vm_exec);
		if (ret)
			goto err_cleanup;
	}

	/* Submit was successful; add the job to the scheduler's job list. */
	spin_lock(&sched->job.list.lock);
	list_add(&job->entry, &sched->job.list.head);
	spin_unlock(&sched->job.list.lock);

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job, &vm_exec);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&sched->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&sched->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

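/* Run the job through its implementation's run() callback and track
 * whether it made it onto the hardware successfully.
 */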
static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

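/* DRM scheduler timeout handling: stop the scheduler, let the job
 * implementation decide on the recovery policy (if it implements a
 * timeout handler), then restart the scheduler.
 */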
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, 0);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

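/* Initialize a nouveau scheduler instance: one drm_gpu_scheduler with a
 * single entity on top of it. If no workqueue is passed in, a dedicated
 * one is allocated and owned (and later destroyed) by the scheduler
 * instance.
 */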
static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
		   struct workqueue_struct *wq, u32 credit_limit)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;
	struct drm_sched_init_args args = {
		.ops = &nouveau_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = credit_limit,
		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
		.name = "nouveau_sched",
		.dev = drm->dev->dev
	};
	int ret;

	if (!wq) {
		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
				     current->pid);
		if (!wq)
			return -ENOMEM;

		sched->wq = wq;
	}

	args.submit_wq = wq;

	ret = drm_sched_init(drm_sched, &args);
	if (ret)
		goto fail_wq;

	/* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
	 * when we want to have a single run-queue only.
	 *
	 * It's not documented, but using any other priority runs into faults,
	 * because the scheduler uses the priority as an array index.
	 *
	 * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
	 * matching the enum type used in drm_sched_entity_init().
	 */
	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
				    &drm_sched, 1, NULL);
	if (ret)
		goto fail_sched;

	mutex_init(&sched->mutex);
	spin_lock_init(&sched->job.list.lock);
	INIT_LIST_HEAD(&sched->job.list.head);
	init_waitqueue_head(&sched->job.wq);

	return 0;

fail_sched:
	drm_sched_fini(drm_sched);
fail_wq:
	if (sched->wq)
		destroy_workqueue(sched->wq);
	return ret;
}

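/* Allocate and initialize a scheduler instance. Minimal usage sketch
 * (drm, wq and credit_limit depend on the caller's context; passing a
 * NULL wq makes the scheduler allocate its own):
 *
 *	struct nouveau_sched *sched;
 *	int ret;
 *
 *	ret = nouveau_sched_create(&sched, drm, NULL, credit_limit);
 *	if (ret)
 *		return ret;
 *	...
 *	nouveau_sched_destroy(&sched);
 */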
int
nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
		     struct workqueue_struct *wq, u32 credit_limit)
{
	struct nouveau_sched *sched;
	int ret;

	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
	if (!sched)
		return -ENOMEM;

	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
	if (ret) {
		kfree(sched);
		return ret;
	}

	*psched = sched;

	return 0;
}

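/* Tear down a scheduler instance. Waits for the job list to drain first;
 * nouveau_job_done() wakes the waitqueue whenever a job is removed from
 * the list.
 */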
static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;

	rmb(); /* for list_empty to work without lock */
	wait_event(sched->job.wq, list_empty(&sched->job.list.head));

	drm_sched_entity_fini(entity);
	drm_sched_fini(drm_sched);

	/* Destroy workqueue after scheduler teardown, otherwise it might still
	 * be in use.
	 */
	if (sched->wq)
		destroy_workqueue(sched->wq);
}

void
nouveau_sched_destroy(struct nouveau_sched **psched)
{
	struct nouveau_sched *sched = *psched;

	nouveau_sched_fini(sched);
	kfree(sched);

	*psched = NULL;
}