// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000

/* Starts at 0, since the DRM scheduler interprets the priority as an (initial)
 * index into the run-queue array.
 */
enum nouveau_sched_priority {
	NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
	NOUVEAU_SCHED_PRIORITY_COUNT,
};

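/* Initialize a generic nouveau job: copy the userspace in/out sync arrays,
 * pre-allocate the syncobj/fence-chain bookkeeping for the out-syncs and
 * initialize the underlying DRM scheduler job. Sync arrays and synchronous
 * submission (job->sync) are mutually exclusive.
 */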
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched *sched = args->sched;
	int ret;

	INIT_LIST_HEAD(&job->entry);

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->sched = sched;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					    sizeof(*args->in_sync.s) *
					    args->in_sync.count,
					    GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					     sizeof(*args->out_sync.s) *
					     args->out_sync.count,
					     GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &sched->entity,
				 args->credits, NULL,
				 job->file_priv->client_id);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

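/* Release the scheduler job resources and hand the job back to its owner's
 * free() callback. Called from the scheduler's free_job path.
 */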
void
nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);

	job->ops->free(job);
}

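/* Remove the job from the scheduler's job list and wake up waiters, e.g.
 * nouveau_sched_fini() waiting for the list to drain.
 */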
void
nouveau_job_done(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;

	spin_lock(&sched->job.list.lock);
	list_del(&job->entry);
	spin_unlock(&sched->job.list.lock);

	wake_up(&sched->job.wq);
}

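/* Free the sync arrays allocated in nouveau_job_init(). */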
void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

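/* Resolve a struct drm_nouveau_sync (binary or timeline syncobj) into the
 * dma_fence it currently refers to.
 */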
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

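/* Register each in-sync fence as a scheduler dependency, so the job won't be
 * run before all of them have signaled.
 */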
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

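/* Drop the syncobj references and fence chains taken/allocated by
 * nouveau_job_fence_attach_prepare() in case the submission is aborted.
 */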
static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

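/* Look up every out-sync syncobj and pre-allocate a fence chain for timeline
 * syncobjs, such that nouveau_job_fence_attach() cannot fail after the job
 * has been armed.
 */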
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

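/* Attach the job's done_fence to every out-sync object prepared earlier:
 * timeline syncobjs get a new chain point, binary ones have their fence
 * replaced. Ownership of the pre-allocated chains is transferred here.
 */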
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
		*pobj = NULL;
		*pchain = NULL;
	}
}

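/* Submit a job to its scheduler entity: resolve dependencies, run the
 * job-specific submit() callback under sched->mutex to preserve submission
 * order, arm the scheduler job, attach the resulting done_fence to the
 * out-syncs and push the job to the entity. For synchronous jobs, the done
 * fence is waited upon before returning.
 */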
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched *sched = job->sched;
	struct dma_fence *done_fence = NULL;
	struct drm_gpuvm_exec vm_exec = {
		.vm = &nouveau_cli_uvmm(job->cli)->base,
		.flags = DRM_EXEC_IGNORE_DUPLICATES,
		.num_fences = 1,
	};
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&sched->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job, &vm_exec);
		if (ret)
			goto err_cleanup;
	}

	/* Submit was successful; add the job to the scheduler's job list. */
	spin_lock(&sched->job.list.lock);
	list_add(&job->entry, &sched->job.list.head);
	spin_unlock(&sched->job.list.lock);

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job, &vm_exec);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&sched->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&sched->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

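/* Execute the job-specific run() callback and track whether it succeeded. */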
static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

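/* Timeout handling: stop the scheduler, let the job type decide how to
 * recover via its timeout() callback (if any) and restart the scheduler.
 */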
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, 0);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

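/* Set up one DRM GPU scheduler plus a single entity feeding it. If no
 * workqueue is passed in, a dedicated one is allocated and owned by this
 * scheduler instance (tracked via sched->wq for later destruction).
 */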
static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
		   struct workqueue_struct *wq, u32 credit_limit)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;
	struct drm_sched_init_args args = {
		.ops = &nouveau_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = credit_limit,
		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
		.name = "nouveau_sched",
		.dev = drm->dev->dev
	};
	int ret;

	if (!wq) {
		wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
				     current->pid);
		if (!wq)
			return -ENOMEM;

		sched->wq = wq;
	}

	args.submit_wq = wq;

	ret = drm_sched_init(drm_sched, &args);
	if (ret)
		goto fail_wq;

	/* Use DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to
	 * use when we want a single run-queue only.
	 *
	 * It's not documented, but using any other priority runs into faults,
	 * because the scheduler uses the priority as an array index.
	 *
	 * We can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it
	 * doesn't match the enum type expected by drm_sched_entity_init().
	 */
	ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
				    &drm_sched, 1, NULL);
	if (ret)
		goto fail_sched;

	mutex_init(&sched->mutex);
	spin_lock_init(&sched->job.list.lock);
	INIT_LIST_HEAD(&sched->job.list.head);
	init_waitqueue_head(&sched->job.wq);

	return 0;

fail_sched:
	drm_sched_fini(drm_sched);
fail_wq:
	if (sched->wq)
		destroy_workqueue(sched->wq);
	return ret;
}

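/* Allocate and initialize a nouveau_sched instance; the counterpart of
 * nouveau_sched_destroy().
 */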
int
nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
		     struct workqueue_struct *wq, u32 credit_limit)
{
	struct nouveau_sched *sched;
	int ret;

	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
	if (!sched)
		return -ENOMEM;

	ret = nouveau_sched_init(sched, drm, wq, credit_limit);
	if (ret) {
		kfree(sched);
		return ret;
	}

	*psched = sched;

	return 0;
}

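/* Wait for all submitted jobs to be removed from the job list (see
 * nouveau_job_done()), then tear down the entity, the scheduler and, if owned
 * by this instance, the workqueue.
 */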
static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
	struct drm_gpu_scheduler *drm_sched = &sched->base;
	struct drm_sched_entity *entity = &sched->entity;

	rmb(); /* for list_empty to work without lock */
	wait_event(sched->job.wq, list_empty(&sched->job.list.head));

	drm_sched_entity_fini(entity);
	drm_sched_fini(drm_sched);

	/* Destroy workqueue after scheduler tear down, otherwise it might still
	 * be in use.
	 */
	if (sched->wq)
		destroy_workqueue(sched->wq);
}

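/* Tear down and free a nouveau_sched instance created with
 * nouveau_sched_create().
 */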
void
nouveau_sched_destroy(struct nouveau_sched **psched)
{
	struct nouveau_sched *sched = *psched;

	nouveau_sched_fini(sched);
	kfree(sched);

	*psched = NULL;
}