// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

#include <nvif/class.h>

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
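 *
 * A minimal userspace sketch of this initialization is shown below. It is
 * illustrative only: 'drm_fd' and the split address 'umd_start' are
 * placeholders chosen by the UMD, and the struct layout should be checked
 * against the uapi header (include/uapi/drm/nouveau_drm.h)::
 *
 *	#include <drm/nouveau_drm.h>	// uapi definitions; path may vary
 *	#include <xf86drm.h>		// libdrm's drmIoctl()
 *
 *	// Let the kernel manage [0, umd_start); everything at or above
 *	// umd_start is managed by the UMD via VM_BIND.
 *	static int init_vm(int drm_fd, __u64 umd_start)
 *	{
 *		struct drm_nouveau_vm_init init = {
 *			.kernel_managed_addr = 0,
 *			.kernel_managed_size = umd_start,
 *		};
 *
 *		return drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *	}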
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed mappings
 * will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or multiple
 * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
 *
 * The kernel does not permit a client to:
 * - unmap non-existent sparse mappings
 * - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *   of the previously unmapped sparse mapping within the same VM_BIND ioctl
 * - unmap a sparse mapping and map new memory backed mappings overlapping the
 *   range of the previously unmapped sparse mapping within the same VM_BIND
 *   ioctl
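 *
 * Continuing the sketch above, a single VM_BIND ioctl that reserves a 64 MiB
 * sparse region and immediately backs its first 1 MiB with a GEM object might
 * look roughly as follows; 'sparse_va' and 'bo_handle' are placeholders, and
 * the struct and flag names should be verified against the uapi header::
 *
 *	struct drm_nouveau_vm_bind_op ops[] = {
 *		{	// sparse region, no GEM backing
 *			.op    = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr  = sparse_va,
 *			.range = 64ull << 20,
 *		},
 *		{	// memory backed mapping within the sparse region
 *			.op        = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle    = bo_handle,
 *			.bo_offset = 0,
 *			.addr      = sparse_va,
 *			.range     = 1ull << 20,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr   = (__u64)(uintptr_t)ops,
 *	};
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);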
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finishes execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
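 *
 * For example, building on the sketch above, an asynchronous bind that signals
 * a syncobj on completion could be requested roughly like this; 'syncobj' is a
 * placeholder for a previously created DRM syncobj handle, and the flag names
 * should again be verified against the uapi header::
 *
 *	struct drm_nouveau_sync sig = {
 *		.flags  = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj,
 *	};
 *
 *	bind.flags     = DRM_NOUVEAU_VM_BIND_RUN_ASYNC;
 *	bind.sig_count = 1;
 *	bind.sig_ptr   = (__u64)(uintptr_t)&sig;
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);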
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have an
 * up-to-date view of the VA space. However, the actual mappings might still be
 * pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
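 *
 * A minimal EXEC submission of a single push buffer segment on a given channel
 * might then look roughly like this; 'chid', 'ib_va' and 'ib_len' are
 * placeholders for the channel id and for the GPU VA and length of the
 * segment::
 *
 *	struct drm_nouveau_exec_push push = {
 *		.va     = ib_va,
 *		.va_len = ib_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel    = chid,
 *		.push_count = 1,
 *		.push_ptr   = (__u64)(uintptr_t)&push,
 *	};
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);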
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
			struct drm_gpuvm_exec *vme)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

	nouveau_uvmm_lock(uvmm);
	ret = drm_gpuvm_exec_lock(vme);
	if (ret) {
		nouveau_uvmm_unlock(uvmm);
		return ret;
	}
	nouveau_uvmm_unlock(uvmm);

	ret = drm_gpuvm_exec_validate(vme);
	if (ret) {
		drm_gpuvm_exec_unlock(vme);
		return ret;
	}

	return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
			      struct drm_gpuvm_exec *vme)
{
	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
				      job->resv_usage, job->resv_usage);
	drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

	ret = nvif_chan_gpfifo_wait(&chan->chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nvif_chan_gpfifo_push(&chan->chan, p->va, p->va_len, no_prefetch);
	}

	nvif_chan_gpfifo_post(&chan->chan);

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_done(job);
	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	args.file_priv = __args->file_priv;
	job->chan = __args->chan;

	args.sched = __args->sched;
	/* Plus one to account for the HW fence. */
	args.credits = job->push.count + 1;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (chan->user.oclass < NV50_CHANNEL_GPFIFO)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->chan.gpfifo.max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched = chan16->sched;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}