// SPDX-License-Identifier: MIT

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed mappings
 * will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must match the range of a previously mapped sparse mapping exactly.
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or multiple
 * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
 *
 * The kernel does not permit userspace to:
 *   - unmap non-existent sparse mappings
 *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
 *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
 *   - unmap a sparse mapping and map new memory backed mappings overlapping the
 *     range of the previously unmapped sparse mapping within the same VM_BIND
 *     ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
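 *
 * As an illustrative sketch of the flow described above (struct, flag and
 * ioctl names follow the nouveau UAPI header, include/uapi/drm/nouveau_drm.h,
 * and libdrm's drmIoctl(); fd, the GEM handle and the addresses and sizes are
 * placeholders and error handling is omitted), a client could initialize the
 * VA space, reserve a sparse region and back part of it with a GEM object
 * like this::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0x0,
 *		.kernel_managed_size = 1ull << 30,
 *	};
 *	struct drm_nouveau_vm_bind_op ops[] = {
 *		{ // reserve a 128 MiB sparse region
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = 1ull << 30,
 *			.range = 128ull << 20,
 *		},
 *		{ // back its first 2 MiB with a GEM object
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.bo_offset = 0,
 *			.addr = 1ull << 30,
 *			.range = 2ull << 20,
 *		},
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (uintptr_t)ops,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);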
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously the ioctl will
 * block until the bind job is finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have
 * an up-to-date view of the VA space. However, the actual mappings might still
 * be pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
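 *
 * As an illustrative sketch of this dependency pattern (struct, flag and ioctl
 * names follow the nouveau UAPI header; the syncobj handle, channel id, push
 * buffer VA/length, the map operation op and error handling are placeholders),
 * an asynchronously executed VM_BIND job may signal a syncobj which a
 * subsequent EXEC job waits on::
 *
 *	struct drm_nouveau_sync bind_done = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = bind_syncobj,
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *		.op_count = 1,
 *		.op_ptr = (uintptr_t)&op,
 *		.sig_count = 1,
 *		.sig_ptr = (uintptr_t)&bind_done,
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = pushbuf_va,
 *		.va_len = pushbuf_len,
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *		.wait_count = 1,
 *		.wait_ptr = (uintptr_t)&bind_done,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
 *	drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);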
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job,
			struct drm_gpuvm_exec *vme)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	int ret;

	/* Create a new fence, but do not emit yet. */
	ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
	if (ret)
		return ret;

	nouveau_uvmm_lock(uvmm);
	ret = drm_gpuvm_exec_lock(vme);
	if (ret) {
		nouveau_uvmm_unlock(uvmm);
		return ret;
	}
	nouveau_uvmm_unlock(uvmm);

	ret = drm_gpuvm_exec_validate(vme);
	if (ret) {
		drm_gpuvm_exec_unlock(vme);
		return ret;
	}

	return 0;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
			      struct drm_gpuvm_exec *vme)
{
	drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
				      job->resv_usage, job->resv_usage);
	drm_gpuvm_exec_unlock(vme);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence);
	if (ret) {
		nouveau_fence_unref(&exec_job->fence);
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	/* The fence was emitted successfully, set the job's fence pointer to
	 * NULL in order to avoid freeing it up when the job is cleaned up.
	 */
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_done(job);
	nouveau_job_free(job);

	kfree(exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}
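
/*
 * Hook points into nouveau's common job/scheduler infrastructure (see
 * nouveau_sched.h): .submit locks the VA space and the VM's GEM objects and
 * validates them, .armed_submit attaches the job's done fence to the
 * reservation objects and drops the locks, .run emits the IB entries and the
 * HW fence on the channel, .free releases the job's resources, and .timeout
 * kills the channel the job was submitted to.
 */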
static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	args.file_priv = __args->file_priv;
	job->chan = __args->chan;

	args.sched = __args->sched;
	/* Plus one to account for the HW fence. */
	args.credits = job->push.count + 1;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}
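
/*
 * The push, wait and sig arrays copied in by nouveau_exec_ucopy() below are
 * plain arrays in userspace memory, referenced by the user pointers in struct
 * drm_nouveau_exec. As an illustrative sketch (struct and flag names follow
 * the nouveau UAPI header; the handle and timeline value are placeholders),
 * a signal entry for a timeline syncobj referenced via sig_ptr/sig_count
 * would look like:
 *
 *	struct drm_nouveau_sync sig = {
 *		.flags = DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ,
 *		.handle = timeline_syncobj,
 *		.timeline_value = 42,
 *	};
 */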
static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int push_max, ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
	if (unlikely(req->push_count > push_max)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, push_max);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched = chan16->sched;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}