// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_engine.h"
#include "xe_exec.h"
#include "xe_macros.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_vm.h"

/**
 * DOC: Execbuf (User GPU command submission)
 *
 * Execs have historically been rather complicated in DRM drivers (at least in
 * the i915) because of a few things:
 *
 * - Passing in a list of BOs which are read / written to, creating implicit
 *   syncs
 * - Binding at exec time
 * - Flow controlling the ring at exec time
 *
 * In XE we avoid all of this complication by not allowing a BO list to be
 * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
 * separate operations, and using the DRM scheduler to flow control the ring.
 * Let's deep dive into each of these.
 *
 * We can get away from a BO list by forcing the user to use in / out fences on
 * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
 * user knows an exec writes to a BO and reads from the BO in the next exec, it
 * is the user's responsibility to pass an in / out fence between the two
 * execs).
 *
 * Implicit dependencies for external BOs are handled by using the dma-buf
 * implicit dependency uAPI (TODO: add link). To make this work, each exec must
 * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
 * BO mapped in the VM.
 *
 * We do not allow a user to trigger a bind at exec time; rather, we have a VM
 * bind IOCTL which uses the same in / out fence interface as exec. In that
 * sense, a VM bind is basically the same operation as an exec from the user's
 * perspective, e.g. if an exec depends on a VM bind, use the in / out fence
 * interface (struct drm_xe_sync) to synchronize, just like syncing between two
 * dependent execs (see the sketch further below).
 *
 * Although a user cannot trigger a bind, we still have to rebind userptrs in
 * the VM that have been invalidated since the last exec; likewise, we also have
 * to rebind BOs that have been evicted by the kernel. We schedule these rebinds
 * behind any pending kernel operations on any external BOs in the VM or any BOs
 * private to the VM. This is accomplished by the rebinds waiting on the BOs'
 * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs'
 * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for private BOs
 * and in the DMA_RESV_USAGE_WRITE slot for external BOs).
 *
 * Rebinds / dma-resv usage applies to non-compute mode VMs only; for compute
 * mode VMs we use preempt fences and a rebind worker (TODO: add link).
 *
 * There is no need to flow control the ring in the exec as we write the ring at
 * submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
 * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
 * ring is available.
 *
 * All of this results in a rather simple exec implementation.
 *
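 * As an illustration of the in / out fence contract described above, a minimal
 * userspace-style sketch of an exec that waits on the out-fence of an earlier
 * VM bind could look roughly as follows (the drm_xe_sync contents and the
 * surrounding values such as fd, engine and batch_addr are elided / assumed;
 * this is a sketch, not a complete uAPI example):
 *
 * .. code-block:: c
 *
 *	struct drm_xe_sync sync = {};	// filled with the syncobj the bind signals
 *	struct drm_xe_exec exec = {
 *		.engine_id = engine,
 *		.num_batch_buffer = 1,
 *		.address = batch_addr,
 *		.num_syncs = 1,
 *		.syncs = (uintptr_t)&sync,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_EXEC, &exec);
 *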
 * Flow
 * ~~~~
 *
 * .. code-block::
 *
 *	Parse input arguments
 *	Wait for any async VM bind passed as in-fences to start
 *	<----------------------------------------------------------------------|
 *	Lock global VM lock in read mode                                       |
 *	Pin userptrs (also finds userptr invalidated since last exec)          |
 *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
 *	Validate BOs that have been evicted                                    |
 *	Create job                                                             |
 *	Rebind invalidated userptrs + evicted BOs (non-compute mode)           |
 *	Add rebind fence dependency to job                                     |
 *	Add job to VM dma-resv bookkeeping slot (non-compute mode)             |
 *	Add job to external BOs dma-resv write slots (non-compute mode)        |
 *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
 *	Install in / out fences for job
 *	Submit job
 *	Unlock all
 */

static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
			 struct ttm_validate_buffer tv_onstack[],
			 struct ttm_validate_buffer **tv,
			 struct list_head *objs)
{
	struct xe_vm *vm = e->vm;
	struct xe_vma *vma;
	LIST_HEAD(dups);
	int err;

	*tv = NULL;
	if (xe_vm_no_dma_fences(e->vm))
		return 0;

	err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1);
	if (err)
		return err;

	/*
	 * Validate BOs that have been evicted (i.e. make sure the
	 * BOs have valid placements, possibly moving an evicted BO back
	 * to a location where the GPU can access it).
	 */
	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
		if (xe_vma_is_userptr(vma))
			continue;

		err = xe_bo_validate(vma->bo, vm, false);
		if (err) {
			xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs);
			*tv = NULL;
			return err;
		}
	}

	return 0;
}

static void xe_exec_end(struct xe_engine *e,
			struct ttm_validate_buffer *tv_onstack,
			struct ttm_validate_buffer *tv,
			struct ww_acquire_ctx *ww,
			struct list_head *objs)
{
	if (!xe_vm_no_dma_fences(e->vm))
		xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs);
}

int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec *args = data;
	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
	u64 __user *addresses_user = u64_to_user_ptr(args->address);
	struct xe_engine *engine;
	struct xe_sync_entry *syncs = NULL;
	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
	struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV];
	struct ttm_validate_buffer *tv = NULL;
	u32 i, num_syncs = 0;
	struct xe_sched_job *job;
	struct dma_fence *rebind_fence;
	struct xe_vm *vm;
	struct ww_acquire_ctx ww;
	struct list_head objs;
	bool write_locked;
	int err = 0;

	if (XE_IOCTL_ERR(xe, args->extensions))
		return -EINVAL;

	engine = xe_engine_lookup(xef, args->engine_id);
	if (XE_IOCTL_ERR(xe, !engine))
		return -ENOENT;

	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) {
		err = -EINVAL;
		goto err_engine;
	}

	if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) {
		err = -EINVAL;
		goto err_engine;
	}

	if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) {
		err = -ECANCELED;
		goto err_engine;
	}

	if (args->num_syncs) {
		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
		if (!syncs) {
			err = -ENOMEM;
			goto err_engine;
		}
	}

	vm = engine->vm;
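
	/*
	 * Collect the user's in / out fences. num_syncs tracks how many
	 * entries have been touched so the err_syncs path knows how much to
	 * clean up.
	 */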
	for (i = 0; i < args->num_syncs; i++) {
		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
					  &syncs_user[i], true,
					  xe_vm_no_dma_fences(vm));
		if (err)
			goto err_syncs;
	}

	if (xe_engine_is_parallel(engine)) {
		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
				       engine->width);
		if (err) {
			err = -EFAULT;
			goto err_syncs;
		}
	}

	/*
	 * We can't install a job into the VM dma-resv shared slot before an
	 * async VM bind passed in as a fence without the risk of deadlocking as
	 * the bind can trigger an eviction which in turn depends on anything in
	 * the VM dma-resv shared slots. Not an ideal solution, but we wait for
	 * all dependent async VM binds to start (install correct fences into
	 * dma-resv slots) before moving forward.
	 */
	if (!xe_vm_no_dma_fences(vm) &&
	    vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) {
		for (i = 0; i < args->num_syncs; i++) {
			struct dma_fence *fence = syncs[i].fence;

			if (fence) {
				err = xe_vm_async_fence_wait_start(fence);
				if (err)
					goto err_syncs;
			}
		}
	}

retry:
	if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) {
		err = down_write_killable(&vm->lock);
		write_locked = true;
	} else {
		err = down_read_interruptible(&vm->lock);
		write_locked = false;
	}
	if (err)
		goto err_syncs;

	/* We don't allow execs while the VM is in an error state */
	if (vm->async_ops.error) {
		err = vm->async_ops.error;
		goto err_unlock_list;
	}

	/*
	 * Extreme corner case where we exit a VM error state with a munmap
	 * style VM unbind inflight which requires a rebind. In this case the
	 * rebind needs to install some fences into the dma-resv slots. The
	 * worker to do this is already queued; let that worker make progress by
	 * dropping vm->lock, flushing the worker and retrying the exec.
	 */
	if (vm->async_ops.munmap_rebind_inflight) {
		if (write_locked)
			up_write(&vm->lock);
		else
			up_read(&vm->lock);
		flush_work(&vm->async_ops.work);
		goto retry;
	}

	if (write_locked) {
		err = xe_vm_userptr_pin(vm);
		downgrade_write(&vm->lock);
		write_locked = false;
		if (err)
			goto err_unlock_list;
	}

	err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs);
	if (err)
		goto err_unlock_list;

	if (xe_vm_is_closed(engine->vm)) {
		drm_warn(&xe->drm, "Trying to schedule after vm is closed\n");
		err = -EIO;
		goto err_engine_end;
	}

	job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ?
				  addresses : &args->address);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto err_engine_end;
	}

	/*
	 * Rebind any invalidated userptrs or evicted BOs in the VM, non-compute
	 * VM mode only.
	 */
	rebind_fence = xe_vm_rebind(vm, false);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto err_put_job;
	}

	/*
	 * We store the rebind_fence in the VM so subsequent execs don't get
	 * scheduled before the rebinds of userptrs / evicted BOs are complete.
	 */
	if (rebind_fence) {
		dma_fence_put(vm->rebind_fence);
		vm->rebind_fence = rebind_fence;
	}
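
	/*
	 * If the stored rebind fence has already signaled there is nothing to
	 * wait on, so drop it; otherwise make the job depend on it so the
	 * batch runs only after the rebinds have completed.
	 */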
	if (vm->rebind_fence) {
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			     &vm->rebind_fence->flags)) {
			dma_fence_put(vm->rebind_fence);
			vm->rebind_fence = NULL;
		} else {
			dma_fence_get(vm->rebind_fence);
			err = drm_sched_job_add_dependency(&job->drm,
							   vm->rebind_fence);
			if (err)
				goto err_put_job;
		}
	}

	/* Wait behind munmap style rebinds */
	if (!xe_vm_no_dma_fences(vm)) {
		err = drm_sched_job_add_resv_dependencies(&job->drm,
							  &vm->resv,
							  DMA_RESV_USAGE_KERNEL);
		if (err)
			goto err_put_job;
	}

	for (i = 0; i < num_syncs && !err; i++)
		err = xe_sync_entry_add_deps(&syncs[i], job);
	if (err)
		goto err_put_job;

	if (!xe_vm_no_dma_fences(vm)) {
		err = down_read_interruptible(&vm->userptr.notifier_lock);
		if (err)
			goto err_put_job;

		err = __xe_vm_userptr_needs_repin(vm);
		if (err)
			goto err_repin;
	}

	/*
	 * Point of no return: if we error after this point, just set an error
	 * on the job and let the DRM scheduler / backend clean up the job.
	 */
	xe_sched_job_arm(job);
	if (!xe_vm_no_dma_fences(vm)) {
		/* Block userptr invalidations / BO eviction */
		dma_resv_add_fence(&vm->resv,
				   &job->drm.s_fence->finished,
				   DMA_RESV_USAGE_BOOKKEEP);

		/*
		 * Make implicit sync work across drivers, assuming all external
		 * BOs are written as we don't pass in a read / write list.
		 */
		xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished,
					DMA_RESV_USAGE_WRITE);
	}

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], job,
				     &job->drm.s_fence->finished);

	xe_sched_job_push(job);

err_repin:
	if (!xe_vm_no_dma_fences(vm))
		up_read(&vm->userptr.notifier_lock);
err_put_job:
	if (err)
		xe_sched_job_put(job);
err_engine_end:
	xe_exec_end(engine, tv_onstack, tv, &ww, &objs);
err_unlock_list:
	if (write_locked)
		up_write(&vm->lock);
	else
		up_read(&vm->lock);
	if (err == -EAGAIN)
		goto retry;
err_syncs:
	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_cleanup(&syncs[i]);
	kfree(syncs);
err_engine:
	xe_engine_put(engine);

	return err;
}