1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2008,2010 Intel Corporation 5 */ 6 7 #include <linux/dma-resv.h> 8 #include <linux/highmem.h> 9 #include <linux/sync_file.h> 10 #include <linux/uaccess.h> 11 12 #include <drm/drm_auth.h> 13 #include <drm/drm_syncobj.h> 14 15 #include "display/intel_frontbuffer.h" 16 17 #include "gem/i915_gem_ioctls.h" 18 #include "gt/intel_context.h" 19 #include "gt/intel_gpu_commands.h" 20 #include "gt/intel_gt.h" 21 #include "gt/intel_gt_buffer_pool.h" 22 #include "gt/intel_gt_pm.h" 23 #include "gt/intel_ring.h" 24 25 #include "pxp/intel_pxp.h" 26 27 #include "i915_cmd_parser.h" 28 #include "i915_drv.h" 29 #include "i915_file_private.h" 30 #include "i915_gem_clflush.h" 31 #include "i915_gem_context.h" 32 #include "i915_gem_evict.h" 33 #include "i915_gem_ioctls.h" 34 #include "i915_reg.h" 35 #include "i915_trace.h" 36 #include "i915_user_extensions.h" 37 38 struct eb_vma { 39 struct i915_vma *vma; 40 unsigned int flags; 41 42 /** This vma's place in the execbuf reservation list */ 43 struct drm_i915_gem_exec_object2 *exec; 44 struct list_head bind_link; 45 struct list_head reloc_link; 46 47 struct hlist_node node; 48 u32 handle; 49 }; 50 51 enum { 52 FORCE_CPU_RELOC = 1, 53 FORCE_GTT_RELOC, 54 FORCE_GPU_RELOC, 55 #define DBG_FORCE_RELOC 0 /* choose one of the above! */ 56 }; 57 58 /* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */ 59 #define __EXEC_OBJECT_HAS_PIN BIT(29) 60 #define __EXEC_OBJECT_HAS_FENCE BIT(28) 61 #define __EXEC_OBJECT_USERPTR_INIT BIT(27) 62 #define __EXEC_OBJECT_NEEDS_MAP BIT(26) 63 #define __EXEC_OBJECT_NEEDS_BIAS BIT(25) 64 #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */ 65 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) 66 67 #define __EXEC_HAS_RELOC BIT(31) 68 #define __EXEC_ENGINE_PINNED BIT(30) 69 #define __EXEC_USERPTR_USED BIT(29) 70 #define __EXEC_INTERNAL_FLAGS (~0u << 29) 71 #define UPDATE PIN_OFFSET_FIXED 72 73 #define BATCH_OFFSET_BIAS (256*1024) 74 75 #define __I915_EXEC_ILLEGAL_FLAGS \ 76 (__I915_EXEC_UNKNOWN_FLAGS | \ 77 I915_EXEC_CONSTANTS_MASK | \ 78 I915_EXEC_RESOURCE_STREAMER) 79 80 /* Catch emission of unexpected errors for CI! */ 81 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 82 #undef EINVAL 83 #define EINVAL ({ \ 84 DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \ 85 22; \ 86 }) 87 #endif 88 89 /** 90 * DOC: User command execution 91 * 92 * Userspace submits commands to be executed on the GPU as an instruction 93 * stream within a GEM object we call a batchbuffer. This instructions may 94 * refer to other GEM objects containing auxiliary state such as kernels, 95 * samplers, render targets and even secondary batchbuffers. Userspace does 96 * not know where in the GPU memory these objects reside and so before the 97 * batchbuffer is passed to the GPU for execution, those addresses in the 98 * batchbuffer and auxiliary objects are updated. This is known as relocation, 99 * or patching. To try and avoid having to relocate each object on the next 100 * execution, userspace is told the location of those objects in this pass, 101 * but this remains just a hint as the kernel may choose a new location for 102 * any object in the future. 103 * 104 * At the level of talking to the hardware, submitting a batchbuffer for the 105 * GPU to execute is to add content to a buffer from which the HW 106 * command streamer is reading. 107 * 108 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e. 
109 * Execlists, this command is not placed on the same buffer as the 110 * remaining items. 111 * 112 * 2. Add a command to invalidate caches to the buffer. 113 * 114 * 3. Add a batchbuffer start command to the buffer; the start command is 115 * essentially a token together with the GPU address of the batchbuffer 116 * to be executed. 117 * 118 * 4. Add a pipeline flush to the buffer. 119 * 120 * 5. Add a memory write command to the buffer to record when the GPU 121 * is done executing the batchbuffer. The memory write writes the 122 * global sequence number of the request, ``i915_request::global_seqno``; 123 * the i915 driver uses the current value in the register to determine 124 * if the GPU has completed the batchbuffer. 125 * 126 * 6. Add a user interrupt command to the buffer. This command instructs 127 * the GPU to issue an interrupt when the command, pipeline flush and 128 * memory write are completed. 129 * 130 * 7. Inform the hardware of the additional commands added to the buffer 131 * (by updating the tail pointer). 132 * 133 * Processing an execbuf ioctl is conceptually split up into a few phases. 134 * 135 * 1. Validation - Ensure all the pointers, handles and flags are valid. 136 * 2. Reservation - Assign GPU address space for every object 137 * 3. Relocation - Update any addresses to point to the final locations 138 * 4. Serialisation - Order the request with respect to its dependencies 139 * 5. Construction - Construct a request to execute the batchbuffer 140 * 6. Submission (at some point in the future execution) 141 * 142 * Reserving resources for the execbuf is the most complicated phase. We 143 * neither want to have to migrate the object in the address space, nor do 144 * we want to have to update any relocations pointing to this object. Ideally, 145 * we want to leave the object where it is and for all the existing relocations 146 * to match. If the object is given a new address, or if userspace thinks the 147 * object is elsewhere, we have to parse all the relocation entries and update 148 * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that 149 * all the target addresses in all of its objects match the value in the 150 * relocation entries and that they all match the presumed offsets given by the 151 * list of execbuffer objects. Using this knowledge, we know that if we haven't 152 * moved any buffers, all the relocation entries are valid and we can skip 153 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU 154 * hang.) The requirement for using I915_EXEC_NO_RELOC are: 155 * 156 * The addresses written in the objects must match the corresponding 157 * reloc.presumed_offset which in turn must match the corresponding 158 * execobject.offset. 159 * 160 * Any render targets written to in the batch must be flagged with 161 * EXEC_OBJECT_WRITE. 162 * 163 * To avoid stalling, execobject.offset should match the current 164 * address of that object within the active context. 165 * 166 * The reservation is done is multiple phases. First we try and keep any 167 * object already bound in its current location - so as long as meets the 168 * constraints imposed by the new execbuffer. Any object left unbound after the 169 * first pass is then fitted into any available idle space. If an object does 170 * not fit, all objects are removed from the reservation and the process rerun 171 * after sorting the objects into a priority order (more difficult to fit 172 * objects are tried first). 
Failing that, the entire VM is cleared and we try 173 * to fit the execbuf once last time before concluding that it simply will not 174 * fit. 175 * 176 * A small complication to all of this is that we allow userspace not only to 177 * specify an alignment and a size for the object in the address space, but 178 * we also allow userspace to specify the exact offset. This objects are 179 * simpler to place (the location is known a priori) all we have to do is make 180 * sure the space is available. 181 * 182 * Once all the objects are in place, patching up the buried pointers to point 183 * to the final locations is a fairly simple job of walking over the relocation 184 * entry arrays, looking up the right address and rewriting the value into 185 * the object. Simple! ... The relocation entries are stored in user memory 186 * and so to access them we have to copy them into a local buffer. That copy 187 * has to avoid taking any pagefaults as they may lead back to a GEM object 188 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split 189 * the relocation into multiple passes. First we try to do everything within an 190 * atomic context (avoid the pagefaults) which requires that we never wait. If 191 * we detect that we may wait, or if we need to fault, then we have to fallback 192 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm 193 * bells yet?) Dropping the mutex means that we lose all the state we have 194 * built up so far for the execbuf and we must reset any global data. However, 195 * we do leave the objects pinned in their final locations - which is a 196 * potential issue for concurrent execbufs. Once we have left the mutex, we can 197 * allocate and copy all the relocation entries into a large array at our 198 * leisure, reacquire the mutex, reclaim all the objects and other state and 199 * then proceed to update any incorrect addresses with the objects. 200 * 201 * As we process the relocation entries, we maintain a record of whether the 202 * object is being written to. Using NORELOC, we expect userspace to provide 203 * this information instead. We also check whether we can skip the relocation 204 * by comparing the expected value inside the relocation entry with the target's 205 * final address. If they differ, we have to map the current object and rewrite 206 * the 4 or 8 byte pointer within. 207 * 208 * Serialising an execbuf is quite simple according to the rules of the GEM 209 * ABI. Execution within each context is ordered by the order of submission. 210 * Writes to any GEM object are in order of submission and are exclusive. Reads 211 * from a GEM object are unordered with respect to other reads, but ordered by 212 * writes. A write submitted after a read cannot occur before the read, and 213 * similarly any read submitted after a write cannot occur before the write. 214 * Writes are ordered between engines such that only one write occurs at any 215 * time (completing any reads beforehand) - using semaphores where available 216 * and CPU serialisation otherwise. Other GEM access obey the same rules, any 217 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU 218 * reads before starting, and any read (either using set-domain or pread) must 219 * flush all GPU writes before starting. (Note we only employ a barrier before, 220 * we currently rely on userspace not concurrently starting a new execution 221 * whilst reading or writing to an object. 
This may be an advantage or not 222 * depending on how much you trust userspace not to shoot themselves in the 223 * foot.) Serialisation may just result in the request being inserted into 224 * a DAG awaiting its turn, but most simple is to wait on the CPU until 225 * all dependencies are resolved. 226 * 227 * After all of that, is just a matter of closing the request and handing it to 228 * the hardware (well, leaving it in a queue to be executed). However, we also 229 * offer the ability for batchbuffers to be run with elevated privileges so 230 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.) 231 * Before any batch is given extra privileges we first must check that it 232 * contains no nefarious instructions, we check that each instruction is from 233 * our whitelist and all registers are also from an allowed list. We first 234 * copy the user's batchbuffer to a shadow (so that the user doesn't have 235 * access to it, either by the CPU or GPU as we scan it) and then parse each 236 * instruction. If everything is ok, we set a flag telling the hardware to run 237 * the batchbuffer in trusted mode, otherwise the ioctl is rejected. 238 */ 239 240 struct eb_fence { 241 struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ 242 struct dma_fence *dma_fence; 243 u64 value; 244 struct dma_fence_chain *chain_fence; 245 }; 246 247 struct i915_execbuffer { 248 struct drm_i915_private *i915; /** i915 backpointer */ 249 struct drm_file *file; /** per-file lookup tables and limits */ 250 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ 251 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ 252 struct eb_vma *vma; 253 254 struct intel_gt *gt; /* gt for the execbuf */ 255 struct intel_context *context; /* logical state for the request */ 256 struct i915_gem_context *gem_context; /** caller's context */ 257 intel_wakeref_t wakeref; 258 intel_wakeref_t wakeref_gt0; 259 260 /** our requests to build */ 261 struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; 262 /** identity of the batch obj/vma */ 263 struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; 264 struct i915_vma *trampoline; /** trampoline used for chaining */ 265 266 /** used for excl fence in dma_resv objects when > 1 BB submitted */ 267 struct dma_fence *composite_fence; 268 269 /** actual size of execobj[] as we may extend it for the cmdparser */ 270 unsigned int buffer_count; 271 272 /* number of batches in execbuf IOCTL */ 273 unsigned int num_batches; 274 275 /** list of vma not yet bound during reservation phase */ 276 struct list_head unbound; 277 278 /** list of vma that have execobj.relocation_count */ 279 struct list_head relocs; 280 281 struct i915_gem_ww_ctx ww; 282 283 /** 284 * Track the most recently used object for relocations, as we 285 * frequently have to perform multiple relocations within the same 286 * obj/page 287 */ 288 struct reloc_cache { 289 struct drm_mm_node node; /** temporary GTT binding */ 290 unsigned long vaddr; /** Current kmap address */ 291 unsigned long page; /** Currently mapped page index */ 292 unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */ 293 bool use_64bit_reloc : 1; 294 bool has_llc : 1; 295 bool has_fence : 1; 296 bool needs_unfenced : 1; 297 } reloc_cache; 298 299 u64 invalid_flags; /** Set of execobj.flags that are invalid */ 300 301 /** Length of batch within object */ 302 u64 batch_len[MAX_ENGINE_INSTANCE + 1]; 303 u32 batch_start_offset; /** Location within object of batch */ 304 u32 batch_flags; /** Flags composed 
for emit_bb_start() */ 305 struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ 306 307 /** 308 * Indicate either the size of the hastable used to resolve 309 * relocation handles, or if negative that we are using a direct 310 * index into the execobj[]. 311 */ 312 int lut_size; 313 struct hlist_head *buckets; /** ht for relocation handles */ 314 315 struct eb_fence *fences; 316 unsigned long num_fences; 317 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 318 struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1]; 319 #endif 320 }; 321 322 static int eb_parse(struct i915_execbuffer *eb); 323 static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); 324 static void eb_unpin_engine(struct i915_execbuffer *eb); 325 static void eb_capture_release(struct i915_execbuffer *eb); 326 327 static bool eb_use_cmdparser(const struct i915_execbuffer *eb) 328 { 329 return intel_engine_requires_cmd_parser(eb->context->engine) || 330 (intel_engine_using_cmd_parser(eb->context->engine) && 331 eb->args->batch_len); 332 } 333 334 static int eb_create(struct i915_execbuffer *eb) 335 { 336 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { 337 unsigned int size = 1 + ilog2(eb->buffer_count); 338 339 /* 340 * Without a 1:1 association between relocation handles and 341 * the execobject[] index, we instead create a hashtable. 342 * We size it dynamically based on available memory, starting 343 * first with 1:1 assocative hash and scaling back until 344 * the allocation succeeds. 345 * 346 * Later on we use a positive lut_size to indicate we are 347 * using this hashtable, and a negative value to indicate a 348 * direct lookup. 349 */ 350 do { 351 gfp_t flags; 352 353 /* While we can still reduce the allocation size, don't 354 * raise a warning and allow the allocation to fail. 355 * On the last pass though, we want to try as hard 356 * as possible to perform the allocation and warn 357 * if it fails. 358 */ 359 flags = GFP_KERNEL; 360 if (size > 1) 361 flags |= __GFP_NORETRY | __GFP_NOWARN; 362 363 eb->buckets = kzalloc(sizeof(struct hlist_head) << size, 364 flags); 365 if (eb->buckets) 366 break; 367 } while (--size); 368 369 if (unlikely(!size)) 370 return -ENOMEM; 371 372 eb->lut_size = size; 373 } else { 374 eb->lut_size = -eb->buffer_count; 375 } 376 377 return 0; 378 } 379 380 static bool 381 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, 382 const struct i915_vma *vma, 383 unsigned int flags) 384 { 385 const u64 start = i915_vma_offset(vma); 386 const u64 size = i915_vma_size(vma); 387 388 if (size < entry->pad_to_size) 389 return true; 390 391 if (entry->alignment && !IS_ALIGNED(start, entry->alignment)) 392 return true; 393 394 if (flags & EXEC_OBJECT_PINNED && 395 start != entry->offset) 396 return true; 397 398 if (flags & __EXEC_OBJECT_NEEDS_BIAS && 399 start < BATCH_OFFSET_BIAS) 400 return true; 401 402 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && 403 (start + size + 4095) >> 32) 404 return true; 405 406 if (flags & __EXEC_OBJECT_NEEDS_MAP && 407 !i915_vma_is_map_and_fenceable(vma)) 408 return true; 409 410 return false; 411 } 412 413 static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, 414 unsigned int exec_flags) 415 { 416 u64 pin_flags = 0; 417 418 if (exec_flags & EXEC_OBJECT_NEEDS_GTT) 419 pin_flags |= PIN_GLOBAL; 420 421 /* 422 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, 423 * limit address to the first 4GBs for unflagged objects. 
424 */ 425 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 426 pin_flags |= PIN_ZONE_4G; 427 428 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) 429 pin_flags |= PIN_MAPPABLE; 430 431 if (exec_flags & EXEC_OBJECT_PINNED) 432 pin_flags |= entry->offset | PIN_OFFSET_FIXED; 433 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) 434 pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; 435 436 return pin_flags; 437 } 438 439 static int 440 eb_pin_vma(struct i915_execbuffer *eb, 441 const struct drm_i915_gem_exec_object2 *entry, 442 struct eb_vma *ev) 443 { 444 struct i915_vma *vma = ev->vma; 445 u64 pin_flags; 446 int err; 447 448 if (vma->node.size) 449 pin_flags = __i915_vma_offset(vma); 450 else 451 pin_flags = entry->offset & PIN_OFFSET_MASK; 452 453 pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE; 454 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) 455 pin_flags |= PIN_GLOBAL; 456 457 /* Attempt to reuse the current location if available */ 458 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); 459 if (err == -EDEADLK) 460 return err; 461 462 if (unlikely(err)) { 463 if (entry->flags & EXEC_OBJECT_PINNED) 464 return err; 465 466 /* Failing that pick any _free_ space if suitable */ 467 err = i915_vma_pin_ww(vma, &eb->ww, 468 entry->pad_to_size, 469 entry->alignment, 470 eb_pin_flags(entry, ev->flags) | 471 PIN_USER | PIN_NOEVICT | PIN_VALIDATE); 472 if (unlikely(err)) 473 return err; 474 } 475 476 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { 477 err = i915_vma_pin_fence(vma); 478 if (unlikely(err)) 479 return err; 480 481 if (vma->fence) 482 ev->flags |= __EXEC_OBJECT_HAS_FENCE; 483 } 484 485 ev->flags |= __EXEC_OBJECT_HAS_PIN; 486 if (eb_vma_misplaced(entry, vma, ev->flags)) 487 return -EBADSLT; 488 489 return 0; 490 } 491 492 static void 493 eb_unreserve_vma(struct eb_vma *ev) 494 { 495 if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) 496 __i915_vma_unpin_fence(ev->vma); 497 498 ev->flags &= ~__EXEC_OBJECT_RESERVED; 499 } 500 501 static int 502 eb_validate_vma(struct i915_execbuffer *eb, 503 struct drm_i915_gem_exec_object2 *entry, 504 struct i915_vma *vma) 505 { 506 /* Relocations are disallowed for all platforms after TGL-LP. This 507 * also covers all platforms with local memory. 508 */ 509 if (entry->relocation_count && 510 GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) 511 return -EINVAL; 512 513 if (unlikely(entry->flags & eb->invalid_flags)) 514 return -EINVAL; 515 516 if (unlikely(entry->alignment && 517 !is_power_of_2_u64(entry->alignment))) 518 return -EINVAL; 519 520 /* 521 * Offset can be used as input (EXEC_OBJECT_PINNED), reject 522 * any non-page-aligned or non-canonical addresses. 523 */ 524 if (unlikely(entry->flags & EXEC_OBJECT_PINNED && 525 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) 526 return -EINVAL; 527 528 /* pad_to_size was once a reserved field, so sanitize it */ 529 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { 530 if (unlikely(offset_in_page(entry->pad_to_size))) 531 return -EINVAL; 532 } else { 533 entry->pad_to_size = 0; 534 } 535 /* 536 * From drm_mm perspective address space is continuous, 537 * so from this point we're always using non-canonical 538 * form internally. 
539 */ 540 entry->offset = gen8_noncanonical_addr(entry->offset); 541 542 if (!eb->reloc_cache.has_fence) { 543 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 544 } else { 545 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || 546 eb->reloc_cache.needs_unfenced) && 547 i915_gem_object_is_tiled(vma->obj)) 548 entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; 549 } 550 551 return 0; 552 } 553 554 static bool 555 is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx) 556 { 557 return eb->args->flags & I915_EXEC_BATCH_FIRST ? 558 buffer_idx < eb->num_batches : 559 buffer_idx >= eb->args->buffer_count - eb->num_batches; 560 } 561 562 static int 563 eb_add_vma(struct i915_execbuffer *eb, 564 unsigned int *current_batch, 565 unsigned int i, 566 struct i915_vma *vma) 567 { 568 struct drm_i915_private *i915 = eb->i915; 569 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 570 struct eb_vma *ev = &eb->vma[i]; 571 572 ev->vma = vma; 573 ev->exec = entry; 574 ev->flags = entry->flags; 575 576 if (eb->lut_size > 0) { 577 ev->handle = entry->handle; 578 hlist_add_head(&ev->node, 579 &eb->buckets[hash_32(entry->handle, 580 eb->lut_size)]); 581 } 582 583 if (entry->relocation_count) 584 list_add_tail(&ev->reloc_link, &eb->relocs); 585 586 /* 587 * SNA is doing fancy tricks with compressing batch buffers, which leads 588 * to negative relocation deltas. Usually that works out ok since the 589 * relocate address is still positive, except when the batch is placed 590 * very low in the GTT. Ensure this doesn't happen. 591 * 592 * Note that actual hangs have only been observed on gen7, but for 593 * paranoia do it everywhere. 594 */ 595 if (is_batch_buffer(eb, i)) { 596 if (entry->relocation_count && 597 !(ev->flags & EXEC_OBJECT_PINNED)) 598 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS; 599 if (eb->reloc_cache.has_fence) 600 ev->flags |= EXEC_OBJECT_NEEDS_FENCE; 601 602 eb->batches[*current_batch] = ev; 603 604 if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) { 605 drm_dbg(&i915->drm, 606 "Attempting to use self-modifying batch buffer\n"); 607 return -EINVAL; 608 } 609 610 if (range_overflows_t(u64, 611 eb->batch_start_offset, 612 eb->args->batch_len, 613 ev->vma->size)) { 614 drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); 615 return -EINVAL; 616 } 617 618 if (eb->args->batch_len == 0) 619 eb->batch_len[*current_batch] = ev->vma->size - 620 eb->batch_start_offset; 621 else 622 eb->batch_len[*current_batch] = eb->args->batch_len; 623 if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */ 624 drm_dbg(&i915->drm, "Invalid batch length\n"); 625 return -EINVAL; 626 } 627 628 ++*current_batch; 629 } 630 631 return 0; 632 } 633 634 static int use_cpu_reloc(const struct reloc_cache *cache, 635 const struct drm_i915_gem_object *obj) 636 { 637 if (!i915_gem_object_has_struct_page(obj)) 638 return false; 639 640 if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) 641 return true; 642 643 if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) 644 return false; 645 646 /* 647 * For objects created by userspace through GEM_CREATE with pat_index 648 * set by set_pat extension, i915_gem_object_has_cache_level() always 649 * return true, otherwise the call would fall back to checking whether 650 * the object is un-cached. 
651 */ 652 return (cache->has_llc || 653 obj->cache_dirty || 654 !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE)); 655 } 656 657 static int eb_reserve_vma(struct i915_execbuffer *eb, 658 struct eb_vma *ev, 659 u64 pin_flags) 660 { 661 struct drm_i915_gem_exec_object2 *entry = ev->exec; 662 struct i915_vma *vma = ev->vma; 663 int err; 664 665 if (drm_mm_node_allocated(&vma->node) && 666 eb_vma_misplaced(entry, vma, ev->flags)) { 667 err = i915_vma_unbind(vma); 668 if (err) 669 return err; 670 } 671 672 err = i915_vma_pin_ww(vma, &eb->ww, 673 entry->pad_to_size, entry->alignment, 674 eb_pin_flags(entry, ev->flags) | pin_flags); 675 if (err) 676 return err; 677 678 if (entry->offset != i915_vma_offset(vma)) { 679 entry->offset = i915_vma_offset(vma) | UPDATE; 680 eb->args->flags |= __EXEC_HAS_RELOC; 681 } 682 683 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { 684 err = i915_vma_pin_fence(vma); 685 if (unlikely(err)) 686 return err; 687 688 if (vma->fence) 689 ev->flags |= __EXEC_OBJECT_HAS_FENCE; 690 } 691 692 ev->flags |= __EXEC_OBJECT_HAS_PIN; 693 GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags)); 694 695 return 0; 696 } 697 698 static bool eb_unbind(struct i915_execbuffer *eb, bool force) 699 { 700 const unsigned int count = eb->buffer_count; 701 unsigned int i; 702 struct list_head last; 703 bool unpinned = false; 704 705 /* Resort *all* the objects into priority order */ 706 INIT_LIST_HEAD(&eb->unbound); 707 INIT_LIST_HEAD(&last); 708 709 for (i = 0; i < count; i++) { 710 struct eb_vma *ev = &eb->vma[i]; 711 unsigned int flags = ev->flags; 712 713 if (!force && flags & EXEC_OBJECT_PINNED && 714 flags & __EXEC_OBJECT_HAS_PIN) 715 continue; 716 717 unpinned = true; 718 eb_unreserve_vma(ev); 719 720 if (flags & EXEC_OBJECT_PINNED) 721 /* Pinned must have their slot */ 722 list_add(&ev->bind_link, &eb->unbound); 723 else if (flags & __EXEC_OBJECT_NEEDS_MAP) 724 /* Map require the lowest 256MiB (aperture) */ 725 list_add_tail(&ev->bind_link, &eb->unbound); 726 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 727 /* Prioritise 4GiB region for restricted bo */ 728 list_add(&ev->bind_link, &last); 729 else 730 list_add_tail(&ev->bind_link, &last); 731 } 732 733 list_splice_tail(&last, &eb->unbound); 734 return unpinned; 735 } 736 737 static int eb_reserve(struct i915_execbuffer *eb) 738 { 739 struct eb_vma *ev; 740 unsigned int pass; 741 int err = 0; 742 743 /* 744 * We have one more buffers that we couldn't bind, which could be due to 745 * various reasons. To resolve this we have 4 passes, with every next 746 * level turning the screws tighter: 747 * 748 * 0. Unbind all objects that do not match the GTT constraints for the 749 * execbuffer (fenceable, mappable, alignment etc). Bind all new 750 * objects. This avoids unnecessary unbinding of later objects in order 751 * to make room for the earlier objects *unless* we need to defragment. 752 * 753 * 1. Reorder the buffers, where objects with the most restrictive 754 * placement requirements go first (ignoring fixed location buffers for 755 * now). For example, objects needing the mappable aperture (the first 756 * 256M of GTT), should go first vs objects that can be placed just 757 * about anywhere. Repeat the previous pass. 758 * 759 * 2. Consider buffers that are pinned at a fixed location. Also try to 760 * evict the entire VM this time, leaving only objects that we were 761 * unable to lock. Try again to bind the buffers. (still using the new 762 * buffer order). 763 * 764 * 3. 
We likely have object lock contention for one or more stubborn 765 * objects in the VM, for which we need to evict to make forward 766 * progress (perhaps we are fighting the shrinker?). When evicting the 767 * VM this time around, anything that we can't lock we now track using 768 * the busy_bo, using the full lock (after dropping the vm->mutex to 769 * prevent deadlocks), instead of trylock. We then continue to evict the 770 * VM, this time with the stubborn object locked, which we can now 771 * hopefully unbind (if still bound in the VM). Repeat until the VM is 772 * evicted. Finally we should be able bind everything. 773 */ 774 for (pass = 0; pass <= 3; pass++) { 775 int pin_flags = PIN_USER | PIN_VALIDATE; 776 777 if (pass == 0) 778 pin_flags |= PIN_NONBLOCK; 779 780 if (pass >= 1) 781 eb_unbind(eb, pass >= 2); 782 783 if (pass == 2) { 784 err = mutex_lock_interruptible(&eb->context->vm->mutex); 785 if (!err) { 786 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); 787 mutex_unlock(&eb->context->vm->mutex); 788 } 789 if (err) 790 return err; 791 } 792 793 if (pass == 3) { 794 retry: 795 err = mutex_lock_interruptible(&eb->context->vm->mutex); 796 if (!err) { 797 struct drm_i915_gem_object *busy_bo = NULL; 798 799 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); 800 mutex_unlock(&eb->context->vm->mutex); 801 if (err && busy_bo) { 802 err = i915_gem_object_lock(busy_bo, &eb->ww); 803 i915_gem_object_put(busy_bo); 804 if (!err) 805 goto retry; 806 } 807 } 808 if (err) 809 return err; 810 } 811 812 list_for_each_entry(ev, &eb->unbound, bind_link) { 813 err = eb_reserve_vma(eb, ev, pin_flags); 814 if (err) 815 break; 816 } 817 818 if (err != -ENOSPC) 819 break; 820 } 821 822 return err; 823 } 824 825 static int eb_select_context(struct i915_execbuffer *eb) 826 { 827 struct i915_gem_context *ctx; 828 829 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); 830 if (unlikely(IS_ERR(ctx))) 831 return PTR_ERR(ctx); 832 833 eb->gem_context = ctx; 834 if (i915_gem_context_has_full_ppgtt(ctx)) 835 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; 836 837 return 0; 838 } 839 840 static int __eb_add_lut(struct i915_execbuffer *eb, 841 u32 handle, struct i915_vma *vma) 842 { 843 struct i915_gem_context *ctx = eb->gem_context; 844 struct i915_lut_handle *lut; 845 int err; 846 847 lut = i915_lut_handle_alloc(); 848 if (unlikely(!lut)) 849 return -ENOMEM; 850 851 i915_vma_get(vma); 852 if (!atomic_fetch_inc(&vma->open_count)) 853 i915_vma_reopen(vma); 854 lut->handle = handle; 855 lut->ctx = ctx; 856 857 /* Check that the context hasn't been closed in the meantime */ 858 err = -EINTR; 859 if (!mutex_lock_interruptible(&ctx->lut_mutex)) { 860 if (likely(!i915_gem_context_is_closed(ctx))) 861 err = radix_tree_insert(&ctx->handles_vma, handle, vma); 862 else 863 err = -ENOENT; 864 if (err == 0) { /* And nor has this handle */ 865 struct drm_i915_gem_object *obj = vma->obj; 866 867 spin_lock(&obj->lut_lock); 868 if (idr_find(&eb->file->object_idr, handle) == obj) { 869 list_add(&lut->obj_link, &obj->lut_list); 870 } else { 871 radix_tree_delete(&ctx->handles_vma, handle); 872 err = -ENOENT; 873 } 874 spin_unlock(&obj->lut_lock); 875 } 876 mutex_unlock(&ctx->lut_mutex); 877 } 878 if (unlikely(err)) 879 goto err; 880 881 return 0; 882 883 err: 884 i915_vma_close(vma); 885 i915_vma_put(vma); 886 i915_lut_handle_free(lut); 887 return err; 888 } 889 890 static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) 891 { 892 struct i915_address_space *vm = 
eb->context->vm; 893 894 do { 895 struct drm_i915_gem_object *obj; 896 struct i915_vma *vma; 897 int err; 898 899 rcu_read_lock(); 900 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); 901 if (likely(vma && vma->vm == vm)) 902 vma = i915_vma_tryget(vma); 903 rcu_read_unlock(); 904 if (likely(vma)) 905 return vma; 906 907 obj = i915_gem_object_lookup(eb->file, handle); 908 if (unlikely(!obj)) 909 return ERR_PTR(-ENOENT); 910 911 /* 912 * If the user has opted-in for protected-object tracking, make 913 * sure the object encryption can be used. 914 * We only need to do this when the object is first used with 915 * this context, because the context itself will be banned when 916 * the protected objects become invalid. 917 */ 918 if (i915_gem_context_uses_protected_content(eb->gem_context) && 919 i915_gem_object_is_protected(obj)) { 920 err = intel_pxp_key_check(eb->i915->pxp, obj, true); 921 if (err) { 922 i915_gem_object_put(obj); 923 return ERR_PTR(err); 924 } 925 } 926 927 vma = i915_vma_instance(obj, vm, NULL); 928 if (IS_ERR(vma)) { 929 i915_gem_object_put(obj); 930 return vma; 931 } 932 933 err = __eb_add_lut(eb, handle, vma); 934 if (likely(!err)) 935 return vma; 936 937 i915_gem_object_put(obj); 938 if (err != -EEXIST) 939 return ERR_PTR(err); 940 } while (1); 941 } 942 943 static int eb_lookup_vmas(struct i915_execbuffer *eb) 944 { 945 unsigned int i, current_batch = 0; 946 int err = 0; 947 948 INIT_LIST_HEAD(&eb->relocs); 949 950 for (i = 0; i < eb->buffer_count; i++) { 951 struct i915_vma *vma; 952 953 vma = eb_lookup_vma(eb, eb->exec[i].handle); 954 if (IS_ERR(vma)) { 955 err = PTR_ERR(vma); 956 goto err; 957 } 958 959 err = eb_validate_vma(eb, &eb->exec[i], vma); 960 if (unlikely(err)) { 961 i915_vma_put(vma); 962 goto err; 963 } 964 965 err = eb_add_vma(eb, ¤t_batch, i, vma); 966 if (err) 967 return err; 968 969 if (i915_gem_object_is_userptr(vma->obj)) { 970 err = i915_gem_object_userptr_submit_init(vma->obj); 971 if (err) { 972 if (i + 1 < eb->buffer_count) { 973 /* 974 * Execbuffer code expects last vma entry to be NULL, 975 * since we already initialized this entry, 976 * set the next value to NULL or we mess up 977 * cleanup handling. 
978 */ 979 eb->vma[i + 1].vma = NULL; 980 } 981 982 return err; 983 } 984 985 eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT; 986 eb->args->flags |= __EXEC_USERPTR_USED; 987 } 988 } 989 990 return 0; 991 992 err: 993 eb->vma[i].vma = NULL; 994 return err; 995 } 996 997 static int eb_lock_vmas(struct i915_execbuffer *eb) 998 { 999 unsigned int i; 1000 int err; 1001 1002 for (i = 0; i < eb->buffer_count; i++) { 1003 struct eb_vma *ev = &eb->vma[i]; 1004 struct i915_vma *vma = ev->vma; 1005 1006 err = i915_gem_object_lock(vma->obj, &eb->ww); 1007 if (err) 1008 return err; 1009 } 1010 1011 return 0; 1012 } 1013 1014 static int eb_validate_vmas(struct i915_execbuffer *eb) 1015 { 1016 unsigned int i; 1017 int err; 1018 1019 INIT_LIST_HEAD(&eb->unbound); 1020 1021 err = eb_lock_vmas(eb); 1022 if (err) 1023 return err; 1024 1025 for (i = 0; i < eb->buffer_count; i++) { 1026 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 1027 struct eb_vma *ev = &eb->vma[i]; 1028 struct i915_vma *vma = ev->vma; 1029 1030 err = eb_pin_vma(eb, entry, ev); 1031 if (err == -EDEADLK) 1032 return err; 1033 1034 if (!err) { 1035 if (entry->offset != i915_vma_offset(vma)) { 1036 entry->offset = i915_vma_offset(vma) | UPDATE; 1037 eb->args->flags |= __EXEC_HAS_RELOC; 1038 } 1039 } else { 1040 eb_unreserve_vma(ev); 1041 1042 list_add_tail(&ev->bind_link, &eb->unbound); 1043 if (drm_mm_node_allocated(&vma->node)) { 1044 err = i915_vma_unbind(vma); 1045 if (err) 1046 return err; 1047 } 1048 } 1049 1050 /* Reserve enough slots to accommodate composite fences */ 1051 err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches); 1052 if (err) 1053 return err; 1054 1055 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && 1056 eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); 1057 } 1058 1059 if (!list_empty(&eb->unbound)) 1060 return eb_reserve(eb); 1061 1062 return 0; 1063 } 1064 1065 static struct eb_vma * 1066 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) 1067 { 1068 if (eb->lut_size < 0) { 1069 if (handle >= -eb->lut_size) 1070 return NULL; 1071 return &eb->vma[handle]; 1072 } else { 1073 struct hlist_head *head; 1074 struct eb_vma *ev; 1075 1076 head = &eb->buckets[hash_32(handle, eb->lut_size)]; 1077 hlist_for_each_entry(ev, head, node) { 1078 if (ev->handle == handle) 1079 return ev; 1080 } 1081 return NULL; 1082 } 1083 } 1084 1085 static void eb_release_vmas(struct i915_execbuffer *eb, bool final) 1086 { 1087 const unsigned int count = eb->buffer_count; 1088 unsigned int i; 1089 1090 for (i = 0; i < count; i++) { 1091 struct eb_vma *ev = &eb->vma[i]; 1092 struct i915_vma *vma = ev->vma; 1093 1094 if (!vma) 1095 break; 1096 1097 eb_unreserve_vma(ev); 1098 1099 if (final) 1100 i915_vma_put(vma); 1101 } 1102 1103 eb_capture_release(eb); 1104 eb_unpin_engine(eb); 1105 } 1106 1107 static void eb_destroy(const struct i915_execbuffer *eb) 1108 { 1109 if (eb->lut_size > 0) 1110 kfree(eb->buckets); 1111 } 1112 1113 static u64 1114 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 1115 const struct i915_vma *target) 1116 { 1117 return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target)); 1118 } 1119 1120 static void reloc_cache_init(struct reloc_cache *cache, 1121 struct drm_i915_private *i915) 1122 { 1123 cache->page = -1; 1124 cache->vaddr = 0; 1125 /* Must be a variable in the struct to allow GCC to unroll. 
*/ 1126 cache->graphics_ver = GRAPHICS_VER(i915); 1127 cache->has_llc = HAS_LLC(i915); 1128 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 1129 cache->has_fence = cache->graphics_ver < 4; 1130 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; 1131 cache->node.flags = 0; 1132 } 1133 1134 static void *unmask_page(unsigned long p) 1135 { 1136 return (void *)(uintptr_t)(p & PAGE_MASK); 1137 } 1138 1139 static unsigned int unmask_flags(unsigned long p) 1140 { 1141 return p & ~PAGE_MASK; 1142 } 1143 1144 #define KMAP 0x4 /* after CLFLUSH_FLAGS */ 1145 1146 static struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) 1147 { 1148 struct drm_i915_private *i915 = 1149 container_of(cache, struct i915_execbuffer, reloc_cache)->i915; 1150 return to_gt(i915)->ggtt; 1151 } 1152 1153 static void reloc_cache_unmap(struct reloc_cache *cache) 1154 { 1155 void *vaddr; 1156 1157 if (!cache->vaddr) 1158 return; 1159 1160 vaddr = unmask_page(cache->vaddr); 1161 if (cache->vaddr & KMAP) 1162 kunmap_local(vaddr); 1163 else 1164 io_mapping_unmap_atomic((void __iomem *)vaddr); 1165 } 1166 1167 static void reloc_cache_remap(struct reloc_cache *cache, 1168 struct drm_i915_gem_object *obj) 1169 { 1170 void *vaddr; 1171 1172 if (!cache->vaddr) 1173 return; 1174 1175 if (cache->vaddr & KMAP) { 1176 struct page *page = i915_gem_object_get_page(obj, cache->page); 1177 1178 vaddr = kmap_local_page(page); 1179 cache->vaddr = unmask_flags(cache->vaddr) | 1180 (unsigned long)vaddr; 1181 } else { 1182 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1183 unsigned long offset; 1184 1185 offset = cache->node.start; 1186 if (!drm_mm_node_allocated(&cache->node)) 1187 offset += cache->page << PAGE_SHIFT; 1188 1189 cache->vaddr = (unsigned long) 1190 io_mapping_map_atomic_wc(&ggtt->iomap, offset); 1191 } 1192 } 1193 1194 static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) 1195 { 1196 void *vaddr; 1197 1198 if (!cache->vaddr) 1199 return; 1200 1201 vaddr = unmask_page(cache->vaddr); 1202 if (cache->vaddr & KMAP) { 1203 struct drm_i915_gem_object *obj = 1204 (struct drm_i915_gem_object *)cache->node.mm; 1205 if (cache->vaddr & CLFLUSH_AFTER) 1206 mb(); 1207 1208 kunmap_local(vaddr); 1209 i915_gem_object_finish_access(obj); 1210 } else { 1211 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1212 1213 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1214 io_mapping_unmap_atomic((void __iomem *)vaddr); 1215 1216 if (drm_mm_node_allocated(&cache->node)) { 1217 ggtt->vm.clear_range(&ggtt->vm, 1218 cache->node.start, 1219 cache->node.size); 1220 mutex_lock(&ggtt->vm.mutex); 1221 drm_mm_remove_node(&cache->node); 1222 mutex_unlock(&ggtt->vm.mutex); 1223 } else { 1224 i915_vma_unpin((struct i915_vma *)cache->node.mm); 1225 } 1226 } 1227 1228 cache->vaddr = 0; 1229 cache->page = -1; 1230 } 1231 1232 static void *reloc_kmap(struct drm_i915_gem_object *obj, 1233 struct reloc_cache *cache, 1234 unsigned long pageno) 1235 { 1236 void *vaddr; 1237 struct page *page; 1238 1239 if (cache->vaddr) { 1240 kunmap_local(unmask_page(cache->vaddr)); 1241 } else { 1242 unsigned int flushes; 1243 int err; 1244 1245 err = i915_gem_object_prepare_write(obj, &flushes); 1246 if (err) 1247 return ERR_PTR(err); 1248 1249 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 1250 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); 1251 1252 cache->vaddr = flushes | KMAP; 1253 cache->node.mm = (void *)obj; 1254 if (flushes) 1255 mb(); 1256 } 1257 1258 page = i915_gem_object_get_page(obj, pageno); 1259 if (!obj->mm.dirty) 1260 
set_page_dirty(page); 1261 1262 vaddr = kmap_local_page(page); 1263 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; 1264 cache->page = pageno; 1265 1266 return vaddr; 1267 } 1268 1269 static void *reloc_iomap(struct i915_vma *batch, 1270 struct i915_execbuffer *eb, 1271 unsigned long page) 1272 { 1273 struct drm_i915_gem_object *obj = batch->obj; 1274 struct reloc_cache *cache = &eb->reloc_cache; 1275 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1276 unsigned long offset; 1277 void *vaddr; 1278 1279 if (cache->vaddr) { 1280 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1281 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 1282 } else { 1283 struct i915_vma *vma = ERR_PTR(-ENODEV); 1284 int err; 1285 1286 if (i915_gem_object_is_tiled(obj)) 1287 return ERR_PTR(-EINVAL); 1288 1289 if (use_cpu_reloc(cache, obj)) 1290 return NULL; 1291 1292 err = i915_gem_object_set_to_gtt_domain(obj, true); 1293 if (err) 1294 return ERR_PTR(err); 1295 1296 /* 1297 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch 1298 * VMA from the object list because we no longer pin. 1299 * 1300 * Only attempt to pin the batch buffer to ggtt if the current batch 1301 * is not inside ggtt, or the batch buffer is not misplaced. 1302 */ 1303 if (!i915_is_ggtt(batch->vm) || 1304 !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) { 1305 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, 1306 PIN_MAPPABLE | 1307 PIN_NONBLOCK /* NOWARN */ | 1308 PIN_NOEVICT); 1309 } 1310 1311 if (vma == ERR_PTR(-EDEADLK)) 1312 return vma; 1313 1314 if (IS_ERR(vma)) { 1315 memset(&cache->node, 0, sizeof(cache->node)); 1316 mutex_lock(&ggtt->vm.mutex); 1317 err = drm_mm_insert_node_in_range 1318 (&ggtt->vm.mm, &cache->node, 1319 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1320 0, ggtt->mappable_end, 1321 DRM_MM_INSERT_LOW); 1322 mutex_unlock(&ggtt->vm.mutex); 1323 if (err) /* no inactive aperture space, use cpu reloc */ 1324 return NULL; 1325 } else { 1326 cache->node.start = i915_ggtt_offset(vma); 1327 cache->node.mm = (void *)vma; 1328 } 1329 } 1330 1331 offset = cache->node.start; 1332 if (drm_mm_node_allocated(&cache->node)) { 1333 ggtt->vm.insert_page(&ggtt->vm, 1334 i915_gem_object_get_dma_address(obj, page), 1335 offset, 1336 i915_gem_get_pat_index(ggtt->vm.i915, 1337 I915_CACHE_NONE), 1338 0); 1339 } else { 1340 offset += page << PAGE_SHIFT; 1341 } 1342 1343 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, 1344 offset); 1345 cache->page = page; 1346 cache->vaddr = (unsigned long)vaddr; 1347 1348 return vaddr; 1349 } 1350 1351 static void *reloc_vaddr(struct i915_vma *vma, 1352 struct i915_execbuffer *eb, 1353 unsigned long page) 1354 { 1355 struct reloc_cache *cache = &eb->reloc_cache; 1356 void *vaddr; 1357 1358 if (cache->page == page) { 1359 vaddr = unmask_page(cache->vaddr); 1360 } else { 1361 vaddr = NULL; 1362 if ((cache->vaddr & KMAP) == 0) 1363 vaddr = reloc_iomap(vma, eb, page); 1364 if (!vaddr) 1365 vaddr = reloc_kmap(vma->obj, cache, page); 1366 } 1367 1368 return vaddr; 1369 } 1370 1371 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) 1372 { 1373 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { 1374 if (flushes & CLFLUSH_BEFORE) 1375 drm_clflush_virt_range(addr, sizeof(*addr)); 1376 1377 *addr = value; 1378 1379 /* 1380 * Writes to the same cacheline are serialised by the CPU 1381 * (including clflush). 
On the write path, we only require 1382 * that it hits memory in an orderly fashion and place 1383 * mb barriers at the start and end of the relocation phase 1384 * to ensure ordering of clflush wrt to the system. 1385 */ 1386 if (flushes & CLFLUSH_AFTER) 1387 drm_clflush_virt_range(addr, sizeof(*addr)); 1388 } else 1389 *addr = value; 1390 } 1391 1392 static u64 1393 relocate_entry(struct i915_vma *vma, 1394 const struct drm_i915_gem_relocation_entry *reloc, 1395 struct i915_execbuffer *eb, 1396 const struct i915_vma *target) 1397 { 1398 u64 target_addr = relocation_target(reloc, target); 1399 u64 offset = reloc->offset; 1400 bool wide = eb->reloc_cache.use_64bit_reloc; 1401 void *vaddr; 1402 1403 repeat: 1404 vaddr = reloc_vaddr(vma, eb, 1405 offset >> PAGE_SHIFT); 1406 if (IS_ERR(vaddr)) 1407 return PTR_ERR(vaddr); 1408 1409 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); 1410 clflush_write32(vaddr + offset_in_page(offset), 1411 lower_32_bits(target_addr), 1412 eb->reloc_cache.vaddr); 1413 1414 if (wide) { 1415 offset += sizeof(u32); 1416 target_addr >>= 32; 1417 wide = false; 1418 goto repeat; 1419 } 1420 1421 return target->node.start | UPDATE; 1422 } 1423 1424 static u64 1425 eb_relocate_entry(struct i915_execbuffer *eb, 1426 struct eb_vma *ev, 1427 const struct drm_i915_gem_relocation_entry *reloc) 1428 { 1429 struct drm_i915_private *i915 = eb->i915; 1430 struct eb_vma *target; 1431 int err; 1432 1433 /* we've already hold a reference to all valid objects */ 1434 target = eb_get_vma(eb, reloc->target_handle); 1435 if (unlikely(!target)) 1436 return -ENOENT; 1437 1438 /* Validate that the target is in a valid r/w GPU domain */ 1439 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { 1440 drm_dbg(&i915->drm, "reloc with multiple write domains: " 1441 "target %d offset %d " 1442 "read %08x write %08x\n", 1443 reloc->target_handle, 1444 (int) reloc->offset, 1445 reloc->read_domains, 1446 reloc->write_domain); 1447 return -EINVAL; 1448 } 1449 if (unlikely((reloc->write_domain | reloc->read_domains) 1450 & ~I915_GEM_GPU_DOMAINS)) { 1451 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: " 1452 "target %d offset %d " 1453 "read %08x write %08x\n", 1454 reloc->target_handle, 1455 (int) reloc->offset, 1456 reloc->read_domains, 1457 reloc->write_domain); 1458 return -EINVAL; 1459 } 1460 1461 if (reloc->write_domain) { 1462 target->flags |= EXEC_OBJECT_WRITE; 1463 1464 /* 1465 * Sandybridge PPGTT errata: We need a global gtt mapping 1466 * for MI and pipe_control writes because the gpu doesn't 1467 * properly redirect them through the ppgtt for non_secure 1468 * batchbuffers. 1469 */ 1470 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && 1471 GRAPHICS_VER(eb->i915) == 6 && 1472 !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) { 1473 struct i915_vma *vma = target->vma; 1474 1475 reloc_cache_unmap(&eb->reloc_cache); 1476 mutex_lock(&vma->vm->mutex); 1477 err = i915_vma_bind(target->vma, 1478 target->vma->obj->pat_index, 1479 PIN_GLOBAL, NULL, NULL); 1480 mutex_unlock(&vma->vm->mutex); 1481 reloc_cache_remap(&eb->reloc_cache, ev->vma->obj); 1482 if (err) 1483 return err; 1484 } 1485 } 1486 1487 /* 1488 * If the relocation already has the right value in it, no 1489 * more work needs to be done. 1490 */ 1491 if (!DBG_FORCE_RELOC && 1492 gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset) 1493 return 0; 1494 1495 /* Check that the relocation address is valid... 
*/ 1496 if (unlikely(reloc->offset > 1497 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { 1498 drm_dbg(&i915->drm, "Relocation beyond object bounds: " 1499 "target %d offset %d size %d.\n", 1500 reloc->target_handle, 1501 (int)reloc->offset, 1502 (int)ev->vma->size); 1503 return -EINVAL; 1504 } 1505 if (unlikely(reloc->offset & 3)) { 1506 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: " 1507 "target %d offset %d.\n", 1508 reloc->target_handle, 1509 (int)reloc->offset); 1510 return -EINVAL; 1511 } 1512 1513 /* 1514 * If we write into the object, we need to force the synchronisation 1515 * barrier, either with an asynchronous clflush or if we executed the 1516 * patching using the GPU (though that should be serialised by the 1517 * timeline). To be completely sure, and since we are required to 1518 * do relocations we are already stalling, disable the user's opt 1519 * out of our synchronisation. 1520 */ 1521 ev->flags &= ~EXEC_OBJECT_ASYNC; 1522 1523 /* and update the user's relocation entry */ 1524 return relocate_entry(ev->vma, reloc, eb, target->vma); 1525 } 1526 1527 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) 1528 { 1529 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1530 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; 1531 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1532 struct drm_i915_gem_relocation_entry __user *urelocs = 1533 u64_to_user_ptr(entry->relocs_ptr); 1534 unsigned long remain = entry->relocation_count; 1535 1536 if (unlikely(remain > N_RELOC(ULONG_MAX))) 1537 return -EINVAL; 1538 1539 /* 1540 * We must check that the entire relocation array is safe 1541 * to read. However, if the array is not writable the user loses 1542 * the updated relocation values. 1543 */ 1544 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))) 1545 return -EFAULT; 1546 1547 do { 1548 struct drm_i915_gem_relocation_entry *r = stack; 1549 unsigned int count = 1550 min_t(unsigned long, remain, ARRAY_SIZE(stack)); 1551 unsigned int copied; 1552 1553 /* 1554 * This is the fast path and we cannot handle a pagefault 1555 * whilst holding the struct mutex lest the user pass in the 1556 * relocations contained within a mmaped bo. For in such a case 1557 * we, the page fault handler would call i915_gem_fault() and 1558 * we would try to acquire the struct mutex again. Obviously 1559 * this is bad and so lockdep complains vehemently. 1560 */ 1561 pagefault_disable(); 1562 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); 1563 pagefault_enable(); 1564 if (unlikely(copied)) { 1565 remain = -EFAULT; 1566 goto out; 1567 } 1568 1569 remain -= count; 1570 do { 1571 u64 offset = eb_relocate_entry(eb, ev, r); 1572 1573 if (likely(offset == 0)) { 1574 } else if ((s64)offset < 0) { 1575 remain = (int)offset; 1576 goto out; 1577 } else { 1578 /* 1579 * Note that reporting an error now 1580 * leaves everything in an inconsistent 1581 * state as we have *already* changed 1582 * the relocation value inside the 1583 * object. As we have not changed the 1584 * reloc.presumed_offset or will not 1585 * change the execobject.offset, on the 1586 * call we may not rewrite the value 1587 * inside the object, leaving it 1588 * dangling and causing a GPU hang. Unless 1589 * userspace dynamically rebuilds the 1590 * relocations on each execbuf rather than 1591 * presume a static tree. 
1592 * 1593 * We did previously check if the relocations 1594 * were writable (access_ok), an error now 1595 * would be a strange race with mprotect, 1596 * having already demonstrated that we 1597 * can read from this userspace address. 1598 */ 1599 offset = gen8_canonical_addr(offset & ~UPDATE); 1600 __put_user(offset, 1601 &urelocs[r - stack].presumed_offset); 1602 } 1603 } while (r++, --count); 1604 urelocs += ARRAY_SIZE(stack); 1605 } while (remain); 1606 out: 1607 reloc_cache_reset(&eb->reloc_cache, eb); 1608 return remain; 1609 } 1610 1611 static int 1612 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) 1613 { 1614 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1615 struct drm_i915_gem_relocation_entry *relocs = 1616 u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1617 unsigned int i; 1618 int err; 1619 1620 for (i = 0; i < entry->relocation_count; i++) { 1621 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); 1622 1623 if ((s64)offset < 0) { 1624 err = (int)offset; 1625 goto err; 1626 } 1627 } 1628 err = 0; 1629 err: 1630 reloc_cache_reset(&eb->reloc_cache, eb); 1631 return err; 1632 } 1633 1634 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) 1635 { 1636 const char __user *addr, *end; 1637 unsigned long size; 1638 char __maybe_unused c; 1639 1640 size = entry->relocation_count; 1641 if (size == 0) 1642 return 0; 1643 1644 if (size > N_RELOC(ULONG_MAX)) 1645 return -EINVAL; 1646 1647 addr = u64_to_user_ptr(entry->relocs_ptr); 1648 size *= sizeof(struct drm_i915_gem_relocation_entry); 1649 if (!access_ok(addr, size)) 1650 return -EFAULT; 1651 1652 end = addr + size; 1653 for (; addr < end; addr += PAGE_SIZE) { 1654 int err = __get_user(c, addr); 1655 if (err) 1656 return err; 1657 } 1658 return __get_user(c, end - 1); 1659 } 1660 1661 static int eb_copy_relocations(const struct i915_execbuffer *eb) 1662 { 1663 struct drm_i915_gem_relocation_entry *relocs; 1664 const unsigned int count = eb->buffer_count; 1665 unsigned int i; 1666 int err; 1667 1668 for (i = 0; i < count; i++) { 1669 const unsigned int nreloc = eb->exec[i].relocation_count; 1670 struct drm_i915_gem_relocation_entry __user *urelocs; 1671 unsigned long size; 1672 unsigned long copied; 1673 1674 if (nreloc == 0) 1675 continue; 1676 1677 err = check_relocations(&eb->exec[i]); 1678 if (err) 1679 goto err; 1680 1681 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1682 size = nreloc * sizeof(*relocs); 1683 1684 relocs = kvmalloc_array(1, size, GFP_KERNEL); 1685 if (!relocs) { 1686 err = -ENOMEM; 1687 goto err; 1688 } 1689 1690 /* copy_from_user is limited to < 4GiB */ 1691 copied = 0; 1692 do { 1693 unsigned int len = 1694 min_t(u64, BIT_ULL(31), size - copied); 1695 1696 if (__copy_from_user((char *)relocs + copied, 1697 (char __user *)urelocs + copied, 1698 len)) 1699 goto end; 1700 1701 copied += len; 1702 } while (copied < size); 1703 1704 /* 1705 * As we do not update the known relocation offsets after 1706 * relocating (due to the complexities in lock handling), 1707 * we need to mark them as invalid now so that we force the 1708 * relocation processing next time. Just in case the target 1709 * object is evicted and then rebound into its old 1710 * presumed_offset before the next execbuffer - if that 1711 * happened we would make the mistake of assuming that the 1712 * relocations were valid. 
1713 */ 1714 if (!user_access_begin(urelocs, size)) 1715 goto end; 1716 1717 for (copied = 0; copied < nreloc; copied++) 1718 unsafe_put_user(-1, 1719 &urelocs[copied].presumed_offset, 1720 end_user); 1721 user_access_end(); 1722 1723 eb->exec[i].relocs_ptr = (uintptr_t)relocs; 1724 } 1725 1726 return 0; 1727 1728 end_user: 1729 user_access_end(); 1730 end: 1731 kvfree(relocs); 1732 err = -EFAULT; 1733 err: 1734 while (i--) { 1735 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); 1736 if (eb->exec[i].relocation_count) 1737 kvfree(relocs); 1738 } 1739 return err; 1740 } 1741 1742 static int eb_prefault_relocations(const struct i915_execbuffer *eb) 1743 { 1744 const unsigned int count = eb->buffer_count; 1745 unsigned int i; 1746 1747 for (i = 0; i < count; i++) { 1748 int err; 1749 1750 err = check_relocations(&eb->exec[i]); 1751 if (err) 1752 return err; 1753 } 1754 1755 return 0; 1756 } 1757 1758 static int eb_reinit_userptr(struct i915_execbuffer *eb) 1759 { 1760 const unsigned int count = eb->buffer_count; 1761 unsigned int i; 1762 int ret; 1763 1764 if (likely(!(eb->args->flags & __EXEC_USERPTR_USED))) 1765 return 0; 1766 1767 for (i = 0; i < count; i++) { 1768 struct eb_vma *ev = &eb->vma[i]; 1769 1770 if (!i915_gem_object_is_userptr(ev->vma->obj)) 1771 continue; 1772 1773 ret = i915_gem_object_userptr_submit_init(ev->vma->obj); 1774 if (ret) 1775 return ret; 1776 1777 ev->flags |= __EXEC_OBJECT_USERPTR_INIT; 1778 } 1779 1780 return 0; 1781 } 1782 1783 static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) 1784 { 1785 bool have_copy = false; 1786 struct eb_vma *ev; 1787 int err = 0; 1788 1789 repeat: 1790 if (signal_pending(current)) { 1791 err = -ERESTARTSYS; 1792 goto out; 1793 } 1794 1795 /* We may process another execbuffer during the unlock... */ 1796 eb_release_vmas(eb, false); 1797 i915_gem_ww_ctx_fini(&eb->ww); 1798 1799 /* 1800 * We take 3 passes through the slowpatch. 1801 * 1802 * 1 - we try to just prefault all the user relocation entries and 1803 * then attempt to reuse the atomic pagefault disabled fast path again. 1804 * 1805 * 2 - we copy the user entries to a local buffer here outside of the 1806 * local and allow ourselves to wait upon any rendering before 1807 * relocations 1808 * 1809 * 3 - we already have a local copy of the relocation entries, but 1810 * were interrupted (EAGAIN) whilst waiting for the objects, try again. 
1811 */ 1812 if (!err) { 1813 err = eb_prefault_relocations(eb); 1814 } else if (!have_copy) { 1815 err = eb_copy_relocations(eb); 1816 have_copy = err == 0; 1817 } else { 1818 cond_resched(); 1819 err = 0; 1820 } 1821 1822 if (!err) 1823 err = eb_reinit_userptr(eb); 1824 1825 i915_gem_ww_ctx_init(&eb->ww, true); 1826 if (err) 1827 goto out; 1828 1829 /* reacquire the objects */ 1830 repeat_validate: 1831 err = eb_pin_engine(eb, false); 1832 if (err) 1833 goto err; 1834 1835 err = eb_validate_vmas(eb); 1836 if (err) 1837 goto err; 1838 1839 GEM_BUG_ON(!eb->batches[0]); 1840 1841 list_for_each_entry(ev, &eb->relocs, reloc_link) { 1842 if (!have_copy) { 1843 err = eb_relocate_vma(eb, ev); 1844 if (err) 1845 break; 1846 } else { 1847 err = eb_relocate_vma_slow(eb, ev); 1848 if (err) 1849 break; 1850 } 1851 } 1852 1853 if (err == -EDEADLK) 1854 goto err; 1855 1856 if (err && !have_copy) 1857 goto repeat; 1858 1859 if (err) 1860 goto err; 1861 1862 /* as last step, parse the command buffer */ 1863 err = eb_parse(eb); 1864 if (err) 1865 goto err; 1866 1867 /* 1868 * Leave the user relocations as are, this is the painfully slow path, 1869 * and we want to avoid the complication of dropping the lock whilst 1870 * having buffers reserved in the aperture and so causing spurious 1871 * ENOSPC for random operations. 1872 */ 1873 1874 err: 1875 if (err == -EDEADLK) { 1876 eb_release_vmas(eb, false); 1877 err = i915_gem_ww_ctx_backoff(&eb->ww); 1878 if (!err) 1879 goto repeat_validate; 1880 } 1881 1882 if (err == -EAGAIN) 1883 goto repeat; 1884 1885 out: 1886 if (have_copy) { 1887 const unsigned int count = eb->buffer_count; 1888 unsigned int i; 1889 1890 for (i = 0; i < count; i++) { 1891 const struct drm_i915_gem_exec_object2 *entry = 1892 &eb->exec[i]; 1893 struct drm_i915_gem_relocation_entry *relocs; 1894 1895 if (!entry->relocation_count) 1896 continue; 1897 1898 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1899 kvfree(relocs); 1900 } 1901 } 1902 1903 return err; 1904 } 1905 1906 static int eb_relocate_parse(struct i915_execbuffer *eb) 1907 { 1908 int err; 1909 bool throttle = true; 1910 1911 retry: 1912 err = eb_pin_engine(eb, throttle); 1913 if (err) { 1914 if (err != -EDEADLK) 1915 return err; 1916 1917 goto err; 1918 } 1919 1920 /* only throttle once, even if we didn't need to throttle */ 1921 throttle = false; 1922 1923 err = eb_validate_vmas(eb); 1924 if (err == -EAGAIN) 1925 goto slow; 1926 else if (err) 1927 goto err; 1928 1929 /* The objects are in their final locations, apply the relocations. */ 1930 if (eb->args->flags & __EXEC_HAS_RELOC) { 1931 struct eb_vma *ev; 1932 1933 list_for_each_entry(ev, &eb->relocs, reloc_link) { 1934 err = eb_relocate_vma(eb, ev); 1935 if (err) 1936 break; 1937 } 1938 1939 if (err == -EDEADLK) 1940 goto err; 1941 else if (err) 1942 goto slow; 1943 } 1944 1945 if (!err) 1946 err = eb_parse(eb); 1947 1948 err: 1949 if (err == -EDEADLK) { 1950 eb_release_vmas(eb, false); 1951 err = i915_gem_ww_ctx_backoff(&eb->ww); 1952 if (!err) 1953 goto retry; 1954 } 1955 1956 return err; 1957 1958 slow: 1959 err = eb_relocate_parse_slow(eb); 1960 if (err) 1961 /* 1962 * If the user expects the execobject.offset and 1963 * reloc.presumed_offset to be an exact match, 1964 * as for using NO_RELOC, then we cannot update 1965 * the execobject.offset until we have completed 1966 * relocation. 
1967 */ 1968 eb->args->flags &= ~__EXEC_HAS_RELOC; 1969 1970 return err; 1971 } 1972 1973 /* 1974 * Two helper loops control the order in which requests / batches are created 1975 * and added to the backend. Requests are created in order from the parent to 1976 * the last child. Requests are added in the reverse order, from the last child 1977 * to parent. This is done for locking reasons as the timeline lock is acquired 1978 * during request creation and released when the request is added to the 1979 * backend. To make lockdep happy (see intel_context_timeline_lock), this 1980 * ordering must be followed. 1981 */ 1982 #define for_each_batch_create_order(_eb, _i) \ 1983 for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i)) 1984 #define for_each_batch_add_order(_eb, _i) \ 1985 BUILD_BUG_ON(!typecheck(int, _i)); \ 1986 for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i)) 1987 1988 static struct i915_request * 1989 eb_find_first_request_added(struct i915_execbuffer *eb) 1990 { 1991 int i; 1992 1993 for_each_batch_add_order(eb, i) 1994 if (eb->requests[i]) 1995 return eb->requests[i]; 1996 1997 GEM_BUG_ON("Request not found"); 1998 1999 return NULL; 2000 } 2001 2002 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 2003 2004 /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ 2005 static int eb_capture_stage(struct i915_execbuffer *eb) 2006 { 2007 const unsigned int count = eb->buffer_count; 2008 unsigned int i = count, j; 2009 2010 while (i--) { 2011 struct eb_vma *ev = &eb->vma[i]; 2012 struct i915_vma *vma = ev->vma; 2013 unsigned int flags = ev->flags; 2014 2015 if (!(flags & EXEC_OBJECT_CAPTURE)) 2016 continue; 2017 2018 if (i915_gem_context_is_recoverable(eb->gem_context) && 2019 (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) 2020 return -EINVAL; 2021 2022 for_each_batch_create_order(eb, j) { 2023 struct i915_capture_list *capture; 2024 2025 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 2026 if (!capture) 2027 continue; 2028 2029 capture->next = eb->capture_lists[j]; 2030 capture->vma_res = i915_vma_resource_get(vma->resource); 2031 eb->capture_lists[j] = capture; 2032 } 2033 } 2034 2035 return 0; 2036 } 2037 2038 /* Commit once we're in the critical path */ 2039 static void eb_capture_commit(struct i915_execbuffer *eb) 2040 { 2041 unsigned int j; 2042 2043 for_each_batch_create_order(eb, j) { 2044 struct i915_request *rq = eb->requests[j]; 2045 2046 if (!rq) 2047 break; 2048 2049 rq->capture_list = eb->capture_lists[j]; 2050 eb->capture_lists[j] = NULL; 2051 } 2052 } 2053 2054 /* 2055 * Release anything that didn't get committed due to errors. 2056 * The capture_list will otherwise be freed at request retire.
2057 */ 2058 static void eb_capture_release(struct i915_execbuffer *eb) 2059 { 2060 unsigned int j; 2061 2062 for_each_batch_create_order(eb, j) { 2063 if (eb->capture_lists[j]) { 2064 i915_request_free_capture_list(eb->capture_lists[j]); 2065 eb->capture_lists[j] = NULL; 2066 } 2067 } 2068 } 2069 2070 static void eb_capture_list_clear(struct i915_execbuffer *eb) 2071 { 2072 memset(eb->capture_lists, 0, sizeof(eb->capture_lists)); 2073 } 2074 2075 #else 2076 2077 static int eb_capture_stage(struct i915_execbuffer *eb) 2078 { 2079 return 0; 2080 } 2081 2082 static void eb_capture_commit(struct i915_execbuffer *eb) 2083 { 2084 } 2085 2086 static void eb_capture_release(struct i915_execbuffer *eb) 2087 { 2088 } 2089 2090 static void eb_capture_list_clear(struct i915_execbuffer *eb) 2091 { 2092 } 2093 2094 #endif 2095 2096 static int eb_move_to_gpu(struct i915_execbuffer *eb) 2097 { 2098 const unsigned int count = eb->buffer_count; 2099 unsigned int i = count; 2100 int err = 0, j; 2101 2102 while (i--) { 2103 struct eb_vma *ev = &eb->vma[i]; 2104 struct i915_vma *vma = ev->vma; 2105 unsigned int flags = ev->flags; 2106 struct drm_i915_gem_object *obj = vma->obj; 2107 2108 assert_vma_held(vma); 2109 2110 /* 2111 * If the GPU is not _reading_ through the CPU cache, we need 2112 * to make sure that any writes (both previous GPU writes from 2113 * before a change in snooping levels and normal CPU writes) 2114 * caught in that cache are flushed to main memory. 2115 * 2116 * We want to say 2117 * obj->cache_dirty && 2118 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 2119 * but gcc's optimiser doesn't handle that as well and emits 2120 * two jumps instead of one. Maybe one day... 2121 * 2122 * FIXME: There is also sync flushing in set_pages(), which 2123 * serves a different purpose(some of the time at least). 2124 * 2125 * We should consider: 2126 * 2127 * 1. Rip out the async flush code. 2128 * 2129 * 2. Or make the sync flushing use the async clflush path 2130 * using mandatory fences underneath. Currently the below 2131 * async flush happens after we bind the object. 2132 */ 2133 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 2134 if (i915_gem_clflush_object(obj, 0)) 2135 flags &= ~EXEC_OBJECT_ASYNC; 2136 } 2137 2138 /* We only need to await on the first request */ 2139 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { 2140 err = i915_request_await_object 2141 (eb_find_first_request_added(eb), obj, 2142 flags & EXEC_OBJECT_WRITE); 2143 } 2144 2145 for_each_batch_add_order(eb, j) { 2146 if (err) 2147 break; 2148 if (!eb->requests[j]) 2149 continue; 2150 2151 err = _i915_vma_move_to_active(vma, eb->requests[j], 2152 j ? NULL : 2153 eb->composite_fence ? 
2154 eb->composite_fence : 2155 &eb->requests[j]->fence, 2156 flags | __EXEC_OBJECT_NO_RESERVE | 2157 __EXEC_OBJECT_NO_REQUEST_AWAIT); 2158 } 2159 } 2160 2161 #ifdef CONFIG_MMU_NOTIFIER 2162 if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { 2163 read_lock(&eb->i915->mm.notifier_lock); 2164 2165 /* 2166 * count is always at least 1, otherwise __EXEC_USERPTR_USED 2167 * could not have been set 2168 */ 2169 for (i = 0; i < count; i++) { 2170 struct eb_vma *ev = &eb->vma[i]; 2171 struct drm_i915_gem_object *obj = ev->vma->obj; 2172 2173 if (!i915_gem_object_is_userptr(obj)) 2174 continue; 2175 2176 err = i915_gem_object_userptr_submit_done(obj); 2177 if (err) 2178 break; 2179 } 2180 2181 read_unlock(&eb->i915->mm.notifier_lock); 2182 } 2183 #endif 2184 2185 if (unlikely(err)) 2186 goto err_skip; 2187 2188 /* Unconditionally flush any chipset caches (for streaming writes). */ 2189 intel_gt_chipset_flush(eb->gt); 2190 eb_capture_commit(eb); 2191 2192 return 0; 2193 2194 err_skip: 2195 for_each_batch_create_order(eb, j) { 2196 if (!eb->requests[j]) 2197 break; 2198 2199 i915_request_set_error_once(eb->requests[j], err); 2200 } 2201 return err; 2202 } 2203 2204 static int i915_gem_check_execbuffer(struct drm_i915_private *i915, 2205 struct drm_i915_gem_execbuffer2 *exec) 2206 { 2207 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 2208 return -EINVAL; 2209 2210 /* Kernel clipping was a DRI1 misfeature */ 2211 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | 2212 I915_EXEC_USE_EXTENSIONS))) { 2213 if (exec->num_cliprects || exec->cliprects_ptr) 2214 return -EINVAL; 2215 } 2216 2217 if (exec->DR4 == 0xffffffff) { 2218 drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n"); 2219 exec->DR4 = 0; 2220 } 2221 if (exec->DR1 || exec->DR4) 2222 return -EINVAL; 2223 2224 if ((exec->batch_start_offset | exec->batch_len) & 0x7) 2225 return -EINVAL; 2226 2227 return 0; 2228 } 2229 2230 static int i915_reset_gen7_sol_offsets(struct i915_request *rq) 2231 { 2232 u32 *cs; 2233 int i; 2234 2235 if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) { 2236 drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n"); 2237 return -EINVAL; 2238 } 2239 2240 cs = intel_ring_begin(rq, 4 * 2 + 2); 2241 if (IS_ERR(cs)) 2242 return PTR_ERR(cs); 2243 2244 *cs++ = MI_LOAD_REGISTER_IMM(4); 2245 for (i = 0; i < 4; i++) { 2246 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 2247 *cs++ = 0; 2248 } 2249 *cs++ = MI_NOOP; 2250 intel_ring_advance(rq, cs); 2251 2252 return 0; 2253 } 2254 2255 static struct i915_vma * 2256 shadow_batch_pin(struct i915_execbuffer *eb, 2257 struct drm_i915_gem_object *obj, 2258 struct i915_address_space *vm, 2259 unsigned int flags) 2260 { 2261 struct i915_vma *vma; 2262 int err; 2263 2264 vma = i915_vma_instance(obj, vm, NULL); 2265 if (IS_ERR(vma)) 2266 return vma; 2267 2268 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE); 2269 if (err) 2270 return ERR_PTR(err); 2271 2272 return vma; 2273 } 2274 2275 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) 2276 { 2277 /* 2278 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2279 * batch" bit. Hence we need to pin secure batches into the global gtt. 2280 * hsw should have this fixed, but bdw mucks it up again. 
*/ 2281 if (eb->batch_flags & I915_DISPATCH_SECURE) 2282 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE); 2283 2284 return NULL; 2285 } 2286 2287 static int eb_parse(struct i915_execbuffer *eb) 2288 { 2289 struct drm_i915_private *i915 = eb->i915; 2290 struct intel_gt_buffer_pool_node *pool = eb->batch_pool; 2291 struct i915_vma *shadow, *trampoline, *batch; 2292 unsigned long len; 2293 int err; 2294 2295 if (!eb_use_cmdparser(eb)) { 2296 batch = eb_dispatch_secure(eb, eb->batches[0]->vma); 2297 if (IS_ERR(batch)) 2298 return PTR_ERR(batch); 2299 2300 goto secure_batch; 2301 } 2302 2303 if (intel_context_is_parallel(eb->context)) 2304 return -EINVAL; 2305 2306 len = eb->batch_len[0]; 2307 if (!CMDPARSER_USES_GGTT(eb->i915)) { 2308 /* 2309 * ppGTT backed shadow buffers must be mapped RO, to prevent 2310 * post-scan tampering 2311 */ 2312 if (!eb->context->vm->has_read_only) { 2313 drm_dbg(&i915->drm, 2314 "Cannot prevent post-scan tampering without RO capable vm\n"); 2315 return -EINVAL; 2316 } 2317 } else { 2318 len += I915_CMD_PARSER_TRAMPOLINE_SIZE; 2319 } 2320 if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */ 2321 return -EINVAL; 2322 2323 if (!pool) { 2324 pool = intel_gt_get_buffer_pool(eb->gt, len, 2325 I915_MAP_WB); 2326 if (IS_ERR(pool)) 2327 return PTR_ERR(pool); 2328 eb->batch_pool = pool; 2329 } 2330 2331 err = i915_gem_object_lock(pool->obj, &eb->ww); 2332 if (err) 2333 return err; 2334 2335 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); 2336 if (IS_ERR(shadow)) 2337 return PTR_ERR(shadow); 2338 2339 intel_gt_buffer_pool_mark_used(pool); 2340 i915_gem_object_set_readonly(shadow->obj); 2341 shadow->private = pool; 2342 2343 trampoline = NULL; 2344 if (CMDPARSER_USES_GGTT(eb->i915)) { 2345 trampoline = shadow; 2346 2347 shadow = shadow_batch_pin(eb, pool->obj, 2348 &eb->gt->ggtt->vm, 2349 PIN_GLOBAL); 2350 if (IS_ERR(shadow)) 2351 return PTR_ERR(shadow); 2352 2353 shadow->private = pool; 2354 2355 eb->batch_flags |= I915_DISPATCH_SECURE; 2356 } 2357 2358 batch = eb_dispatch_secure(eb, shadow); 2359 if (IS_ERR(batch)) 2360 return PTR_ERR(batch); 2361 2362 err = dma_resv_reserve_fences(shadow->obj->base.resv, 1); 2363 if (err) 2364 return err; 2365 2366 err = intel_engine_cmd_parser(eb->context->engine, 2367 eb->batches[0]->vma, 2368 eb->batch_start_offset, 2369 eb->batch_len[0], 2370 shadow, trampoline); 2371 if (err) 2372 return err; 2373 2374 eb->batches[0] = &eb->vma[eb->buffer_count++]; 2375 eb->batches[0]->vma = i915_vma_get(shadow); 2376 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; 2377 2378 eb->trampoline = trampoline; 2379 eb->batch_start_offset = 0; 2380 2381 secure_batch: 2382 if (batch) { 2383 if (intel_context_is_parallel(eb->context)) 2384 return -EINVAL; 2385 2386 eb->batches[0] = &eb->vma[eb->buffer_count++]; 2387 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; 2388 eb->batches[0]->vma = i915_vma_get(batch); 2389 } 2390 return 0; 2391 } 2392 2393 static int eb_request_submit(struct i915_execbuffer *eb, 2394 struct i915_request *rq, 2395 struct i915_vma *batch, 2396 u64 batch_len) 2397 { 2398 int err; 2399 2400 if (intel_context_nopreempt(rq->context)) 2401 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 2402 2403 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 2404 err = i915_reset_gen7_sol_offsets(rq); 2405 if (err) 2406 return err; 2407 } 2408 2409 /* 2410 * After we completed waiting for other engines (using HW semaphores) 2411 * then we can signal that this request/batch 
is ready to run. This 2412 * allows us to determine if the batch is still waiting on the GPU 2413 * or actually running by checking the breadcrumb. 2414 */ 2415 if (rq->context->engine->emit_init_breadcrumb) { 2416 err = rq->context->engine->emit_init_breadcrumb(rq); 2417 if (err) 2418 return err; 2419 } 2420 2421 err = rq->context->engine->emit_bb_start(rq, 2422 i915_vma_offset(batch) + 2423 eb->batch_start_offset, 2424 batch_len, 2425 eb->batch_flags); 2426 if (err) 2427 return err; 2428 2429 if (eb->trampoline) { 2430 GEM_BUG_ON(intel_context_is_parallel(rq->context)); 2431 GEM_BUG_ON(eb->batch_start_offset); 2432 err = rq->context->engine->emit_bb_start(rq, 2433 i915_vma_offset(eb->trampoline) + 2434 batch_len, 0, 0); 2435 if (err) 2436 return err; 2437 } 2438 2439 return 0; 2440 } 2441 2442 static int eb_submit(struct i915_execbuffer *eb) 2443 { 2444 unsigned int i; 2445 int err; 2446 2447 err = eb_move_to_gpu(eb); 2448 2449 for_each_batch_create_order(eb, i) { 2450 if (!eb->requests[i]) 2451 break; 2452 2453 trace_i915_request_queue(eb->requests[i], eb->batch_flags); 2454 if (!err) 2455 err = eb_request_submit(eb, eb->requests[i], 2456 eb->batches[i]->vma, 2457 eb->batch_len[i]); 2458 } 2459 2460 return err; 2461 } 2462 2463 /* 2464 * Find one BSD ring to dispatch the corresponding BSD command. 2465 * The engine index is returned. 2466 */ 2467 static unsigned int 2468 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, 2469 struct drm_file *file) 2470 { 2471 struct drm_i915_file_private *file_priv = file->driver_priv; 2472 2473 /* Check whether the file_priv has already selected one ring. */ 2474 if ((int)file_priv->bsd_engine < 0) 2475 file_priv->bsd_engine = 2476 get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]); 2477 2478 return file_priv->bsd_engine; 2479 } 2480 2481 static const enum intel_engine_id user_ring_map[] = { 2482 [I915_EXEC_DEFAULT] = RCS0, 2483 [I915_EXEC_RENDER] = RCS0, 2484 [I915_EXEC_BLT] = BCS0, 2485 [I915_EXEC_BSD] = VCS0, 2486 [I915_EXEC_VEBOX] = VECS0 2487 }; 2488 2489 static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) 2490 { 2491 struct intel_ring *ring = ce->ring; 2492 struct intel_timeline *tl = ce->timeline; 2493 struct i915_request *rq; 2494 2495 /* 2496 * Completely unscientific finger-in-the-air estimates for suitable 2497 * maximum user request size (to avoid blocking) and then backoff. 2498 */ 2499 if (intel_ring_update_space(ring) >= PAGE_SIZE) 2500 return NULL; 2501 2502 /* 2503 * Find a request that after waiting upon, there will be at least half 2504 * the ring available. The hysteresis allows us to compete for the 2505 * shared ring and should mean that we sleep less often prior to 2506 * claiming our resources, but not so long that the ring completely 2507 * drains before we can submit our next request. 
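 *
 * For example (sizes purely illustrative): with a 16 KiB ring that has
 * less than PAGE_SIZE free, we pick the oldest request whose completion
 * leaves more than 8 KiB of ring space and wait on that one, rather
 * than waiting for the whole ring to drain.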
2508 */ 2509 list_for_each_entry(rq, &tl->requests, link) { 2510 if (rq->ring != ring) 2511 continue; 2512 2513 if (__intel_ring_space(rq->postfix, 2514 ring->emit, ring->size) > ring->size / 2) 2515 break; 2516 } 2517 if (&rq->link == &tl->requests) 2518 return NULL; /* weird, we will check again later for real */ 2519 2520 return i915_request_get(rq); 2521 } 2522 2523 static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, 2524 bool throttle) 2525 { 2526 struct intel_timeline *tl; 2527 struct i915_request *rq = NULL; 2528 2529 /* 2530 * Take a local wakeref for preparing to dispatch the execbuf as 2531 * we expect to access the hardware fairly frequently in the 2532 * process, and require the engine to be kept awake between accesses. 2533 * Upon dispatch, we acquire another prolonged wakeref that we hold 2534 * until the timeline is idle, which in turn releases the wakeref 2535 * taken on the engine, and the parent device. 2536 */ 2537 tl = intel_context_timeline_lock(ce); 2538 if (IS_ERR(tl)) 2539 return PTR_ERR(tl); 2540 2541 intel_context_enter(ce); 2542 if (throttle) 2543 rq = eb_throttle(eb, ce); 2544 intel_context_timeline_unlock(tl); 2545 2546 if (rq) { 2547 bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; 2548 long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT; 2549 2550 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 2551 timeout) < 0) { 2552 i915_request_put(rq); 2553 2554 /* 2555 * Error path, cannot use intel_context_timeline_lock as 2556 * that is user interruptable and this clean up step 2557 * must be done. 2558 */ 2559 mutex_lock(&ce->timeline->mutex); 2560 intel_context_exit(ce); 2561 mutex_unlock(&ce->timeline->mutex); 2562 2563 if (nonblock) 2564 return -EWOULDBLOCK; 2565 else 2566 return -EINTR; 2567 } 2568 i915_request_put(rq); 2569 } 2570 2571 return 0; 2572 } 2573 2574 static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle) 2575 { 2576 struct intel_context *ce = eb->context, *child; 2577 int err; 2578 int i = 0, j = 0; 2579 2580 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); 2581 2582 if (unlikely(intel_context_is_banned(ce))) 2583 return -EIO; 2584 2585 /* 2586 * Pinning the contexts may generate requests in order to acquire 2587 * GGTT space, so do this first before we reserve a seqno for 2588 * ourselves. 
2589 */ 2590 err = intel_context_pin_ww(ce, &eb->ww); 2591 if (err) 2592 return err; 2593 for_each_child(ce, child) { 2594 err = intel_context_pin_ww(child, &eb->ww); 2595 GEM_BUG_ON(err); /* perma-pinned should incr a counter */ 2596 } 2597 2598 for_each_child(ce, child) { 2599 err = eb_pin_timeline(eb, child, throttle); 2600 if (err) 2601 goto unwind; 2602 ++i; 2603 } 2604 err = eb_pin_timeline(eb, ce, throttle); 2605 if (err) 2606 goto unwind; 2607 2608 eb->args->flags |= __EXEC_ENGINE_PINNED; 2609 return 0; 2610 2611 unwind: 2612 for_each_child(ce, child) { 2613 if (j++ < i) { 2614 mutex_lock(&child->timeline->mutex); 2615 intel_context_exit(child); 2616 mutex_unlock(&child->timeline->mutex); 2617 } 2618 } 2619 for_each_child(ce, child) 2620 intel_context_unpin(child); 2621 intel_context_unpin(ce); 2622 return err; 2623 } 2624 2625 static void eb_unpin_engine(struct i915_execbuffer *eb) 2626 { 2627 struct intel_context *ce = eb->context, *child; 2628 2629 if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) 2630 return; 2631 2632 eb->args->flags &= ~__EXEC_ENGINE_PINNED; 2633 2634 for_each_child(ce, child) { 2635 mutex_lock(&child->timeline->mutex); 2636 intel_context_exit(child); 2637 mutex_unlock(&child->timeline->mutex); 2638 2639 intel_context_unpin(child); 2640 } 2641 2642 mutex_lock(&ce->timeline->mutex); 2643 intel_context_exit(ce); 2644 mutex_unlock(&ce->timeline->mutex); 2645 2646 intel_context_unpin(ce); 2647 } 2648 2649 static unsigned int 2650 eb_select_legacy_ring(struct i915_execbuffer *eb) 2651 { 2652 struct drm_i915_private *i915 = eb->i915; 2653 struct drm_i915_gem_execbuffer2 *args = eb->args; 2654 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; 2655 2656 if (user_ring_id != I915_EXEC_BSD && 2657 (args->flags & I915_EXEC_BSD_MASK)) { 2658 drm_dbg(&i915->drm, 2659 "execbuf with non bsd ring but with invalid " 2660 "bsd dispatch flags: %d\n", (int)(args->flags)); 2661 return -1; 2662 } 2663 2664 if (user_ring_id == I915_EXEC_BSD && 2665 i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) { 2666 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; 2667 2668 if (bsd_idx == I915_EXEC_BSD_DEFAULT) { 2669 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); 2670 } else if (bsd_idx >= I915_EXEC_BSD_RING1 && 2671 bsd_idx <= I915_EXEC_BSD_RING2) { 2672 bsd_idx >>= I915_EXEC_BSD_SHIFT; 2673 bsd_idx--; 2674 } else { 2675 drm_dbg(&i915->drm, 2676 "execbuf with unknown bsd ring: %u\n", 2677 bsd_idx); 2678 return -1; 2679 } 2680 2681 return _VCS(bsd_idx); 2682 } 2683 2684 if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { 2685 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n", 2686 user_ring_id); 2687 return -1; 2688 } 2689 2690 return user_ring_map[user_ring_id]; 2691 } 2692 2693 static int 2694 eb_select_engine(struct i915_execbuffer *eb) 2695 { 2696 struct intel_context *ce, *child; 2697 struct intel_gt *gt; 2698 unsigned int idx; 2699 int err; 2700 2701 if (i915_gem_context_user_engines(eb->gem_context)) 2702 idx = eb->args->flags & I915_EXEC_RING_MASK; 2703 else 2704 idx = eb_select_legacy_ring(eb); 2705 2706 ce = i915_gem_context_get_engine(eb->gem_context, idx); 2707 if (IS_ERR(ce)) 2708 return PTR_ERR(ce); 2709 2710 if (intel_context_is_parallel(ce)) { 2711 if (eb->buffer_count < ce->parallel.number_children + 1) { 2712 intel_context_put(ce); 2713 return -EINVAL; 2714 } 2715 if (eb->batch_start_offset || eb->args->batch_len) { 2716 intel_context_put(ce); 2717 return -EINVAL; 2718 } 2719 } 2720 eb->num_batches = ce->parallel.number_children + 1; 2721 gt 
= ce->engine->gt; 2722 2723 for_each_child(ce, child) 2724 intel_context_get(child); 2725 eb->wakeref = intel_gt_pm_get(ce->engine->gt); 2726 /* 2727 * Keep GT0 active on MTL so that i915_vma_parked() doesn't 2728 * free VMAs while execbuf ioctl is validating VMAs. 2729 */ 2730 if (gt->info.id) 2731 eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915)); 2732 2733 if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { 2734 err = intel_context_alloc_state(ce); 2735 if (err) 2736 goto err; 2737 } 2738 for_each_child(ce, child) { 2739 if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) { 2740 err = intel_context_alloc_state(child); 2741 if (err) 2742 goto err; 2743 } 2744 } 2745 2746 /* 2747 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2748 * EIO if the GPU is already wedged. 2749 */ 2750 err = intel_gt_terminally_wedged(ce->engine->gt); 2751 if (err) 2752 goto err; 2753 2754 if (!i915_vm_tryget(ce->vm)) { 2755 err = -ENOENT; 2756 goto err; 2757 } 2758 2759 eb->context = ce; 2760 eb->gt = ce->engine->gt; 2761 2762 /* 2763 * Make sure engine pool stays alive even if we call intel_context_put 2764 * during ww handling. The pool is destroyed when last pm reference 2765 * is dropped, which breaks our -EDEADLK handling. 2766 */ 2767 return err; 2768 2769 err: 2770 if (gt->info.id) 2771 intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0); 2772 2773 intel_gt_pm_put(ce->engine->gt, eb->wakeref); 2774 for_each_child(ce, child) 2775 intel_context_put(child); 2776 intel_context_put(ce); 2777 return err; 2778 } 2779 2780 static void 2781 eb_put_engine(struct i915_execbuffer *eb) 2782 { 2783 struct intel_context *child; 2784 2785 i915_vm_put(eb->context->vm); 2786 /* 2787 * This works in conjunction with eb_select_engine() to prevent 2788 * i915_vma_parked() from interfering while execbuf validates vmas. 
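 *
 * When the selected engine lives on a GT other than GT0, the extra
 * wakeref_gt0 taken in eb_select_engine() keeps GT0 unparked for the
 * whole ioctl; it is dropped again just below.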
2789 */ 2790 if (eb->gt->info.id) 2791 intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0); 2792 intel_gt_pm_put(eb->context->engine->gt, eb->wakeref); 2793 for_each_child(eb->context, child) 2794 intel_context_put(child); 2795 intel_context_put(eb->context); 2796 } 2797 2798 static void 2799 __free_fence_array(struct eb_fence *fences, unsigned int n) 2800 { 2801 while (n--) { 2802 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); 2803 dma_fence_put(fences[n].dma_fence); 2804 dma_fence_chain_free(fences[n].chain_fence); 2805 } 2806 kvfree(fences); 2807 } 2808 2809 static int 2810 add_timeline_fence_array(struct i915_execbuffer *eb, 2811 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) 2812 { 2813 struct drm_i915_gem_exec_fence __user *user_fences; 2814 u64 __user *user_values; 2815 struct eb_fence *f; 2816 u64 nfences; 2817 int err = 0; 2818 2819 nfences = timeline_fences->fence_count; 2820 if (!nfences) 2821 return 0; 2822 2823 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2824 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2825 if (nfences > min_t(unsigned long, 2826 ULONG_MAX / sizeof(*user_fences), 2827 SIZE_MAX / sizeof(*f)) - eb->num_fences) 2828 return -EINVAL; 2829 2830 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); 2831 if (!access_ok(user_fences, nfences * sizeof(*user_fences))) 2832 return -EFAULT; 2833 2834 user_values = u64_to_user_ptr(timeline_fences->values_ptr); 2835 if (!access_ok(user_values, nfences * sizeof(*user_values))) 2836 return -EFAULT; 2837 2838 f = krealloc(eb->fences, 2839 (eb->num_fences + nfences) * sizeof(*f), 2840 __GFP_NOWARN | GFP_KERNEL); 2841 if (!f) 2842 return -ENOMEM; 2843 2844 eb->fences = f; 2845 f += eb->num_fences; 2846 2847 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2848 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2849 2850 while (nfences--) { 2851 struct drm_i915_gem_exec_fence user_fence; 2852 struct drm_syncobj *syncobj; 2853 struct dma_fence *fence = NULL; 2854 u64 point; 2855 2856 if (__copy_from_user(&user_fence, 2857 user_fences++, 2858 sizeof(user_fence))) 2859 return -EFAULT; 2860 2861 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2862 return -EINVAL; 2863 2864 if (__get_user(point, user_values++)) 2865 return -EFAULT; 2866 2867 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2868 if (!syncobj) { 2869 drm_dbg(&eb->i915->drm, 2870 "Invalid syncobj handle provided\n"); 2871 return -ENOENT; 2872 } 2873 2874 fence = drm_syncobj_fence_get(syncobj); 2875 2876 if (!fence && user_fence.flags && 2877 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2878 drm_dbg(&eb->i915->drm, 2879 "Syncobj handle has no fence\n"); 2880 drm_syncobj_put(syncobj); 2881 return -EINVAL; 2882 } 2883 2884 if (fence) 2885 err = dma_fence_chain_find_seqno(&fence, point); 2886 2887 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2888 drm_dbg(&eb->i915->drm, 2889 "Syncobj handle missing requested point %llu\n", 2890 point); 2891 dma_fence_put(fence); 2892 drm_syncobj_put(syncobj); 2893 return err; 2894 } 2895 2896 /* 2897 * A point might have been signaled already and 2898 * garbage collected from the timeline. In this case 2899 * just ignore the point and carry on. 2900 */ 2901 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2902 drm_syncobj_put(syncobj); 2903 continue; 2904 } 2905 2906 /* 2907 * For timeline syncobjs we need to preallocate chains for 2908 * later signaling. 
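 * Preallocating here means that signal_fence_array(), which runs after
 * the requests have already been committed, never has to handle an
 * allocation failure that it could no longer report to userspace.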
2909 */ 2910 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { 2911 /* 2912 * Waiting and signaling the same point (when point != 2913 * 0) would break the timeline. 2914 */ 2915 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2916 drm_dbg(&eb->i915->drm, 2917 "Trying to wait & signal the same timeline point.\n"); 2918 dma_fence_put(fence); 2919 drm_syncobj_put(syncobj); 2920 return -EINVAL; 2921 } 2922 2923 f->chain_fence = dma_fence_chain_alloc(); 2924 if (!f->chain_fence) { 2925 drm_syncobj_put(syncobj); 2926 dma_fence_put(fence); 2927 return -ENOMEM; 2928 } 2929 } else { 2930 f->chain_fence = NULL; 2931 } 2932 2933 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2934 f->dma_fence = fence; 2935 f->value = point; 2936 f++; 2937 eb->num_fences++; 2938 } 2939 2940 return 0; 2941 } 2942 2943 static int add_fence_array(struct i915_execbuffer *eb) 2944 { 2945 struct drm_i915_gem_execbuffer2 *args = eb->args; 2946 struct drm_i915_gem_exec_fence __user *user; 2947 unsigned long num_fences = args->num_cliprects; 2948 struct eb_fence *f; 2949 2950 if (!(args->flags & I915_EXEC_FENCE_ARRAY)) 2951 return 0; 2952 2953 if (!num_fences) 2954 return 0; 2955 2956 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2957 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2958 if (num_fences > min_t(unsigned long, 2959 ULONG_MAX / sizeof(*user), 2960 SIZE_MAX / sizeof(*f) - eb->num_fences)) 2961 return -EINVAL; 2962 2963 user = u64_to_user_ptr(args->cliprects_ptr); 2964 if (!access_ok(user, num_fences * sizeof(*user))) 2965 return -EFAULT; 2966 2967 f = krealloc(eb->fences, 2968 (eb->num_fences + num_fences) * sizeof(*f), 2969 __GFP_NOWARN | GFP_KERNEL); 2970 if (!f) 2971 return -ENOMEM; 2972 2973 eb->fences = f; 2974 f += eb->num_fences; 2975 while (num_fences--) { 2976 struct drm_i915_gem_exec_fence user_fence; 2977 struct drm_syncobj *syncobj; 2978 struct dma_fence *fence = NULL; 2979 2980 if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) 2981 return -EFAULT; 2982 2983 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2984 return -EINVAL; 2985 2986 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2987 if (!syncobj) { 2988 drm_dbg(&eb->i915->drm, 2989 "Invalid syncobj handle provided\n"); 2990 return -ENOENT; 2991 } 2992 2993 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2994 fence = drm_syncobj_fence_get(syncobj); 2995 if (!fence) { 2996 drm_dbg(&eb->i915->drm, 2997 "Syncobj handle has no fence\n"); 2998 drm_syncobj_put(syncobj); 2999 return -EINVAL; 3000 } 3001 } 3002 3003 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 3004 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 3005 3006 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 3007 f->dma_fence = fence; 3008 f->value = 0; 3009 f->chain_fence = NULL; 3010 f++; 3011 eb->num_fences++; 3012 } 3013 3014 return 0; 3015 } 3016 3017 static void put_fence_array(struct eb_fence *fences, int num_fences) 3018 { 3019 if (fences) 3020 __free_fence_array(fences, num_fences); 3021 } 3022 3023 static int 3024 await_fence_array(struct i915_execbuffer *eb, 3025 struct i915_request *rq) 3026 { 3027 unsigned int n; 3028 int err; 3029 3030 for (n = 0; n < eb->num_fences; n++) { 3031 if (!eb->fences[n].dma_fence) 3032 continue; 3033 3034 err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence); 3035 if (err < 0) 3036 return err; 3037 } 3038 3039 return 0; 3040 } 3041 3042 static void signal_fence_array(const struct i915_execbuffer *eb, 3043 struct dma_fence * const fence) 3044 { 3045 unsigned int n; 
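	/*
	 * Install @fence into every syncobj that was marked with
	 * I915_EXEC_FENCE_SIGNAL: timeline syncobjs get a new point via the
	 * chain preallocated in add_timeline_fence_array(), binary syncobjs
	 * simply have their fence replaced.
	 */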
3046 3047 for (n = 0; n < eb->num_fences; n++) { 3048 struct drm_syncobj *syncobj; 3049 unsigned int flags; 3050 3051 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 3052 if (!(flags & I915_EXEC_FENCE_SIGNAL)) 3053 continue; 3054 3055 if (eb->fences[n].chain_fence) { 3056 drm_syncobj_add_point(syncobj, 3057 eb->fences[n].chain_fence, 3058 fence, 3059 eb->fences[n].value); 3060 /* 3061 * The chain's ownership is transferred to the 3062 * timeline. 3063 */ 3064 eb->fences[n].chain_fence = NULL; 3065 } else { 3066 drm_syncobj_replace_fence(syncobj, fence); 3067 } 3068 } 3069 } 3070 3071 static int 3072 parse_timeline_fences(struct i915_user_extension __user *ext, void *data) 3073 { 3074 struct i915_execbuffer *eb = data; 3075 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; 3076 3077 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) 3078 return -EFAULT; 3079 3080 return add_timeline_fence_array(eb, &timeline_fences); 3081 } 3082 3083 static void retire_requests(struct intel_timeline *tl, struct i915_request *end) 3084 { 3085 struct i915_request *rq, *rn; 3086 3087 list_for_each_entry_safe(rq, rn, &tl->requests, link) 3088 if (rq == end || !i915_request_retire(rq)) 3089 break; 3090 } 3091 3092 static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq, 3093 int err, bool last_parallel) 3094 { 3095 struct intel_timeline * const tl = i915_request_timeline(rq); 3096 struct i915_sched_attr attr = {}; 3097 struct i915_request *prev; 3098 3099 lockdep_assert_held(&tl->mutex); 3100 lockdep_unpin_lock(&tl->mutex, rq->cookie); 3101 3102 trace_i915_request_add(rq); 3103 3104 prev = __i915_request_commit(rq); 3105 3106 /* Check that the context wasn't destroyed before submission */ 3107 if (likely(!intel_context_is_closed(eb->context))) { 3108 attr = eb->gem_context->sched; 3109 } else { 3110 /* Serialise with context_close via the add_to_timeline */ 3111 i915_request_set_error_once(rq, -ENOENT); 3112 __i915_request_skip(rq); 3113 err = -ENOENT; /* override any transient errors */ 3114 } 3115 3116 if (intel_context_is_parallel(eb->context)) { 3117 if (err) { 3118 __i915_request_skip(rq); 3119 set_bit(I915_FENCE_FLAG_SKIP_PARALLEL, 3120 &rq->fence.flags); 3121 } 3122 if (last_parallel) 3123 set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, 3124 &rq->fence.flags); 3125 } 3126 3127 __i915_request_queue(rq, &attr); 3128 3129 /* Try to clean up the client's timeline after submitting the request */ 3130 if (prev) 3131 retire_requests(tl, prev); 3132 3133 mutex_unlock(&tl->mutex); 3134 3135 return err; 3136 } 3137 3138 static int eb_requests_add(struct i915_execbuffer *eb, int err) 3139 { 3140 int i; 3141 3142 /* 3143 * We iterate in reverse order of creation to release timeline mutexes in 3144 * same order. 3145 */ 3146 for_each_batch_add_order(eb, i) { 3147 struct i915_request *rq = eb->requests[i]; 3148 3149 if (!rq) 3150 continue; 3151 err |= eb_request_add(eb, rq, err, i == 0); 3152 } 3153 3154 return err; 3155 } 3156 3157 static const i915_user_extension_fn execbuf_extensions[] = { 3158 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, 3159 }; 3160 3161 static int 3162 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, 3163 struct i915_execbuffer *eb) 3164 { 3165 if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) 3166 return 0; 3167 3168 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot 3169 * have another flag also using it at the same time. 
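 *
 * A rough sketch of what userspace passes for the timeline-fences
 * extension (illustrative only; 'fence' is a struct
 * drm_i915_gem_exec_fence and 'point' a u64 supplied by the caller):
 *
 *	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
 *		.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
 *		.fence_count = 1,
 *		.handles_ptr = (uintptr_t)&fence,
 *		.values_ptr = (uintptr_t)&point,
 *	};
 *	execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
 *	execbuf.cliprects_ptr = (uintptr_t)&ext;
 *	execbuf.num_cliprects = 0;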
3170 */ 3171 if (eb->args->flags & I915_EXEC_FENCE_ARRAY) 3172 return -EINVAL; 3173 3174 if (args->num_cliprects != 0) 3175 return -EINVAL; 3176 3177 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), 3178 execbuf_extensions, 3179 ARRAY_SIZE(execbuf_extensions), 3180 eb); 3181 } 3182 3183 static void eb_requests_get(struct i915_execbuffer *eb) 3184 { 3185 unsigned int i; 3186 3187 for_each_batch_create_order(eb, i) { 3188 if (!eb->requests[i]) 3189 break; 3190 3191 i915_request_get(eb->requests[i]); 3192 } 3193 } 3194 3195 static void eb_requests_put(struct i915_execbuffer *eb) 3196 { 3197 unsigned int i; 3198 3199 for_each_batch_create_order(eb, i) { 3200 if (!eb->requests[i]) 3201 break; 3202 3203 i915_request_put(eb->requests[i]); 3204 } 3205 } 3206 3207 static struct sync_file * 3208 eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) 3209 { 3210 struct sync_file *out_fence = NULL; 3211 struct dma_fence_array *fence_array; 3212 struct dma_fence **fences; 3213 unsigned int i; 3214 3215 GEM_BUG_ON(!intel_context_is_parent(eb->context)); 3216 3217 fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL); 3218 if (!fences) 3219 return ERR_PTR(-ENOMEM); 3220 3221 for_each_batch_create_order(eb, i) { 3222 fences[i] = &eb->requests[i]->fence; 3223 __set_bit(I915_FENCE_FLAG_COMPOSITE, 3224 &eb->requests[i]->fence.flags); 3225 } 3226 3227 fence_array = dma_fence_array_create(eb->num_batches, 3228 fences, 3229 eb->context->parallel.fence_context, 3230 eb->context->parallel.seqno++, 3231 false); 3232 if (!fence_array) { 3233 kfree(fences); 3234 return ERR_PTR(-ENOMEM); 3235 } 3236 3237 /* Move ownership to the dma_fence_array created above */ 3238 for_each_batch_create_order(eb, i) 3239 dma_fence_get(fences[i]); 3240 3241 if (out_fence_fd != -1) { 3242 out_fence = sync_file_create(&fence_array->base); 3243 /* sync_file now owns fence_arry, drop creation ref */ 3244 dma_fence_put(&fence_array->base); 3245 if (!out_fence) 3246 return ERR_PTR(-ENOMEM); 3247 } 3248 3249 eb->composite_fence = &fence_array->base; 3250 3251 return out_fence; 3252 } 3253 3254 static struct sync_file * 3255 eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq, 3256 struct dma_fence *in_fence, int out_fence_fd) 3257 { 3258 struct sync_file *out_fence = NULL; 3259 int err; 3260 3261 if (unlikely(eb->gem_context->syncobj)) { 3262 struct dma_fence *fence; 3263 3264 fence = drm_syncobj_fence_get(eb->gem_context->syncobj); 3265 err = i915_request_await_dma_fence(rq, fence); 3266 dma_fence_put(fence); 3267 if (err) 3268 return ERR_PTR(err); 3269 } 3270 3271 if (in_fence) { 3272 if (eb->args->flags & I915_EXEC_FENCE_SUBMIT) 3273 err = i915_request_await_execution(rq, in_fence); 3274 else 3275 err = i915_request_await_dma_fence(rq, in_fence); 3276 if (err < 0) 3277 return ERR_PTR(err); 3278 } 3279 3280 if (eb->fences) { 3281 err = await_fence_array(eb, rq); 3282 if (err) 3283 return ERR_PTR(err); 3284 } 3285 3286 if (intel_context_is_parallel(eb->context)) { 3287 out_fence = eb_composite_fence_create(eb, out_fence_fd); 3288 if (IS_ERR(out_fence)) 3289 return ERR_PTR(-ENOMEM); 3290 } else if (out_fence_fd != -1) { 3291 out_fence = sync_file_create(&rq->fence); 3292 if (!out_fence) 3293 return ERR_PTR(-ENOMEM); 3294 } 3295 3296 return out_fence; 3297 } 3298 3299 static struct intel_context * 3300 eb_find_context(struct i915_execbuffer *eb, unsigned int context_number) 3301 { 3302 struct intel_context *child; 3303 3304 if (likely(context_number == 0)) 3305 return 
eb->context; 3306 3307 for_each_child(eb->context, child) 3308 if (!--context_number) 3309 return child; 3310 3311 GEM_BUG_ON("Context not found"); 3312 3313 return NULL; 3314 } 3315 3316 static struct sync_file * 3317 eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, 3318 int out_fence_fd) 3319 { 3320 struct sync_file *out_fence = NULL; 3321 unsigned int i; 3322 3323 for_each_batch_create_order(eb, i) { 3324 /* Allocate a request for this batch buffer nice and early. */ 3325 eb->requests[i] = i915_request_create(eb_find_context(eb, i)); 3326 if (IS_ERR(eb->requests[i])) { 3327 out_fence = ERR_CAST(eb->requests[i]); 3328 eb->requests[i] = NULL; 3329 return out_fence; 3330 } 3331 3332 /* 3333 * Only the first request added (committed to backend) has to 3334 * take the in fences into account as all subsequent requests 3335 * will have fences inserted inbetween them. 3336 */ 3337 if (i + 1 == eb->num_batches) { 3338 out_fence = eb_fences_add(eb, eb->requests[i], 3339 in_fence, out_fence_fd); 3340 if (IS_ERR(out_fence)) 3341 return out_fence; 3342 } 3343 3344 /* 3345 * Not really on stack, but we don't want to call 3346 * kfree on the batch_snapshot when we put it, so use the 3347 * _onstack interface. 3348 */ 3349 if (eb->batches[i]->vma) 3350 eb->requests[i]->batch_res = 3351 i915_vma_resource_get(eb->batches[i]->vma->resource); 3352 if (eb->batch_pool) { 3353 GEM_BUG_ON(intel_context_is_parallel(eb->context)); 3354 intel_gt_buffer_pool_mark_active(eb->batch_pool, 3355 eb->requests[i]); 3356 } 3357 } 3358 3359 return out_fence; 3360 } 3361 3362 static int 3363 i915_gem_do_execbuffer(struct drm_device *dev, 3364 struct drm_file *file, 3365 struct drm_i915_gem_execbuffer2 *args, 3366 struct drm_i915_gem_exec_object2 *exec) 3367 { 3368 struct drm_i915_private *i915 = to_i915(dev); 3369 struct i915_execbuffer eb; 3370 struct dma_fence *in_fence = NULL; 3371 struct sync_file *out_fence = NULL; 3372 int out_fence_fd = -1; 3373 int err; 3374 3375 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); 3376 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & 3377 ~__EXEC_OBJECT_UNKNOWN_FLAGS); 3378 3379 eb.i915 = i915; 3380 eb.file = file; 3381 eb.args = args; 3382 if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) 3383 args->flags |= __EXEC_HAS_RELOC; 3384 3385 eb.exec = exec; 3386 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); 3387 eb.vma[0].vma = NULL; 3388 eb.batch_pool = NULL; 3389 3390 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 3391 reloc_cache_init(&eb.reloc_cache, eb.i915); 3392 3393 eb.buffer_count = args->buffer_count; 3394 eb.batch_start_offset = args->batch_start_offset; 3395 eb.trampoline = NULL; 3396 3397 eb.fences = NULL; 3398 eb.num_fences = 0; 3399 3400 eb_capture_list_clear(&eb); 3401 3402 memset(eb.requests, 0, sizeof(struct i915_request *) * 3403 ARRAY_SIZE(eb.requests)); 3404 eb.composite_fence = NULL; 3405 3406 eb.batch_flags = 0; 3407 if (args->flags & I915_EXEC_SECURE) { 3408 if (GRAPHICS_VER(i915) >= 11) 3409 return -ENODEV; 3410 3411 /* Return -EPERM to trigger fallback code on old binaries. 
*/ 3412 if (!HAS_SECURE_BATCHES(i915)) 3413 return -EPERM; 3414 3415 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 3416 return -EPERM; 3417 3418 eb.batch_flags |= I915_DISPATCH_SECURE; 3419 } 3420 if (args->flags & I915_EXEC_IS_PINNED) 3421 eb.batch_flags |= I915_DISPATCH_PINNED; 3422 3423 err = parse_execbuf2_extensions(args, &eb); 3424 if (err) 3425 goto err_ext; 3426 3427 err = add_fence_array(&eb); 3428 if (err) 3429 goto err_ext; 3430 3431 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) 3432 if (args->flags & IN_FENCES) { 3433 if ((args->flags & IN_FENCES) == IN_FENCES) 3434 return -EINVAL; 3435 3436 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); 3437 if (!in_fence) { 3438 err = -EINVAL; 3439 goto err_ext; 3440 } 3441 } 3442 #undef IN_FENCES 3443 3444 if (args->flags & I915_EXEC_FENCE_OUT) { 3445 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 3446 if (out_fence_fd < 0) { 3447 err = out_fence_fd; 3448 goto err_in_fence; 3449 } 3450 } 3451 3452 err = eb_create(&eb); 3453 if (err) 3454 goto err_out_fence; 3455 3456 GEM_BUG_ON(!eb.lut_size); 3457 3458 err = eb_select_context(&eb); 3459 if (unlikely(err)) 3460 goto err_destroy; 3461 3462 err = eb_select_engine(&eb); 3463 if (unlikely(err)) 3464 goto err_context; 3465 3466 err = eb_lookup_vmas(&eb); 3467 if (err) { 3468 eb_release_vmas(&eb, true); 3469 goto err_engine; 3470 } 3471 3472 i915_gem_ww_ctx_init(&eb.ww, true); 3473 3474 err = eb_relocate_parse(&eb); 3475 if (err) { 3476 /* 3477 * If the user expects the execobject.offset and 3478 * reloc.presumed_offset to be an exact match, 3479 * as for using NO_RELOC, then we cannot update 3480 * the execobject.offset until we have completed 3481 * relocation. 3482 */ 3483 args->flags &= ~__EXEC_HAS_RELOC; 3484 goto err_vma; 3485 } 3486 3487 ww_acquire_done(&eb.ww.ctx); 3488 err = eb_capture_stage(&eb); 3489 if (err) 3490 goto err_vma; 3491 3492 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); 3493 if (IS_ERR(out_fence)) { 3494 err = PTR_ERR(out_fence); 3495 out_fence = NULL; 3496 if (eb.requests[0]) 3497 goto err_request; 3498 else 3499 goto err_vma; 3500 } 3501 3502 err = eb_submit(&eb); 3503 3504 err_request: 3505 eb_requests_get(&eb); 3506 err = eb_requests_add(&eb, err); 3507 3508 if (eb.fences) 3509 signal_fence_array(&eb, eb.composite_fence ? 3510 eb.composite_fence : 3511 &eb.requests[0]->fence); 3512 3513 if (unlikely(eb.gem_context->syncobj)) { 3514 drm_syncobj_replace_fence(eb.gem_context->syncobj, 3515 eb.composite_fence ? 
3516 eb.composite_fence : 3517 &eb.requests[0]->fence); 3518 } 3519 3520 if (out_fence) { 3521 if (err == 0) { 3522 fd_install(out_fence_fd, out_fence->file); 3523 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ 3524 args->rsvd2 |= (u64)out_fence_fd << 32; 3525 out_fence_fd = -1; 3526 } else { 3527 fput(out_fence->file); 3528 } 3529 } 3530 3531 if (!out_fence && eb.composite_fence) 3532 dma_fence_put(eb.composite_fence); 3533 3534 eb_requests_put(&eb); 3535 3536 err_vma: 3537 eb_release_vmas(&eb, true); 3538 WARN_ON(err == -EDEADLK); 3539 i915_gem_ww_ctx_fini(&eb.ww); 3540 3541 if (eb.batch_pool) 3542 intel_gt_buffer_pool_put(eb.batch_pool); 3543 err_engine: 3544 eb_put_engine(&eb); 3545 err_context: 3546 i915_gem_context_put(eb.gem_context); 3547 err_destroy: 3548 eb_destroy(&eb); 3549 err_out_fence: 3550 if (out_fence_fd != -1) 3551 put_unused_fd(out_fence_fd); 3552 err_in_fence: 3553 dma_fence_put(in_fence); 3554 err_ext: 3555 put_fence_array(eb.fences, eb.num_fences); 3556 return err; 3557 } 3558 3559 static size_t eb_element_size(void) 3560 { 3561 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); 3562 } 3563 3564 static bool check_buffer_count(size_t count) 3565 { 3566 const size_t sz = eb_element_size(); 3567 3568 /* 3569 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup 3570 * array size (see eb_create()). Otherwise, we can accept an array as 3571 * large as can be addressed (though use large arrays at your peril)! 3572 */ 3573 3574 return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); 3575 } 3576 3577 int 3578 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, 3579 struct drm_file *file) 3580 { 3581 struct drm_i915_private *i915 = to_i915(dev); 3582 struct drm_i915_gem_execbuffer2 *args = data; 3583 struct drm_i915_gem_exec_object2 *exec2_list; 3584 const size_t count = args->buffer_count; 3585 int err; 3586 3587 if (!check_buffer_count(count)) { 3588 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); 3589 return -EINVAL; 3590 } 3591 3592 err = i915_gem_check_execbuffer(i915, args); 3593 if (err) 3594 return err; 3595 3596 /* Allocate extra slots for use by the command parser */ 3597 exec2_list = kvmalloc_array(count + 2, eb_element_size(), 3598 __GFP_NOWARN | GFP_KERNEL); 3599 if (exec2_list == NULL) { 3600 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", 3601 count); 3602 return -ENOMEM; 3603 } 3604 if (copy_from_user(exec2_list, 3605 u64_to_user_ptr(args->buffers_ptr), 3606 sizeof(*exec2_list) * count)) { 3607 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count); 3608 kvfree(exec2_list); 3609 return -EFAULT; 3610 } 3611 3612 err = i915_gem_do_execbuffer(dev, file, args, exec2_list); 3613 3614 /* 3615 * Now that we have begun execution of the batchbuffer, we ignore 3616 * any new error after this point. Also given that we have already 3617 * updated the associated relocations, we try to write out the current 3618 * object locations irrespective of any error. 3619 */ 3620 if (args->flags & __EXEC_HAS_RELOC) { 3621 struct drm_i915_gem_exec_object2 __user *user_exec_list = 3622 u64_to_user_ptr(args->buffers_ptr); 3623 unsigned int i; 3624 3625 /* Copy the new buffer offsets back to the user's exec list. */ 3626 /* 3627 * Note: count * sizeof(*user_exec_list) does not overflow, 3628 * because we checked 'count' in check_buffer_count(). 3629 * 3630 * And this range already got effectively checked earlier 3631 * when we did the "copy_from_user()" above. 
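	 *
	 * Specifically, check_buffer_count() enforces
	 * count <= SIZE_MAX / eb_element_size() - 1, and eb_element_size()
	 * is strictly larger than sizeof(*user_exec_list), so the product
	 * below cannot wrap.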
3632 */ 3633 if (!user_write_access_begin(user_exec_list, 3634 count * sizeof(*user_exec_list))) 3635 goto end; 3636 3637 for (i = 0; i < args->buffer_count; i++) { 3638 if (!(exec2_list[i].offset & UPDATE)) 3639 continue; 3640 3641 exec2_list[i].offset = 3642 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); 3643 unsafe_put_user(exec2_list[i].offset, 3644 &user_exec_list[i].offset, 3645 end_user); 3646 } 3647 end_user: 3648 user_write_access_end(); 3649 end:; 3650 } 3651 3652 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; 3653 kvfree(exec2_list); 3654 return err; 3655 } 3656
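/*
 * For reference, a minimal sketch of how userspace drives this ioctl
 * (illustrative only: error handling, context and buffer setup are
 * omitted, and 'fd', 'bo_handle' and 'batch_size' are assumed to come
 * from the usual DRM/GEM setup; the sole object in the list is treated
 * as the batchbuffer):
 *
 *	struct drm_i915_gem_exec_object2 obj = {
 *		.handle = bo_handle,
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)&obj,
 *		.buffer_count = 1,
 *		.batch_len = batch_size,
 *		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 */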