/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008,2010 Intel Corporation
 */

#include <linux/dma-resv.h>
#include <linux/highmem.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drm_auth.h>
#include <drm/drm_syncobj.h>

#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "pxp/intel_pxp.h"

#include "i915_cmd_parser.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_evict.h"
#include "i915_gem_ioctls.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"

struct eb_vma {
	struct i915_vma *vma;
	unsigned int flags;

	/** This vma's place in the execbuf reservation list */
	struct drm_i915_gem_exec_object2 *exec;
	struct list_head bind_link;
	struct list_head reloc_link;

	struct hlist_node node;
	u32 handle;
};

enum {
	FORCE_CPU_RELOC = 1,
	FORCE_GTT_RELOC,
	FORCE_GPU_RELOC,
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};

/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */
#define __EXEC_OBJECT_HAS_PIN		BIT(29)
#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
#define __EXEC_OBJECT_USERPTR_INIT	BIT(27)
#define __EXEC_OBJECT_NEEDS_MAP		BIT(26)
#define __EXEC_OBJECT_NEEDS_BIAS	BIT(25)
#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 25) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC	BIT(31)
#define __EXEC_ENGINE_PINNED	BIT(30)
#define __EXEC_USERPTR_USED	BIT(29)
#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
#define UPDATE			PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)

#define __I915_EXEC_ILLEGAL_FLAGS \
	(__I915_EXEC_UNKNOWN_FLAGS | \
	 I915_EXEC_CONSTANTS_MASK  | \
	 I915_EXEC_RESOURCE_STREAMER)

/* Catch emission of unexpected errors for CI! */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#undef EINVAL
#define EINVAL ({ \
	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
	22; \
})
#endif

/**
 * DOC: User command execution
 *
 * Userspace submits commands to be executed on the GPU as an instruction
 * stream within a GEM object we call a batchbuffer. These instructions may
 * refer to other GEM objects containing auxiliary state such as kernels,
 * samplers, render targets and even secondary batchbuffers. Userspace does
 * not know where in the GPU memory these objects reside and so before the
 * batchbuffer is passed to the GPU for execution, those addresses in the
 * batchbuffer and auxiliary objects are updated. This is known as relocation,
 * or patching. To try and avoid having to relocate each object on the next
 * execution, userspace is told the location of those objects in this pass,
 * but this remains just a hint as the kernel may choose a new location for
 * any object in the future.
 *
 * At the level of talking to the hardware, submitting a batchbuffer for the
 * GPU to execute is to add content to a buffer from which the HW
 * command streamer is reading.
 *
 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
 *    Execlists, this command is not placed on the same buffer as the
 *    remaining items.
 *
 * 2. Add a command to invalidate caches to the buffer.
 *
 * 3. Add a batchbuffer start command to the buffer; the start command is
 *    essentially a token together with the GPU address of the batchbuffer
 *    to be executed.
 *
 * 4. Add a pipeline flush to the buffer.
 *
 * 5. Add a memory write command to the buffer to record when the GPU
 *    is done executing the batchbuffer. The memory write writes the
 *    global sequence number of the request, ``i915_request::global_seqno``;
 *    the i915 driver uses the current value in the register to determine
 *    if the GPU has completed the batchbuffer.
 *
 * 6. Add a user interrupt command to the buffer. This command instructs
 *    the GPU to issue an interrupt when the command, pipeline flush and
 *    memory write are completed.
 *
 * 7. Inform the hardware of the additional commands added to the buffer
 *    (by updating the tail pointer).
 *
 * Processing an execbuf ioctl is conceptually split up into a few phases.
 *
 * 1. Validation - Ensure all the pointers, handles and flags are valid.
 * 2. Reservation - Assign GPU address space for every object
 * 3. Relocation - Update any addresses to point to the final locations
 * 4. Serialisation - Order the request with respect to its dependencies
 * 5. Construction - Construct a request to execute the batchbuffer
 * 6. Submission (at some point in the future execution)
 *
 * Reserving resources for the execbuf is the most complicated phase. We
 * neither want to have to migrate the object in the address space, nor do
 * we want to have to update any relocations pointing to this object. Ideally,
 * we want to leave the object where it is and for all the existing relocations
 * to match. If the object is given a new address, or if userspace thinks the
 * object is elsewhere, we have to parse all the relocation entries and update
 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 * all the target addresses in all of its objects match the value in the
 * relocation entries and that they all match the presumed offsets given by the
 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 * moved any buffers, all the relocation entries are valid and we can skip
 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 *
 *      The addresses written in the objects must match the corresponding
 *      reloc.presumed_offset which in turn must match the corresponding
 *      execobject.offset.
 *
 *      Any render targets written to in the batch must be flagged with
 *      EXEC_OBJECT_WRITE.
 *
 *      To avoid stalling, execobject.offset should match the current
 *      address of that object within the active context.
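 *
 * As an illustration only (a minimal userspace-side sketch; the bo_handle,
 * bo_gpu_addr and patch_offset names are made up, while the struct and flag
 * names come from the i915 uapi), honouring that contract looks roughly like:
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle   = bo_handle,
 *		.offset          = patch_offset,
 *		.presumed_offset = bo_gpu_addr,  <- also written into the batch
 *	};
 *	struct drm_i915_gem_exec_object2 obj = {
 *		.handle = bo_handle,
 *		.offset = bo_gpu_addr,           <- must match presumed_offset
 *		.flags  = EXEC_OBJECT_WRITE,     <- if the batch writes this bo
 *	};
 *	execbuf.flags |= I915_EXEC_NO_RELOC;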
 *
 * The reservation is done in multiple phases. First we try and keep any
 * object already bound in its current location - so long as it meets the
 * constraints imposed by the new execbuffer. Any object left unbound after the
 * first pass is then fitted into any available idle space. If an object does
 * not fit, all objects are removed from the reservation and the process rerun
 * after sorting the objects into a priority order (more difficult to fit
 * objects are tried first). Failing that, the entire VM is cleared and we try
 * to fit the execbuf one last time before concluding that it simply will not
 * fit.
 *
 * A small complication to all of this is that we allow userspace not only to
 * specify an alignment and a size for the object in the address space, but
 * we also allow userspace to specify the exact offset. Such objects are
 * simpler to place (the location is known a priori): all we have to do is make
 * sure the space is available.
 *
 * Once all the objects are in place, patching up the buried pointers to point
 * to the final locations is a fairly simple job of walking over the relocation
 * entry arrays, looking up the right address and rewriting the value into
 * the object. Simple! ... The relocation entries are stored in user memory
 * and so to access them we have to copy them into a local buffer. That copy
 * has to avoid taking any pagefaults as they may lead back to a GEM object
 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 * the relocation into multiple passes. First we try to do everything within an
 * atomic context (avoid the pagefaults) which requires that we never wait. If
 * we detect that we may wait, or if we need to fault, then we have to fall back
 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 * bells yet?) Dropping the mutex means that we lose all the state we have
 * built up so far for the execbuf and we must reset any global data. However,
 * we do leave the objects pinned in their final locations - which is a
 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 * allocate and copy all the relocation entries into a large array at our
 * leisure, reacquire the mutex, reclaim all the objects and other state and
 * then proceed to update any incorrect addresses with the objects.
 *
 * As we process the relocation entries, we maintain a record of whether the
 * object is being written to. Using NO_RELOC, we expect userspace to provide
 * this information instead. We also check whether we can skip the relocation
 * by comparing the expected value inside the relocation entry with the target's
 * final address. If they differ, we have to map the current object and rewrite
 * the 4 or 8 byte pointer within.
 *
 * Serialising an execbuf is quite simple according to the rules of the GEM
 * ABI. Execution within each context is ordered by the order of submission.
 * Writes to any GEM object are in order of submission and are exclusive. Reads
 * from a GEM object are unordered with respect to other reads, but ordered by
 * writes. A write submitted after a read cannot occur before the read, and
 * similarly any read submitted after a write cannot occur before the write.
 * Writes are ordered between engines such that only one write occurs at any
 * time (completing any reads beforehand) - using semaphores where available
 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 * reads before starting, and any read (either using set-domain or pread) must
 * flush all GPU writes before starting. (Note we only employ a barrier before;
 * we currently rely on userspace not concurrently starting a new execution
 * whilst reading or writing to an object.
 * This may be an advantage or not,
 * depending on how much you trust userspace not to shoot themselves in the
 * foot.) Serialisation may just result in the request being inserted into
 * a DAG awaiting its turn, but the simplest option is to wait on the CPU until
 * all dependencies are resolved.
 *
 * After all of that, it is just a matter of closing the request and handing it
 * to the hardware (well, leaving it in a queue to be executed). However, we
 * also offer the ability for batchbuffers to be run with elevated privileges
 * so that they can access otherwise hidden registers. (Used to adjust L3 cache
 * etc.) Before any batch is given extra privileges we first must check that it
 * contains no nefarious instructions: we check that each instruction is from
 * our whitelist and that all registers are also from an allowed list. We first
 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 * access to it, either by the CPU or GPU as we scan it) and then parse each
 * instruction. If everything is ok, we set a flag telling the hardware to run
 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 */

struct eb_fence {
	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
	struct dma_fence *dma_fence;
	u64 value;
	struct dma_fence_chain *chain_fence;
};

struct i915_execbuffer {
	struct drm_i915_private *i915; /** i915 backpointer */
	struct drm_file *file; /** per-file lookup tables and limits */
	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
	struct eb_vma *vma;

	struct intel_gt *gt; /* gt for the execbuf */
	struct intel_context *context; /* logical state for the request */
	struct i915_gem_context *gem_context; /** caller's context */
	intel_wakeref_t wakeref;
	intel_wakeref_t wakeref_gt0;

	/** our requests to build */
	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
	/** identity of the batch obj/vma */
	struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
	struct i915_vma *trampoline; /** trampoline used for chaining */

	/** used for excl fence in dma_resv objects when > 1 BB submitted */
	struct dma_fence *composite_fence;

	/** actual size of execobj[] as we may extend it for the cmdparser */
	unsigned int buffer_count;

	/* number of batches in execbuf IOCTL */
	unsigned int num_batches;

	/** list of vma not yet bound during reservation phase */
	struct list_head unbound;

	/** list of vma that have execobj.relocation_count */
	struct list_head relocs;

	struct i915_gem_ww_ctx ww;

	/**
	 * Track the most recently used object for relocations, as we
	 * frequently have to perform multiple relocations within the same
	 * obj/page
	 */
	struct reloc_cache {
		struct drm_mm_node node; /** temporary GTT binding */
		unsigned long vaddr; /** Current kmap address */
		unsigned long page; /** Currently mapped page index */
		unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
		bool use_64bit_reloc : 1;
		bool has_llc : 1;
		bool has_fence : 1;
		bool needs_unfenced : 1;
	} reloc_cache;

	u64 invalid_flags; /** Set of execobj.flags that are invalid */

	/** Length of batch within object */
	u64 batch_len[MAX_ENGINE_INSTANCE + 1];
	u32 batch_start_offset; /** Location within object of batch */
	u32 batch_flags; /** Flags composed for emit_bb_start() */
	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */

	/**
	 * Indicate either the size of the hashtable used to resolve
	 * relocation handles, or if negative that we are using a direct
	 * index into the execobj[].
	 */
	int lut_size;
	struct hlist_head *buckets; /** ht for relocation handles */

	struct eb_fence *fences;
	unsigned long num_fences;
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
	struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
#endif
};

static int eb_parse(struct i915_execbuffer *eb);
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
static void eb_capture_release(struct i915_execbuffer *eb);

static bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
	return intel_engine_requires_cmd_parser(eb->context->engine) ||
		(intel_engine_using_cmd_parser(eb->context->engine) &&
		 eb->args->batch_len);
}

static int eb_create(struct i915_execbuffer *eb)
{
	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
		unsigned int size = 1 + ilog2(eb->buffer_count);

		/*
		 * Without a 1:1 association between relocation handles and
		 * the execobject[] index, we instead create a hashtable.
		 * We size it dynamically based on available memory, starting
		 * first with a 1:1 associative hash and scaling back until
		 * the allocation succeeds.
		 *
		 * Later on we use a positive lut_size to indicate we are
		 * using this hashtable, and a negative value to indicate a
		 * direct lookup.
		 */
		do {
			gfp_t flags;

			/* While we can still reduce the allocation size, don't
			 * raise a warning and allow the allocation to fail.
			 * On the last pass though, we want to try as hard
			 * as possible to perform the allocation and warn
			 * if it fails.
			 */
			flags = GFP_KERNEL;
			if (size > 1)
				flags |= __GFP_NORETRY | __GFP_NOWARN;

			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
					      flags);
			if (eb->buckets)
				break;
		} while (--size);

		if (unlikely(!size))
			return -ENOMEM;

		eb->lut_size = size;
	} else {
		eb->lut_size = -eb->buffer_count;
	}

	return 0;
}

static bool
eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
		 const struct i915_vma *vma,
		 unsigned int flags)
{
	const u64 start = i915_vma_offset(vma);
	const u64 size = i915_vma_size(vma);

	if (size < entry->pad_to_size)
		return true;

	if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
		return true;

	if (flags & EXEC_OBJECT_PINNED &&
	    start != entry->offset)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    start < BATCH_OFFSET_BIAS)
		return true;

	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
	    (start + size + 4095) >> 32)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
	    !i915_vma_is_map_and_fenceable(vma))
		return true;

	return false;
}

static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
			unsigned int exec_flags)
{
	u64 pin_flags = 0;

	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
		pin_flags |= PIN_GLOBAL;

	/*
	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
	 * limit address to the first 4GBs for unflagged objects.
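	 *
	 * For example, an object lacking EXEC_OBJECT_SUPPORTS_48B_ADDRESS is
	 * given PIN_ZONE_4G below and must therefore end up entirely below
	 * 1ULL << 32.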
424 */ 425 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 426 pin_flags |= PIN_ZONE_4G; 427 428 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) 429 pin_flags |= PIN_MAPPABLE; 430 431 if (exec_flags & EXEC_OBJECT_PINNED) 432 pin_flags |= entry->offset | PIN_OFFSET_FIXED; 433 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) 434 pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; 435 436 return pin_flags; 437 } 438 439 static int 440 eb_pin_vma(struct i915_execbuffer *eb, 441 const struct drm_i915_gem_exec_object2 *entry, 442 struct eb_vma *ev) 443 { 444 struct i915_vma *vma = ev->vma; 445 u64 pin_flags; 446 int err; 447 448 if (vma->node.size) 449 pin_flags = __i915_vma_offset(vma); 450 else 451 pin_flags = entry->offset & PIN_OFFSET_MASK; 452 453 pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE; 454 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)) 455 pin_flags |= PIN_GLOBAL; 456 457 /* Attempt to reuse the current location if available */ 458 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); 459 if (err == -EDEADLK) 460 return err; 461 462 if (unlikely(err)) { 463 if (entry->flags & EXEC_OBJECT_PINNED) 464 return err; 465 466 /* Failing that pick any _free_ space if suitable */ 467 err = i915_vma_pin_ww(vma, &eb->ww, 468 entry->pad_to_size, 469 entry->alignment, 470 eb_pin_flags(entry, ev->flags) | 471 PIN_USER | PIN_NOEVICT | PIN_VALIDATE); 472 if (unlikely(err)) 473 return err; 474 } 475 476 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { 477 err = i915_vma_pin_fence(vma); 478 if (unlikely(err)) 479 return err; 480 481 if (vma->fence) 482 ev->flags |= __EXEC_OBJECT_HAS_FENCE; 483 } 484 485 ev->flags |= __EXEC_OBJECT_HAS_PIN; 486 if (eb_vma_misplaced(entry, vma, ev->flags)) 487 return -EBADSLT; 488 489 return 0; 490 } 491 492 static void 493 eb_unreserve_vma(struct eb_vma *ev) 494 { 495 if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) 496 __i915_vma_unpin_fence(ev->vma); 497 498 ev->flags &= ~__EXEC_OBJECT_RESERVED; 499 } 500 501 static int 502 eb_validate_vma(struct i915_execbuffer *eb, 503 struct drm_i915_gem_exec_object2 *entry, 504 struct i915_vma *vma) 505 { 506 /* Relocations are disallowed for all platforms after TGL-LP. This 507 * also covers all platforms with local memory. 508 */ 509 if (entry->relocation_count && 510 GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) 511 return -EINVAL; 512 513 if (unlikely(entry->flags & eb->invalid_flags)) 514 return -EINVAL; 515 516 if (unlikely(entry->alignment && 517 !is_power_of_2_u64(entry->alignment))) 518 return -EINVAL; 519 520 /* 521 * Offset can be used as input (EXEC_OBJECT_PINNED), reject 522 * any non-page-aligned or non-canonical addresses. 523 */ 524 if (unlikely(entry->flags & EXEC_OBJECT_PINNED && 525 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) 526 return -EINVAL; 527 528 /* pad_to_size was once a reserved field, so sanitize it */ 529 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { 530 if (unlikely(offset_in_page(entry->pad_to_size))) 531 return -EINVAL; 532 } else { 533 entry->pad_to_size = 0; 534 } 535 /* 536 * From drm_mm perspective address space is continuous, 537 * so from this point we're always using non-canonical 538 * form internally. 
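	 *
	 * As a rough illustration (the exact helpers live elsewhere in i915):
	 * the canonical form sign-extends bit 47 into bits 63:48, so
	 *
	 *	canonical(0x0000800000001000)    == 0xffff800000001000
	 *	noncanonical(0xffff800000001000) == 0x0000800000001000
	 *
	 * i.e. roughly sign_extend64(addr, 47) and addr & GENMASK_ULL(47, 0).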
539 */ 540 entry->offset = gen8_noncanonical_addr(entry->offset); 541 542 if (!eb->reloc_cache.has_fence) { 543 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 544 } else { 545 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || 546 eb->reloc_cache.needs_unfenced) && 547 i915_gem_object_is_tiled(vma->obj)) 548 entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; 549 } 550 551 return 0; 552 } 553 554 static bool 555 is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx) 556 { 557 return eb->args->flags & I915_EXEC_BATCH_FIRST ? 558 buffer_idx < eb->num_batches : 559 buffer_idx >= eb->args->buffer_count - eb->num_batches; 560 } 561 562 static int 563 eb_add_vma(struct i915_execbuffer *eb, 564 unsigned int *current_batch, 565 unsigned int i, 566 struct i915_vma *vma) 567 { 568 struct drm_i915_private *i915 = eb->i915; 569 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 570 struct eb_vma *ev = &eb->vma[i]; 571 572 ev->vma = vma; 573 ev->exec = entry; 574 ev->flags = entry->flags; 575 576 if (eb->lut_size > 0) { 577 ev->handle = entry->handle; 578 hlist_add_head(&ev->node, 579 &eb->buckets[hash_32(entry->handle, 580 eb->lut_size)]); 581 } 582 583 if (entry->relocation_count) 584 list_add_tail(&ev->reloc_link, &eb->relocs); 585 586 /* 587 * SNA is doing fancy tricks with compressing batch buffers, which leads 588 * to negative relocation deltas. Usually that works out ok since the 589 * relocate address is still positive, except when the batch is placed 590 * very low in the GTT. Ensure this doesn't happen. 591 * 592 * Note that actual hangs have only been observed on gen7, but for 593 * paranoia do it everywhere. 594 */ 595 if (is_batch_buffer(eb, i)) { 596 if (entry->relocation_count && 597 !(ev->flags & EXEC_OBJECT_PINNED)) 598 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS; 599 if (eb->reloc_cache.has_fence) 600 ev->flags |= EXEC_OBJECT_NEEDS_FENCE; 601 602 eb->batches[*current_batch] = ev; 603 604 if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) { 605 drm_dbg(&i915->drm, 606 "Attempting to use self-modifying batch buffer\n"); 607 return -EINVAL; 608 } 609 610 if (range_overflows_t(u64, 611 eb->batch_start_offset, 612 eb->args->batch_len, 613 ev->vma->size)) { 614 drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); 615 return -EINVAL; 616 } 617 618 if (eb->args->batch_len == 0) 619 eb->batch_len[*current_batch] = ev->vma->size - 620 eb->batch_start_offset; 621 else 622 eb->batch_len[*current_batch] = eb->args->batch_len; 623 if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */ 624 drm_dbg(&i915->drm, "Invalid batch length\n"); 625 return -EINVAL; 626 } 627 628 ++*current_batch; 629 } 630 631 return 0; 632 } 633 634 static int use_cpu_reloc(const struct reloc_cache *cache, 635 const struct drm_i915_gem_object *obj) 636 { 637 if (!i915_gem_object_has_struct_page(obj)) 638 return false; 639 640 if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) 641 return true; 642 643 if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) 644 return false; 645 646 /* 647 * For objects created by userspace through GEM_CREATE with pat_index 648 * set by set_pat extension, i915_gem_object_has_cache_level() always 649 * return true, otherwise the call would fall back to checking whether 650 * the object is un-cached. 
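	 *
	 * In short, the test below prefers a CPU relocation when the mapping
	 * is CPU-coherent (LLC), when the pages are already CPU dirty, or when
	 * the object is not uncached; otherwise we fall back to a GTT
	 * relocation.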
	 */
	return (cache->has_llc ||
		obj->cache_dirty ||
		!i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
}

static int eb_reserve_vma(struct i915_execbuffer *eb,
			  struct eb_vma *ev,
			  u64 pin_flags)
{
	struct drm_i915_gem_exec_object2 *entry = ev->exec;
	struct i915_vma *vma = ev->vma;
	int err;

	if (drm_mm_node_allocated(&vma->node) &&
	    eb_vma_misplaced(entry, vma, ev->flags)) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin_ww(vma, &eb->ww,
			      entry->pad_to_size, entry->alignment,
			      eb_pin_flags(entry, ev->flags) | pin_flags);
	if (err)
		return err;

	if (entry->offset != i915_vma_offset(vma)) {
		entry->offset = i915_vma_offset(vma) | UPDATE;
		eb->args->flags |= __EXEC_HAS_RELOC;
	}

	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
		err = i915_vma_pin_fence(vma);
		if (unlikely(err))
			return err;

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	ev->flags |= __EXEC_OBJECT_HAS_PIN;
	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));

	return 0;
}

static bool eb_unbind(struct i915_execbuffer *eb, bool force)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;
	struct list_head last;
	bool unpinned = false;

	/* Resort *all* the objects into priority order */
	INIT_LIST_HEAD(&eb->unbound);
	INIT_LIST_HEAD(&last);

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];
		unsigned int flags = ev->flags;

		if (!force && flags & EXEC_OBJECT_PINNED &&
		    flags & __EXEC_OBJECT_HAS_PIN)
			continue;

		unpinned = true;
		eb_unreserve_vma(ev);

		if (flags & EXEC_OBJECT_PINNED)
			/* Pinned must have their slot */
			list_add(&ev->bind_link, &eb->unbound);
		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
			/* Mappable objects require the lowest 256MiB (aperture) */
			list_add_tail(&ev->bind_link, &eb->unbound);
		else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
			/* Prioritise 4GiB region for restricted bo */
			list_add(&ev->bind_link, &last);
		else
			list_add_tail(&ev->bind_link, &last);
	}

	list_splice_tail(&last, &eb->unbound);
	return unpinned;
}

static int eb_reserve(struct i915_execbuffer *eb)
{
	struct eb_vma *ev;
	unsigned int pass;
	int err = 0;

	/*
	 * We have one or more buffers that we couldn't bind, which could be due
	 * to various reasons. To resolve this we have 4 passes, with every next
	 * level turning the screws tighter:
	 *
	 * 0. Unbind all objects that do not match the GTT constraints for the
	 *    execbuffer (fenceable, mappable, alignment etc). Bind all new
	 *    objects. This avoids unnecessary unbinding of later objects in
	 *    order to make room for the earlier objects *unless* we need to
	 *    defragment.
	 *
	 * 1. Reorder the buffers, where objects with the most restrictive
	 *    placement requirements go first (ignoring fixed location buffers
	 *    for now). For example, objects needing the mappable aperture (the
	 *    first 256M of GTT) should go first vs objects that can be placed
	 *    just about anywhere. Repeat the previous pass.
	 *
	 * 2. Consider buffers that are pinned at a fixed location. Also try to
	 *    evict the entire VM this time, leaving only objects that we were
	 *    unable to lock. Try again to bind the buffers. (still using the
	 *    new buffer order).
	 *
	 * 3.
We likely have object lock contention for one or more stubborn 765 * objects in the VM, for which we need to evict to make forward 766 * progress (perhaps we are fighting the shrinker?). When evicting the 767 * VM this time around, anything that we can't lock we now track using 768 * the busy_bo, using the full lock (after dropping the vm->mutex to 769 * prevent deadlocks), instead of trylock. We then continue to evict the 770 * VM, this time with the stubborn object locked, which we can now 771 * hopefully unbind (if still bound in the VM). Repeat until the VM is 772 * evicted. Finally we should be able bind everything. 773 */ 774 for (pass = 0; pass <= 3; pass++) { 775 int pin_flags = PIN_USER | PIN_VALIDATE; 776 777 if (pass == 0) 778 pin_flags |= PIN_NONBLOCK; 779 780 if (pass >= 1) 781 eb_unbind(eb, pass >= 2); 782 783 if (pass == 2) { 784 err = mutex_lock_interruptible(&eb->context->vm->mutex); 785 if (!err) { 786 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); 787 mutex_unlock(&eb->context->vm->mutex); 788 } 789 if (err) 790 return err; 791 } 792 793 if (pass == 3) { 794 retry: 795 err = mutex_lock_interruptible(&eb->context->vm->mutex); 796 if (!err) { 797 struct drm_i915_gem_object *busy_bo = NULL; 798 799 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); 800 mutex_unlock(&eb->context->vm->mutex); 801 if (err && busy_bo) { 802 err = i915_gem_object_lock(busy_bo, &eb->ww); 803 i915_gem_object_put(busy_bo); 804 if (!err) 805 goto retry; 806 } 807 } 808 if (err) 809 return err; 810 } 811 812 list_for_each_entry(ev, &eb->unbound, bind_link) { 813 err = eb_reserve_vma(eb, ev, pin_flags); 814 if (err) 815 break; 816 } 817 818 if (err != -ENOSPC) 819 break; 820 } 821 822 return err; 823 } 824 825 static int eb_select_context(struct i915_execbuffer *eb) 826 { 827 struct i915_gem_context *ctx; 828 829 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); 830 if (unlikely(IS_ERR(ctx))) 831 return PTR_ERR(ctx); 832 833 eb->gem_context = ctx; 834 if (i915_gem_context_has_full_ppgtt(ctx)) 835 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; 836 837 return 0; 838 } 839 840 static int __eb_add_lut(struct i915_execbuffer *eb, 841 u32 handle, struct i915_vma *vma) 842 { 843 struct i915_gem_context *ctx = eb->gem_context; 844 struct i915_lut_handle *lut; 845 int err; 846 847 lut = i915_lut_handle_alloc(); 848 if (unlikely(!lut)) 849 return -ENOMEM; 850 851 i915_vma_get(vma); 852 if (!atomic_fetch_inc(&vma->open_count)) 853 i915_vma_reopen(vma); 854 lut->handle = handle; 855 lut->ctx = ctx; 856 857 /* Check that the context hasn't been closed in the meantime */ 858 err = -EINTR; 859 if (!mutex_lock_interruptible(&ctx->lut_mutex)) { 860 if (likely(!i915_gem_context_is_closed(ctx))) 861 err = radix_tree_insert(&ctx->handles_vma, handle, vma); 862 else 863 err = -ENOENT; 864 if (err == 0) { /* And nor has this handle */ 865 struct drm_i915_gem_object *obj = vma->obj; 866 867 spin_lock(&obj->lut_lock); 868 if (idr_find(&eb->file->object_idr, handle) == obj) { 869 list_add(&lut->obj_link, &obj->lut_list); 870 } else { 871 radix_tree_delete(&ctx->handles_vma, handle); 872 err = -ENOENT; 873 } 874 spin_unlock(&obj->lut_lock); 875 } 876 mutex_unlock(&ctx->lut_mutex); 877 } 878 if (unlikely(err)) 879 goto err; 880 881 return 0; 882 883 err: 884 i915_vma_close(vma); 885 i915_vma_put(vma); 886 i915_lut_handle_free(lut); 887 return err; 888 } 889 890 static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) 891 { 892 struct i915_address_space *vm = 
eb->context->vm; 893 894 do { 895 struct drm_i915_gem_object *obj; 896 struct i915_vma *vma; 897 int err; 898 899 rcu_read_lock(); 900 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle); 901 if (likely(vma && vma->vm == vm)) 902 vma = i915_vma_tryget(vma); 903 rcu_read_unlock(); 904 if (likely(vma)) 905 return vma; 906 907 obj = i915_gem_object_lookup(eb->file, handle); 908 if (unlikely(!obj)) 909 return ERR_PTR(-ENOENT); 910 911 /* 912 * If the user has opted-in for protected-object tracking, make 913 * sure the object encryption can be used. 914 * We only need to do this when the object is first used with 915 * this context, because the context itself will be banned when 916 * the protected objects become invalid. 917 */ 918 if (i915_gem_context_uses_protected_content(eb->gem_context) && 919 i915_gem_object_is_protected(obj)) { 920 err = intel_pxp_key_check(eb->i915->pxp, obj, true); 921 if (err) { 922 i915_gem_object_put(obj); 923 return ERR_PTR(err); 924 } 925 } 926 927 vma = i915_vma_instance(obj, vm, NULL); 928 if (IS_ERR(vma)) { 929 i915_gem_object_put(obj); 930 return vma; 931 } 932 933 err = __eb_add_lut(eb, handle, vma); 934 if (likely(!err)) 935 return vma; 936 937 i915_gem_object_put(obj); 938 if (err != -EEXIST) 939 return ERR_PTR(err); 940 } while (1); 941 } 942 943 static int eb_lookup_vmas(struct i915_execbuffer *eb) 944 { 945 unsigned int i, current_batch = 0; 946 int err = 0; 947 948 INIT_LIST_HEAD(&eb->relocs); 949 950 for (i = 0; i < eb->buffer_count; i++) { 951 struct i915_vma *vma; 952 953 vma = eb_lookup_vma(eb, eb->exec[i].handle); 954 if (IS_ERR(vma)) { 955 err = PTR_ERR(vma); 956 goto err; 957 } 958 959 err = eb_validate_vma(eb, &eb->exec[i], vma); 960 if (unlikely(err)) { 961 i915_vma_put(vma); 962 goto err; 963 } 964 965 err = eb_add_vma(eb, ¤t_batch, i, vma); 966 if (err) 967 return err; 968 969 if (i915_gem_object_is_userptr(vma->obj)) { 970 err = i915_gem_object_userptr_submit_init(vma->obj); 971 if (err) { 972 if (i + 1 < eb->buffer_count) { 973 /* 974 * Execbuffer code expects last vma entry to be NULL, 975 * since we already initialized this entry, 976 * set the next value to NULL or we mess up 977 * cleanup handling. 
978 */ 979 eb->vma[i + 1].vma = NULL; 980 } 981 982 return err; 983 } 984 985 eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT; 986 eb->args->flags |= __EXEC_USERPTR_USED; 987 } 988 } 989 990 return 0; 991 992 err: 993 eb->vma[i].vma = NULL; 994 return err; 995 } 996 997 static int eb_lock_vmas(struct i915_execbuffer *eb) 998 { 999 unsigned int i; 1000 int err; 1001 1002 for (i = 0; i < eb->buffer_count; i++) { 1003 struct eb_vma *ev = &eb->vma[i]; 1004 struct i915_vma *vma = ev->vma; 1005 1006 err = i915_gem_object_lock(vma->obj, &eb->ww); 1007 if (err) 1008 return err; 1009 } 1010 1011 return 0; 1012 } 1013 1014 static int eb_validate_vmas(struct i915_execbuffer *eb) 1015 { 1016 unsigned int i; 1017 int err; 1018 1019 INIT_LIST_HEAD(&eb->unbound); 1020 1021 err = eb_lock_vmas(eb); 1022 if (err) 1023 return err; 1024 1025 for (i = 0; i < eb->buffer_count; i++) { 1026 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 1027 struct eb_vma *ev = &eb->vma[i]; 1028 struct i915_vma *vma = ev->vma; 1029 1030 err = eb_pin_vma(eb, entry, ev); 1031 if (err == -EDEADLK) 1032 return err; 1033 1034 if (!err) { 1035 if (entry->offset != i915_vma_offset(vma)) { 1036 entry->offset = i915_vma_offset(vma) | UPDATE; 1037 eb->args->flags |= __EXEC_HAS_RELOC; 1038 } 1039 } else { 1040 eb_unreserve_vma(ev); 1041 1042 list_add_tail(&ev->bind_link, &eb->unbound); 1043 if (drm_mm_node_allocated(&vma->node)) { 1044 err = i915_vma_unbind(vma); 1045 if (err) 1046 return err; 1047 } 1048 } 1049 1050 /* Reserve enough slots to accommodate composite fences */ 1051 err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches); 1052 if (err) 1053 return err; 1054 1055 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && 1056 eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); 1057 } 1058 1059 if (!list_empty(&eb->unbound)) 1060 return eb_reserve(eb); 1061 1062 return 0; 1063 } 1064 1065 static struct eb_vma * 1066 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) 1067 { 1068 if (eb->lut_size < 0) { 1069 if (handle >= -eb->lut_size) 1070 return NULL; 1071 return &eb->vma[handle]; 1072 } else { 1073 struct hlist_head *head; 1074 struct eb_vma *ev; 1075 1076 head = &eb->buckets[hash_32(handle, eb->lut_size)]; 1077 hlist_for_each_entry(ev, head, node) { 1078 if (ev->handle == handle) 1079 return ev; 1080 } 1081 return NULL; 1082 } 1083 } 1084 1085 static void eb_release_vmas(struct i915_execbuffer *eb, bool final) 1086 { 1087 const unsigned int count = eb->buffer_count; 1088 unsigned int i; 1089 1090 for (i = 0; i < count; i++) { 1091 struct eb_vma *ev = &eb->vma[i]; 1092 struct i915_vma *vma = ev->vma; 1093 1094 if (!vma) 1095 break; 1096 1097 eb_unreserve_vma(ev); 1098 1099 if (final) 1100 i915_vma_put(vma); 1101 } 1102 1103 eb_capture_release(eb); 1104 eb_unpin_engine(eb); 1105 } 1106 1107 static void eb_destroy(const struct i915_execbuffer *eb) 1108 { 1109 if (eb->lut_size > 0) 1110 kfree(eb->buckets); 1111 } 1112 1113 static u64 1114 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 1115 const struct i915_vma *target) 1116 { 1117 return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target)); 1118 } 1119 1120 static void reloc_cache_init(struct reloc_cache *cache, 1121 struct drm_i915_private *i915) 1122 { 1123 cache->page = -1; 1124 cache->vaddr = 0; 1125 /* Must be a variable in the struct to allow GCC to unroll. 
*/ 1126 cache->graphics_ver = GRAPHICS_VER(i915); 1127 cache->has_llc = HAS_LLC(i915); 1128 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 1129 cache->has_fence = cache->graphics_ver < 4; 1130 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; 1131 cache->node.flags = 0; 1132 } 1133 1134 static void *unmask_page(unsigned long p) 1135 { 1136 return (void *)(uintptr_t)(p & PAGE_MASK); 1137 } 1138 1139 static unsigned int unmask_flags(unsigned long p) 1140 { 1141 return p & ~PAGE_MASK; 1142 } 1143 1144 #define KMAP 0x4 /* after CLFLUSH_FLAGS */ 1145 1146 static struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) 1147 { 1148 struct drm_i915_private *i915 = 1149 container_of(cache, struct i915_execbuffer, reloc_cache)->i915; 1150 return to_gt(i915)->ggtt; 1151 } 1152 1153 static void reloc_cache_unmap(struct reloc_cache *cache) 1154 { 1155 void *vaddr; 1156 1157 if (!cache->vaddr) 1158 return; 1159 1160 vaddr = unmask_page(cache->vaddr); 1161 if (cache->vaddr & KMAP) 1162 kunmap_local(vaddr); 1163 else 1164 io_mapping_unmap_atomic((void __iomem *)vaddr); 1165 } 1166 1167 static void reloc_cache_remap(struct reloc_cache *cache, 1168 struct drm_i915_gem_object *obj) 1169 { 1170 void *vaddr; 1171 1172 if (!cache->vaddr) 1173 return; 1174 1175 if (cache->vaddr & KMAP) { 1176 struct page *page = i915_gem_object_get_page(obj, cache->page); 1177 1178 vaddr = kmap_local_page(page); 1179 cache->vaddr = unmask_flags(cache->vaddr) | 1180 (unsigned long)vaddr; 1181 } else { 1182 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1183 unsigned long offset; 1184 1185 offset = cache->node.start; 1186 if (!drm_mm_node_allocated(&cache->node)) 1187 offset += cache->page << PAGE_SHIFT; 1188 1189 cache->vaddr = (unsigned long) 1190 io_mapping_map_atomic_wc(&ggtt->iomap, offset); 1191 } 1192 } 1193 1194 static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) 1195 { 1196 void *vaddr; 1197 1198 if (!cache->vaddr) 1199 return; 1200 1201 vaddr = unmask_page(cache->vaddr); 1202 if (cache->vaddr & KMAP) { 1203 struct drm_i915_gem_object *obj = 1204 (struct drm_i915_gem_object *)cache->node.mm; 1205 if (cache->vaddr & CLFLUSH_AFTER) 1206 mb(); 1207 1208 kunmap_local(vaddr); 1209 i915_gem_object_finish_access(obj); 1210 } else { 1211 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1212 1213 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1214 io_mapping_unmap_atomic((void __iomem *)vaddr); 1215 1216 if (drm_mm_node_allocated(&cache->node)) { 1217 ggtt->vm.clear_range(&ggtt->vm, 1218 cache->node.start, 1219 cache->node.size); 1220 mutex_lock(&ggtt->vm.mutex); 1221 drm_mm_remove_node(&cache->node); 1222 mutex_unlock(&ggtt->vm.mutex); 1223 } else { 1224 i915_vma_unpin((struct i915_vma *)cache->node.mm); 1225 } 1226 } 1227 1228 cache->vaddr = 0; 1229 cache->page = -1; 1230 } 1231 1232 static void *reloc_kmap(struct drm_i915_gem_object *obj, 1233 struct reloc_cache *cache, 1234 unsigned long pageno) 1235 { 1236 void *vaddr; 1237 struct page *page; 1238 1239 if (cache->vaddr) { 1240 kunmap_local(unmask_page(cache->vaddr)); 1241 } else { 1242 unsigned int flushes; 1243 int err; 1244 1245 err = i915_gem_object_prepare_write(obj, &flushes); 1246 if (err) 1247 return ERR_PTR(err); 1248 1249 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 1250 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); 1251 1252 cache->vaddr = flushes | KMAP; 1253 cache->node.mm = (void *)obj; 1254 if (flushes) 1255 mb(); 1256 } 1257 1258 page = i915_gem_object_get_page(obj, pageno); 1259 if (!obj->mm.dirty) 1260 
set_page_dirty(page); 1261 1262 vaddr = kmap_local_page(page); 1263 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; 1264 cache->page = pageno; 1265 1266 return vaddr; 1267 } 1268 1269 static void *reloc_iomap(struct i915_vma *batch, 1270 struct i915_execbuffer *eb, 1271 unsigned long page) 1272 { 1273 struct drm_i915_gem_object *obj = batch->obj; 1274 struct reloc_cache *cache = &eb->reloc_cache; 1275 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1276 unsigned long offset; 1277 void *vaddr; 1278 1279 if (cache->vaddr) { 1280 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 1281 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 1282 } else { 1283 struct i915_vma *vma = ERR_PTR(-ENODEV); 1284 int err; 1285 1286 if (i915_gem_object_is_tiled(obj)) 1287 return ERR_PTR(-EINVAL); 1288 1289 if (use_cpu_reloc(cache, obj)) 1290 return NULL; 1291 1292 err = i915_gem_object_set_to_gtt_domain(obj, true); 1293 if (err) 1294 return ERR_PTR(err); 1295 1296 /* 1297 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch 1298 * VMA from the object list because we no longer pin. 1299 * 1300 * Only attempt to pin the batch buffer to ggtt if the current batch 1301 * is not inside ggtt, or the batch buffer is not misplaced. 1302 */ 1303 if (!i915_is_ggtt(batch->vm) || 1304 !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) { 1305 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, 1306 PIN_MAPPABLE | 1307 PIN_NONBLOCK /* NOWARN */ | 1308 PIN_NOEVICT); 1309 } 1310 1311 if (vma == ERR_PTR(-EDEADLK)) 1312 return vma; 1313 1314 if (IS_ERR(vma)) { 1315 memset(&cache->node, 0, sizeof(cache->node)); 1316 mutex_lock(&ggtt->vm.mutex); 1317 err = drm_mm_insert_node_in_range 1318 (&ggtt->vm.mm, &cache->node, 1319 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1320 0, ggtt->mappable_end, 1321 DRM_MM_INSERT_LOW); 1322 mutex_unlock(&ggtt->vm.mutex); 1323 if (err) /* no inactive aperture space, use cpu reloc */ 1324 return NULL; 1325 } else { 1326 cache->node.start = i915_ggtt_offset(vma); 1327 cache->node.mm = (void *)vma; 1328 } 1329 } 1330 1331 offset = cache->node.start; 1332 if (drm_mm_node_allocated(&cache->node)) { 1333 ggtt->vm.insert_page(&ggtt->vm, 1334 i915_gem_object_get_dma_address(obj, page), 1335 offset, 1336 i915_gem_get_pat_index(ggtt->vm.i915, 1337 I915_CACHE_NONE), 1338 0); 1339 } else { 1340 offset += page << PAGE_SHIFT; 1341 } 1342 1343 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, 1344 offset); 1345 cache->page = page; 1346 cache->vaddr = (unsigned long)vaddr; 1347 1348 return vaddr; 1349 } 1350 1351 static void *reloc_vaddr(struct i915_vma *vma, 1352 struct i915_execbuffer *eb, 1353 unsigned long page) 1354 { 1355 struct reloc_cache *cache = &eb->reloc_cache; 1356 void *vaddr; 1357 1358 if (cache->page == page) { 1359 vaddr = unmask_page(cache->vaddr); 1360 } else { 1361 vaddr = NULL; 1362 if ((cache->vaddr & KMAP) == 0) 1363 vaddr = reloc_iomap(vma, eb, page); 1364 if (!vaddr) 1365 vaddr = reloc_kmap(vma->obj, cache, page); 1366 } 1367 1368 return vaddr; 1369 } 1370 1371 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) 1372 { 1373 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { 1374 if (flushes & CLFLUSH_BEFORE) 1375 drm_clflush_virt_range(addr, sizeof(*addr)); 1376 1377 *addr = value; 1378 1379 /* 1380 * Writes to the same cacheline are serialised by the CPU 1381 * (including clflush). 
		 * On the write path, we only require that it hits memory in an
		 * orderly fashion, and we place mb barriers at the start and
		 * end of the relocation phase to ensure ordering of clflush
		 * wrt the system.
		 */
		if (flushes & CLFLUSH_AFTER)
			drm_clflush_virt_range(addr, sizeof(*addr));
	} else
		*addr = value;
}

static u64
relocate_entry(struct i915_vma *vma,
	       const struct drm_i915_gem_relocation_entry *reloc,
	       struct i915_execbuffer *eb,
	       const struct i915_vma *target)
{
	u64 target_addr = relocation_target(reloc, target);
	u64 offset = reloc->offset;
	bool wide = eb->reloc_cache.use_64bit_reloc;
	void *vaddr;

repeat:
	vaddr = reloc_vaddr(vma, eb,
			    offset >> PAGE_SHIFT);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
	clflush_write32(vaddr + offset_in_page(offset),
			lower_32_bits(target_addr),
			eb->reloc_cache.vaddr);

	if (wide) {
		offset += sizeof(u32);
		target_addr >>= 32;
		wide = false;
		goto repeat;
	}

	return target->node.start | UPDATE;
}

static u64
eb_relocate_entry(struct i915_execbuffer *eb,
		  struct eb_vma *ev,
		  const struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_i915_private *i915 = eb->i915;
	struct eb_vma *target;
	int err;

	/* we already hold a reference to all valid objects */
	target = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(!target))
		return -ENOENT;

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		drm_dbg(&i915->drm, "reloc with multiple write domains: "
			"target %d offset %d "
			"read %08x write %08x\n",
			reloc->target_handle,
			(int) reloc->offset,
			reloc->read_domains,
			reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
			"target %d offset %d "
			"read %08x write %08x\n",
			reloc->target_handle,
			(int) reloc->offset,
			reloc->read_domains,
			reloc->write_domain);
		return -EINVAL;
	}

	if (reloc->write_domain) {
		target->flags |= EXEC_OBJECT_WRITE;

		/*
		 * Sandybridge PPGTT errata: We need a global gtt mapping
		 * for MI and pipe_control writes because the gpu doesn't
		 * properly redirect them through the ppgtt for non_secure
		 * batchbuffers.
		 */
		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
		    GRAPHICS_VER(eb->i915) == 6 &&
		    !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
			struct i915_vma *vma = target->vma;

			reloc_cache_unmap(&eb->reloc_cache);
			mutex_lock(&vma->vm->mutex);
			err = i915_vma_bind(target->vma,
					    target->vma->obj->pat_index,
					    PIN_GLOBAL, NULL, NULL);
			mutex_unlock(&vma->vm->mutex);
			reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
			if (err)
				return err;
		}
	}

	/*
	 * If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (!DBG_FORCE_RELOC &&
	    gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid...
*/ 1496 if (unlikely(reloc->offset > 1497 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { 1498 drm_dbg(&i915->drm, "Relocation beyond object bounds: " 1499 "target %d offset %d size %d.\n", 1500 reloc->target_handle, 1501 (int)reloc->offset, 1502 (int)ev->vma->size); 1503 return -EINVAL; 1504 } 1505 if (unlikely(reloc->offset & 3)) { 1506 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: " 1507 "target %d offset %d.\n", 1508 reloc->target_handle, 1509 (int)reloc->offset); 1510 return -EINVAL; 1511 } 1512 1513 /* 1514 * If we write into the object, we need to force the synchronisation 1515 * barrier, either with an asynchronous clflush or if we executed the 1516 * patching using the GPU (though that should be serialised by the 1517 * timeline). To be completely sure, and since we are required to 1518 * do relocations we are already stalling, disable the user's opt 1519 * out of our synchronisation. 1520 */ 1521 ev->flags &= ~EXEC_OBJECT_ASYNC; 1522 1523 /* and update the user's relocation entry */ 1524 return relocate_entry(ev->vma, reloc, eb, target->vma); 1525 } 1526 1527 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) 1528 { 1529 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1530 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; 1531 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1532 struct drm_i915_gem_relocation_entry __user *urelocs = 1533 u64_to_user_ptr(entry->relocs_ptr); 1534 unsigned long remain = entry->relocation_count; 1535 1536 if (unlikely(remain > N_RELOC(ULONG_MAX))) 1537 return -EINVAL; 1538 1539 /* 1540 * We must check that the entire relocation array is safe 1541 * to read. However, if the array is not writable the user loses 1542 * the updated relocation values. 1543 */ 1544 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))) 1545 return -EFAULT; 1546 1547 do { 1548 struct drm_i915_gem_relocation_entry *r = stack; 1549 unsigned int count = 1550 min_t(unsigned long, remain, ARRAY_SIZE(stack)); 1551 unsigned int copied; 1552 1553 /* 1554 * This is the fast path and we cannot handle a pagefault 1555 * whilst holding the struct mutex lest the user pass in the 1556 * relocations contained within a mmaped bo. For in such a case 1557 * we, the page fault handler would call i915_gem_fault() and 1558 * we would try to acquire the struct mutex again. Obviously 1559 * this is bad and so lockdep complains vehemently. 1560 */ 1561 pagefault_disable(); 1562 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); 1563 pagefault_enable(); 1564 if (unlikely(copied)) { 1565 remain = -EFAULT; 1566 goto out; 1567 } 1568 1569 remain -= count; 1570 do { 1571 u64 offset = eb_relocate_entry(eb, ev, r); 1572 1573 if (likely(offset == 0)) { 1574 } else if ((s64)offset < 0) { 1575 remain = (int)offset; 1576 goto out; 1577 } else { 1578 /* 1579 * Note that reporting an error now 1580 * leaves everything in an inconsistent 1581 * state as we have *already* changed 1582 * the relocation value inside the 1583 * object. As we have not changed the 1584 * reloc.presumed_offset or will not 1585 * change the execobject.offset, on the 1586 * call we may not rewrite the value 1587 * inside the object, leaving it 1588 * dangling and causing a GPU hang. Unless 1589 * userspace dynamically rebuilds the 1590 * relocations on each execbuf rather than 1591 * presume a static tree. 
1592 * 1593 * We did previously check if the relocations 1594 * were writable (access_ok), an error now 1595 * would be a strange race with mprotect, 1596 * having already demonstrated that we 1597 * can read from this userspace address. 1598 */ 1599 offset = gen8_canonical_addr(offset & ~UPDATE); 1600 __put_user(offset, 1601 &urelocs[r - stack].presumed_offset); 1602 } 1603 } while (r++, --count); 1604 urelocs += ARRAY_SIZE(stack); 1605 } while (remain); 1606 out: 1607 reloc_cache_reset(&eb->reloc_cache, eb); 1608 return remain; 1609 } 1610 1611 static int 1612 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) 1613 { 1614 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1615 struct drm_i915_gem_relocation_entry *relocs = 1616 u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1617 unsigned int i; 1618 int err; 1619 1620 for (i = 0; i < entry->relocation_count; i++) { 1621 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); 1622 1623 if ((s64)offset < 0) { 1624 err = (int)offset; 1625 goto err; 1626 } 1627 } 1628 err = 0; 1629 err: 1630 reloc_cache_reset(&eb->reloc_cache, eb); 1631 return err; 1632 } 1633 1634 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) 1635 { 1636 const char __user *addr, *end; 1637 unsigned long size; 1638 char __maybe_unused c; 1639 1640 size = entry->relocation_count; 1641 if (size == 0) 1642 return 0; 1643 1644 if (size > N_RELOC(ULONG_MAX)) 1645 return -EINVAL; 1646 1647 addr = u64_to_user_ptr(entry->relocs_ptr); 1648 size *= sizeof(struct drm_i915_gem_relocation_entry); 1649 if (!access_ok(addr, size)) 1650 return -EFAULT; 1651 1652 end = addr + size; 1653 for (; addr < end; addr += PAGE_SIZE) { 1654 int err = __get_user(c, addr); 1655 if (err) 1656 return err; 1657 } 1658 return __get_user(c, end - 1); 1659 } 1660 1661 static int eb_copy_relocations(const struct i915_execbuffer *eb) 1662 { 1663 struct drm_i915_gem_relocation_entry *relocs; 1664 const unsigned int count = eb->buffer_count; 1665 unsigned int i; 1666 int err; 1667 1668 for (i = 0; i < count; i++) { 1669 const unsigned int nreloc = eb->exec[i].relocation_count; 1670 struct drm_i915_gem_relocation_entry __user *urelocs; 1671 unsigned long size; 1672 unsigned long copied; 1673 1674 if (nreloc == 0) 1675 continue; 1676 1677 err = check_relocations(&eb->exec[i]); 1678 if (err) 1679 goto err; 1680 1681 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1682 size = nreloc * sizeof(*relocs); 1683 1684 relocs = kvmalloc_array(1, size, GFP_KERNEL); 1685 if (!relocs) { 1686 err = -ENOMEM; 1687 goto err; 1688 } 1689 1690 /* copy_from_user is limited to < 4GiB */ 1691 copied = 0; 1692 do { 1693 unsigned int len = 1694 min_t(u64, BIT_ULL(31), size - copied); 1695 1696 if (__copy_from_user((char *)relocs + copied, 1697 (char __user *)urelocs + copied, 1698 len)) 1699 goto end; 1700 1701 copied += len; 1702 } while (copied < size); 1703 1704 /* 1705 * As we do not update the known relocation offsets after 1706 * relocating (due to the complexities in lock handling), 1707 * we need to mark them as invalid now so that we force the 1708 * relocation processing next time. Just in case the target 1709 * object is evicted and then rebound into its old 1710 * presumed_offset before the next execbuffer - if that 1711 * happened we would make the mistake of assuming that the 1712 * relocations were valid. 
		 */
		if (!user_access_begin(urelocs, size))
			goto end;

		for (copied = 0; copied < nreloc; copied++)
			unsafe_put_user(-1,
					&urelocs[copied].presumed_offset,
					end_user);
		user_access_end();

		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
	}

	return 0;

end_user:
	user_access_end();
end:
	kvfree(relocs);
	err = -EFAULT;
err:
	while (i--) {
		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
		if (eb->exec[i].relocation_count)
			kvfree(relocs);
	}
	return err;
}

static int eb_prefault_relocations(const struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;

	for (i = 0; i < count; i++) {
		int err;

		err = check_relocations(&eb->exec[i]);
		if (err)
			return err;
	}

	return 0;
}

static int eb_reinit_userptr(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;
	int ret;

	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
		return 0;

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];

		if (!i915_gem_object_is_userptr(ev->vma->obj))
			continue;

		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
		if (ret)
			return ret;

		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
	}

	return 0;
}

static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
{
	bool have_copy = false;
	struct eb_vma *ev;
	int err = 0;

repeat:
	if (signal_pending(current)) {
		err = -ERESTARTSYS;
		goto out;
	}

	/* We may process another execbuffer during the unlock... */
	eb_release_vmas(eb, false);
	i915_gem_ww_ctx_fini(&eb->ww);

	/*
	 * We take 3 passes through the slowpath.
	 *
	 * 1 - we try to just prefault all the user relocation entries and
	 * then attempt to reuse the atomic pagefault disabled fast path again.
	 *
	 * 2 - we copy the user entries to a local buffer here outside of the
	 * lock and allow ourselves to wait upon any rendering before
	 * performing the relocations.
	 *
	 * 3 - we already have a local copy of the relocation entries, but
	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
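	 *
	 * A compact, purely illustrative reading of the branch that follows:
	 *
	 *	if (!err)		err = eb_prefault_relocations(eb);
	 *	else if (!have_copy)	err = eb_copy_relocations(eb);
	 *	else			cond_resched();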
1811 */ 1812 if (!err) { 1813 err = eb_prefault_relocations(eb); 1814 } else if (!have_copy) { 1815 err = eb_copy_relocations(eb); 1816 have_copy = err == 0; 1817 } else { 1818 cond_resched(); 1819 err = 0; 1820 } 1821 1822 if (!err) 1823 err = eb_reinit_userptr(eb); 1824 1825 i915_gem_ww_ctx_init(&eb->ww, true); 1826 if (err) 1827 goto out; 1828 1829 /* reacquire the objects */ 1830 repeat_validate: 1831 err = eb_pin_engine(eb, false); 1832 if (err) 1833 goto err; 1834 1835 err = eb_validate_vmas(eb); 1836 if (err) 1837 goto err; 1838 1839 GEM_BUG_ON(!eb->batches[0]); 1840 1841 list_for_each_entry(ev, &eb->relocs, reloc_link) { 1842 if (!have_copy) { 1843 err = eb_relocate_vma(eb, ev); 1844 if (err) 1845 break; 1846 } else { 1847 err = eb_relocate_vma_slow(eb, ev); 1848 if (err) 1849 break; 1850 } 1851 } 1852 1853 if (err == -EDEADLK) 1854 goto err; 1855 1856 if (err && !have_copy) 1857 goto repeat; 1858 1859 if (err) 1860 goto err; 1861 1862 /* as last step, parse the command buffer */ 1863 err = eb_parse(eb); 1864 if (err) 1865 goto err; 1866 1867 /* 1868 * Leave the user relocations as are, this is the painfully slow path, 1869 * and we want to avoid the complication of dropping the lock whilst 1870 * having buffers reserved in the aperture and so causing spurious 1871 * ENOSPC for random operations. 1872 */ 1873 1874 err: 1875 if (err == -EDEADLK) { 1876 eb_release_vmas(eb, false); 1877 err = i915_gem_ww_ctx_backoff(&eb->ww); 1878 if (!err) 1879 goto repeat_validate; 1880 } 1881 1882 if (err == -EAGAIN) 1883 goto repeat; 1884 1885 out: 1886 if (have_copy) { 1887 const unsigned int count = eb->buffer_count; 1888 unsigned int i; 1889 1890 for (i = 0; i < count; i++) { 1891 const struct drm_i915_gem_exec_object2 *entry = 1892 &eb->exec[i]; 1893 struct drm_i915_gem_relocation_entry *relocs; 1894 1895 if (!entry->relocation_count) 1896 continue; 1897 1898 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1899 kvfree(relocs); 1900 } 1901 } 1902 1903 return err; 1904 } 1905 1906 static int eb_relocate_parse(struct i915_execbuffer *eb) 1907 { 1908 int err; 1909 bool throttle = true; 1910 1911 retry: 1912 err = eb_pin_engine(eb, throttle); 1913 if (err) { 1914 if (err != -EDEADLK) 1915 return err; 1916 1917 goto err; 1918 } 1919 1920 /* only throttle once, even if we didn't need to throttle */ 1921 throttle = false; 1922 1923 err = eb_validate_vmas(eb); 1924 if (err == -EAGAIN) 1925 goto slow; 1926 else if (err) 1927 goto err; 1928 1929 /* The objects are in their final locations, apply the relocations. */ 1930 if (eb->args->flags & __EXEC_HAS_RELOC) { 1931 struct eb_vma *ev; 1932 1933 list_for_each_entry(ev, &eb->relocs, reloc_link) { 1934 err = eb_relocate_vma(eb, ev); 1935 if (err) 1936 break; 1937 } 1938 1939 if (err == -EDEADLK) 1940 goto err; 1941 else if (err) 1942 goto slow; 1943 } 1944 1945 if (!err) 1946 err = eb_parse(eb); 1947 1948 err: 1949 if (err == -EDEADLK) { 1950 eb_release_vmas(eb, false); 1951 err = i915_gem_ww_ctx_backoff(&eb->ww); 1952 if (!err) 1953 goto retry; 1954 } 1955 1956 return err; 1957 1958 slow: 1959 err = eb_relocate_parse_slow(eb); 1960 if (err) 1961 /* 1962 * If the user expects the execobject.offset and 1963 * reloc.presumed_offset to be an exact match, 1964 * as for using NO_RELOC, then we cannot update 1965 * the execobject.offset until we have completed 1966 * relocation. 
1967 */ 1968 eb->args->flags &= ~__EXEC_HAS_RELOC; 1969 1970 return err; 1971 } 1972 1973 /* 1974 * Using two helper loops for the order in which requests / batches are created 1975 * and added to the backend. Requests are created in order from the parent to 1976 * the last child. Requests are added in the reverse order, from the last child 1977 * to the parent. This is done for locking reasons as the timeline lock is acquired 1978 * during request creation and released when the request is added to the 1979 * backend. To make lockdep happy (see intel_context_timeline_lock) this must be 1980 * the ordering. 1981 */ 1982 #define for_each_batch_create_order(_eb, _i) \ 1983 for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i)) 1984 #define for_each_batch_add_order(_eb, _i) \ 1985 BUILD_BUG_ON(!typecheck(int, _i)); \ 1986 for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i)) 1987 1988 static struct i915_request * 1989 eb_find_first_request_added(struct i915_execbuffer *eb) 1990 { 1991 int i; 1992 1993 for_each_batch_add_order(eb, i) 1994 if (eb->requests[i]) 1995 return eb->requests[i]; 1996 1997 GEM_BUG_ON("Request not found"); 1998 1999 return NULL; 2000 } 2001 2002 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 2003 2004 /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ 2005 static int eb_capture_stage(struct i915_execbuffer *eb) 2006 { 2007 const unsigned int count = eb->buffer_count; 2008 unsigned int i = count, j; 2009 2010 while (i--) { 2011 struct eb_vma *ev = &eb->vma[i]; 2012 struct i915_vma *vma = ev->vma; 2013 unsigned int flags = ev->flags; 2014 2015 if (!(flags & EXEC_OBJECT_CAPTURE)) 2016 continue; 2017 2018 if (i915_gem_context_is_recoverable(eb->gem_context) && 2019 (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) 2020 return -EINVAL; 2021 2022 for_each_batch_create_order(eb, j) { 2023 struct i915_capture_list *capture; 2024 2025 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 2026 if (!capture) 2027 continue; 2028 2029 capture->next = eb->capture_lists[j]; 2030 capture->vma_res = i915_vma_resource_get(vma->resource); 2031 eb->capture_lists[j] = capture; 2032 } 2033 } 2034 2035 return 0; 2036 } 2037 2038 /* Commit once we're in the critical path */ 2039 static void eb_capture_commit(struct i915_execbuffer *eb) 2040 { 2041 unsigned int j; 2042 2043 for_each_batch_create_order(eb, j) { 2044 struct i915_request *rq = eb->requests[j]; 2045 2046 if (!rq) 2047 break; 2048 2049 rq->capture_list = eb->capture_lists[j]; 2050 eb->capture_lists[j] = NULL; 2051 } 2052 } 2053 2054 /* 2055 * Release anything that didn't get committed due to errors. 2056 * The capture_list will otherwise be freed at request retire.
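 *
 * Capture lists only exist for objects that userspace flagged with
 * EXEC_OBJECT_CAPTURE in their exec object flags (see eb_capture_stage()
 * above); on the userspace side that is simply, for a
 * struct drm_i915_gem_exec_object2 named here illustratively exec_object:
 *
 *   exec_object.flags |= EXEC_OBJECT_CAPTURE;
 *
 * Once eb_capture_commit() has handed a list over to its request
 * (rq->capture_list), the corresponding eb->capture_lists[] slot is
 * NULLed and nothing remains to be released here.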
2057 */ 2058 static void eb_capture_release(struct i915_execbuffer *eb) 2059 { 2060 unsigned int j; 2061 2062 for_each_batch_create_order(eb, j) { 2063 if (eb->capture_lists[j]) { 2064 i915_request_free_capture_list(eb->capture_lists[j]); 2065 eb->capture_lists[j] = NULL; 2066 } 2067 } 2068 } 2069 2070 static void eb_capture_list_clear(struct i915_execbuffer *eb) 2071 { 2072 memset(eb->capture_lists, 0, sizeof(eb->capture_lists)); 2073 } 2074 2075 #else 2076 2077 static int eb_capture_stage(struct i915_execbuffer *eb) 2078 { 2079 return 0; 2080 } 2081 2082 static void eb_capture_commit(struct i915_execbuffer *eb) 2083 { 2084 } 2085 2086 static void eb_capture_release(struct i915_execbuffer *eb) 2087 { 2088 } 2089 2090 static void eb_capture_list_clear(struct i915_execbuffer *eb) 2091 { 2092 } 2093 2094 #endif 2095 2096 static int eb_move_to_gpu(struct i915_execbuffer *eb) 2097 { 2098 const unsigned int count = eb->buffer_count; 2099 unsigned int i = count; 2100 int err = 0, j; 2101 2102 while (i--) { 2103 struct eb_vma *ev = &eb->vma[i]; 2104 struct i915_vma *vma = ev->vma; 2105 unsigned int flags = ev->flags; 2106 struct drm_i915_gem_object *obj = vma->obj; 2107 2108 assert_vma_held(vma); 2109 2110 /* 2111 * If the GPU is not _reading_ through the CPU cache, we need 2112 * to make sure that any writes (both previous GPU writes from 2113 * before a change in snooping levels and normal CPU writes) 2114 * caught in that cache are flushed to main memory. 2115 * 2116 * We want to say 2117 * obj->cache_dirty && 2118 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 2119 * but gcc's optimiser doesn't handle that as well and emits 2120 * two jumps instead of one. Maybe one day... 2121 * 2122 * FIXME: There is also sync flushing in set_pages(), which 2123 * serves a different purpose(some of the time at least). 2124 * 2125 * We should consider: 2126 * 2127 * 1. Rip out the async flush code. 2128 * 2129 * 2. Or make the sync flushing use the async clflush path 2130 * using mandatory fences underneath. Currently the below 2131 * async flush happens after we bind the object. 2132 */ 2133 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 2134 if (i915_gem_clflush_object(obj, 0)) 2135 flags &= ~EXEC_OBJECT_ASYNC; 2136 } 2137 2138 /* We only need to await on the first request */ 2139 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { 2140 err = i915_request_await_object 2141 (eb_find_first_request_added(eb), obj, 2142 flags & EXEC_OBJECT_WRITE); 2143 } 2144 2145 for_each_batch_add_order(eb, j) { 2146 if (err) 2147 break; 2148 if (!eb->requests[j]) 2149 continue; 2150 2151 err = _i915_vma_move_to_active(vma, eb->requests[j], 2152 j ? NULL : 2153 eb->composite_fence ? 2154 eb->composite_fence : 2155 &eb->requests[j]->fence, 2156 flags | __EXEC_OBJECT_NO_RESERVE | 2157 __EXEC_OBJECT_NO_REQUEST_AWAIT); 2158 } 2159 } 2160 2161 #ifdef CONFIG_MMU_NOTIFIER 2162 if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { 2163 for (i = 0; i < count; i++) { 2164 struct eb_vma *ev = &eb->vma[i]; 2165 struct drm_i915_gem_object *obj = ev->vma->obj; 2166 2167 if (!i915_gem_object_is_userptr(obj)) 2168 continue; 2169 2170 err = i915_gem_object_userptr_submit_done(obj); 2171 if (err) 2172 break; 2173 } 2174 } 2175 #endif 2176 2177 if (unlikely(err)) 2178 goto err_skip; 2179 2180 /* Unconditionally flush any chipset caches (for streaming writes). 
*/ 2181 intel_gt_chipset_flush(eb->gt); 2182 eb_capture_commit(eb); 2183 2184 return 0; 2185 2186 err_skip: 2187 for_each_batch_create_order(eb, j) { 2188 if (!eb->requests[j]) 2189 break; 2190 2191 i915_request_set_error_once(eb->requests[j], err); 2192 } 2193 return err; 2194 } 2195 2196 static int i915_gem_check_execbuffer(struct drm_i915_private *i915, 2197 struct drm_i915_gem_execbuffer2 *exec) 2198 { 2199 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 2200 return -EINVAL; 2201 2202 /* Kernel clipping was a DRI1 misfeature */ 2203 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | 2204 I915_EXEC_USE_EXTENSIONS))) { 2205 if (exec->num_cliprects || exec->cliprects_ptr) 2206 return -EINVAL; 2207 } 2208 2209 if (exec->DR4 == 0xffffffff) { 2210 drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n"); 2211 exec->DR4 = 0; 2212 } 2213 if (exec->DR1 || exec->DR4) 2214 return -EINVAL; 2215 2216 if ((exec->batch_start_offset | exec->batch_len) & 0x7) 2217 return -EINVAL; 2218 2219 return 0; 2220 } 2221 2222 static int i915_reset_gen7_sol_offsets(struct i915_request *rq) 2223 { 2224 u32 *cs; 2225 int i; 2226 2227 if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) { 2228 drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n"); 2229 return -EINVAL; 2230 } 2231 2232 cs = intel_ring_begin(rq, 4 * 2 + 2); 2233 if (IS_ERR(cs)) 2234 return PTR_ERR(cs); 2235 2236 *cs++ = MI_LOAD_REGISTER_IMM(4); 2237 for (i = 0; i < 4; i++) { 2238 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 2239 *cs++ = 0; 2240 } 2241 *cs++ = MI_NOOP; 2242 intel_ring_advance(rq, cs); 2243 2244 return 0; 2245 } 2246 2247 static struct i915_vma * 2248 shadow_batch_pin(struct i915_execbuffer *eb, 2249 struct drm_i915_gem_object *obj, 2250 struct i915_address_space *vm, 2251 unsigned int flags) 2252 { 2253 struct i915_vma *vma; 2254 int err; 2255 2256 vma = i915_vma_instance(obj, vm, NULL); 2257 if (IS_ERR(vma)) 2258 return vma; 2259 2260 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE); 2261 if (err) 2262 return ERR_PTR(err); 2263 2264 return vma; 2265 } 2266 2267 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) 2268 { 2269 /* 2270 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2271 * batch" bit. Hence we need to pin secure batches into the global gtt. 2272 * hsw should have this fixed, but bdw mucks it up again. 
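 *
 * A user-requested secure batch is selected with I915_EXEC_SECURE in the
 * execbuffer2 flags; an illustrative userspace fragment (execbuf being a
 * struct drm_i915_gem_execbuffer2):
 *
 *   execbuf.flags |= I915_EXEC_SECURE;
 *
 * As checked in i915_gem_do_execbuffer() below, that path is limited to
 * platforms with HAS_SECURE_BATCHES() before graphics version 11, and to
 * callers that are both DRM master and CAP_SYS_ADMIN.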
*/ 2273 if (eb->batch_flags & I915_DISPATCH_SECURE) 2274 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE); 2275 2276 return NULL; 2277 } 2278 2279 static int eb_parse(struct i915_execbuffer *eb) 2280 { 2281 struct drm_i915_private *i915 = eb->i915; 2282 struct intel_gt_buffer_pool_node *pool = eb->batch_pool; 2283 struct i915_vma *shadow, *trampoline, *batch; 2284 unsigned long len; 2285 int err; 2286 2287 if (!eb_use_cmdparser(eb)) { 2288 batch = eb_dispatch_secure(eb, eb->batches[0]->vma); 2289 if (IS_ERR(batch)) 2290 return PTR_ERR(batch); 2291 2292 goto secure_batch; 2293 } 2294 2295 if (intel_context_is_parallel(eb->context)) 2296 return -EINVAL; 2297 2298 len = eb->batch_len[0]; 2299 if (!CMDPARSER_USES_GGTT(eb->i915)) { 2300 /* 2301 * ppGTT backed shadow buffers must be mapped RO, to prevent 2302 * post-scan tampering 2303 */ 2304 if (!eb->context->vm->has_read_only) { 2305 drm_dbg(&i915->drm, 2306 "Cannot prevent post-scan tampering without RO capable vm\n"); 2307 return -EINVAL; 2308 } 2309 } else { 2310 len += I915_CMD_PARSER_TRAMPOLINE_SIZE; 2311 } 2312 if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */ 2313 return -EINVAL; 2314 2315 if (!pool) { 2316 pool = intel_gt_get_buffer_pool(eb->gt, len, 2317 I915_MAP_WB); 2318 if (IS_ERR(pool)) 2319 return PTR_ERR(pool); 2320 eb->batch_pool = pool; 2321 } 2322 2323 err = i915_gem_object_lock(pool->obj, &eb->ww); 2324 if (err) 2325 return err; 2326 2327 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); 2328 if (IS_ERR(shadow)) 2329 return PTR_ERR(shadow); 2330 2331 intel_gt_buffer_pool_mark_used(pool); 2332 i915_gem_object_set_readonly(shadow->obj); 2333 shadow->private = pool; 2334 2335 trampoline = NULL; 2336 if (CMDPARSER_USES_GGTT(eb->i915)) { 2337 trampoline = shadow; 2338 2339 shadow = shadow_batch_pin(eb, pool->obj, 2340 &eb->gt->ggtt->vm, 2341 PIN_GLOBAL); 2342 if (IS_ERR(shadow)) 2343 return PTR_ERR(shadow); 2344 2345 shadow->private = pool; 2346 2347 eb->batch_flags |= I915_DISPATCH_SECURE; 2348 } 2349 2350 batch = eb_dispatch_secure(eb, shadow); 2351 if (IS_ERR(batch)) 2352 return PTR_ERR(batch); 2353 2354 err = dma_resv_reserve_fences(shadow->obj->base.resv, 1); 2355 if (err) 2356 return err; 2357 2358 err = intel_engine_cmd_parser(eb->context->engine, 2359 eb->batches[0]->vma, 2360 eb->batch_start_offset, 2361 eb->batch_len[0], 2362 shadow, trampoline); 2363 if (err) 2364 return err; 2365 2366 eb->batches[0] = &eb->vma[eb->buffer_count++]; 2367 eb->batches[0]->vma = i915_vma_get(shadow); 2368 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; 2369 2370 eb->trampoline = trampoline; 2371 eb->batch_start_offset = 0; 2372 2373 secure_batch: 2374 if (batch) { 2375 if (intel_context_is_parallel(eb->context)) 2376 return -EINVAL; 2377 2378 eb->batches[0] = &eb->vma[eb->buffer_count++]; 2379 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN; 2380 eb->batches[0]->vma = i915_vma_get(batch); 2381 } 2382 return 0; 2383 } 2384 2385 static int eb_request_submit(struct i915_execbuffer *eb, 2386 struct i915_request *rq, 2387 struct i915_vma *batch, 2388 u64 batch_len) 2389 { 2390 int err; 2391 2392 if (intel_context_nopreempt(rq->context)) 2393 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); 2394 2395 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 2396 err = i915_reset_gen7_sol_offsets(rq); 2397 if (err) 2398 return err; 2399 } 2400 2401 /* 2402 * After we completed waiting for other engines (using HW semaphores) 2403 * then we can signal that this request/batch 
is ready to run. This 2404 * allows us to determine if the batch is still waiting on the GPU 2405 * or actually running by checking the breadcrumb. 2406 */ 2407 if (rq->context->engine->emit_init_breadcrumb) { 2408 err = rq->context->engine->emit_init_breadcrumb(rq); 2409 if (err) 2410 return err; 2411 } 2412 2413 err = rq->context->engine->emit_bb_start(rq, 2414 i915_vma_offset(batch) + 2415 eb->batch_start_offset, 2416 batch_len, 2417 eb->batch_flags); 2418 if (err) 2419 return err; 2420 2421 if (eb->trampoline) { 2422 GEM_BUG_ON(intel_context_is_parallel(rq->context)); 2423 GEM_BUG_ON(eb->batch_start_offset); 2424 err = rq->context->engine->emit_bb_start(rq, 2425 i915_vma_offset(eb->trampoline) + 2426 batch_len, 0, 0); 2427 if (err) 2428 return err; 2429 } 2430 2431 return 0; 2432 } 2433 2434 static int eb_submit(struct i915_execbuffer *eb) 2435 { 2436 unsigned int i; 2437 int err; 2438 2439 err = eb_move_to_gpu(eb); 2440 2441 for_each_batch_create_order(eb, i) { 2442 if (!eb->requests[i]) 2443 break; 2444 2445 trace_i915_request_queue(eb->requests[i], eb->batch_flags); 2446 if (!err) 2447 err = eb_request_submit(eb, eb->requests[i], 2448 eb->batches[i]->vma, 2449 eb->batch_len[i]); 2450 } 2451 2452 return err; 2453 } 2454 2455 /* 2456 * Find one BSD ring to dispatch the corresponding BSD command. 2457 * The engine index is returned. 2458 */ 2459 static unsigned int 2460 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, 2461 struct drm_file *file) 2462 { 2463 struct drm_i915_file_private *file_priv = file->driver_priv; 2464 2465 /* Check whether the file_priv has already selected one ring. */ 2466 if ((int)file_priv->bsd_engine < 0) 2467 file_priv->bsd_engine = 2468 get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]); 2469 2470 return file_priv->bsd_engine; 2471 } 2472 2473 static const enum intel_engine_id user_ring_map[] = { 2474 [I915_EXEC_DEFAULT] = RCS0, 2475 [I915_EXEC_RENDER] = RCS0, 2476 [I915_EXEC_BLT] = BCS0, 2477 [I915_EXEC_BSD] = VCS0, 2478 [I915_EXEC_VEBOX] = VECS0 2479 }; 2480 2481 static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) 2482 { 2483 struct intel_ring *ring = ce->ring; 2484 struct intel_timeline *tl = ce->timeline; 2485 struct i915_request *rq; 2486 2487 /* 2488 * Completely unscientific finger-in-the-air estimates for suitable 2489 * maximum user request size (to avoid blocking) and then backoff. 2490 */ 2491 if (intel_ring_update_space(ring) >= PAGE_SIZE) 2492 return NULL; 2493 2494 /* 2495 * Find a request that after waiting upon, there will be at least half 2496 * the ring available. The hysteresis allows us to compete for the 2497 * shared ring and should mean that we sleep less often prior to 2498 * claiming our resources, but not so long that the ring completely 2499 * drains before we can submit our next request. 
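 *
 * For example, with a hypothetical 16 KiB ring this walks the timeline
 * until it finds a request whose completion would leave more than 8 KiB
 * (ring->size / 2) of space free, and returns that request for the caller
 * to wait on rather than the most recently emitted one.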
2500 */ 2501 list_for_each_entry(rq, &tl->requests, link) { 2502 if (rq->ring != ring) 2503 continue; 2504 2505 if (__intel_ring_space(rq->postfix, 2506 ring->emit, ring->size) > ring->size / 2) 2507 break; 2508 } 2509 if (&rq->link == &tl->requests) 2510 return NULL; /* weird, we will check again later for real */ 2511 2512 return i915_request_get(rq); 2513 } 2514 2515 static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, 2516 bool throttle) 2517 { 2518 struct intel_timeline *tl; 2519 struct i915_request *rq = NULL; 2520 2521 /* 2522 * Take a local wakeref for preparing to dispatch the execbuf as 2523 * we expect to access the hardware fairly frequently in the 2524 * process, and require the engine to be kept awake between accesses. 2525 * Upon dispatch, we acquire another prolonged wakeref that we hold 2526 * until the timeline is idle, which in turn releases the wakeref 2527 * taken on the engine, and the parent device. 2528 */ 2529 tl = intel_context_timeline_lock(ce); 2530 if (IS_ERR(tl)) 2531 return PTR_ERR(tl); 2532 2533 intel_context_enter(ce); 2534 if (throttle) 2535 rq = eb_throttle(eb, ce); 2536 intel_context_timeline_unlock(tl); 2537 2538 if (rq) { 2539 bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; 2540 long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT; 2541 2542 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 2543 timeout) < 0) { 2544 i915_request_put(rq); 2545 2546 /* 2547 * Error path, cannot use intel_context_timeline_lock as 2548 * that is user interruptible and this cleanup step 2549 * must be done. 2550 */ 2551 mutex_lock(&ce->timeline->mutex); 2552 intel_context_exit(ce); 2553 mutex_unlock(&ce->timeline->mutex); 2554 2555 if (nonblock) 2556 return -EWOULDBLOCK; 2557 else 2558 return -EINTR; 2559 } 2560 i915_request_put(rq); 2561 } 2562 2563 return 0; 2564 } 2565 2566 static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle) 2567 { 2568 struct intel_context *ce = eb->context, *child; 2569 int err; 2570 int i = 0, j = 0; 2571 2572 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); 2573 2574 if (unlikely(intel_context_is_banned(ce))) 2575 return -EIO; 2576 2577 /* 2578 * Pinning the contexts may generate requests in order to acquire 2579 * GGTT space, so do this first before we reserve a seqno for 2580 * ourselves.
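 *
 * For a parallel (parent/child) context this pins the parent and every
 * child, then enters each timeline via eb_pin_timeline() (children first,
 * then the parent); the unwind path below only exits the child timelines
 * that were successfully entered (tracked via i/j) before unpinning every
 * context.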
2581 */ 2582 err = intel_context_pin_ww(ce, &eb->ww); 2583 if (err) 2584 return err; 2585 for_each_child(ce, child) { 2586 err = intel_context_pin_ww(child, &eb->ww); 2587 GEM_BUG_ON(err); /* perma-pinned should incr a counter */ 2588 } 2589 2590 for_each_child(ce, child) { 2591 err = eb_pin_timeline(eb, child, throttle); 2592 if (err) 2593 goto unwind; 2594 ++i; 2595 } 2596 err = eb_pin_timeline(eb, ce, throttle); 2597 if (err) 2598 goto unwind; 2599 2600 eb->args->flags |= __EXEC_ENGINE_PINNED; 2601 return 0; 2602 2603 unwind: 2604 for_each_child(ce, child) { 2605 if (j++ < i) { 2606 mutex_lock(&child->timeline->mutex); 2607 intel_context_exit(child); 2608 mutex_unlock(&child->timeline->mutex); 2609 } 2610 } 2611 for_each_child(ce, child) 2612 intel_context_unpin(child); 2613 intel_context_unpin(ce); 2614 return err; 2615 } 2616 2617 static void eb_unpin_engine(struct i915_execbuffer *eb) 2618 { 2619 struct intel_context *ce = eb->context, *child; 2620 2621 if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) 2622 return; 2623 2624 eb->args->flags &= ~__EXEC_ENGINE_PINNED; 2625 2626 for_each_child(ce, child) { 2627 mutex_lock(&child->timeline->mutex); 2628 intel_context_exit(child); 2629 mutex_unlock(&child->timeline->mutex); 2630 2631 intel_context_unpin(child); 2632 } 2633 2634 mutex_lock(&ce->timeline->mutex); 2635 intel_context_exit(ce); 2636 mutex_unlock(&ce->timeline->mutex); 2637 2638 intel_context_unpin(ce); 2639 } 2640 2641 static unsigned int 2642 eb_select_legacy_ring(struct i915_execbuffer *eb) 2643 { 2644 struct drm_i915_private *i915 = eb->i915; 2645 struct drm_i915_gem_execbuffer2 *args = eb->args; 2646 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; 2647 2648 if (user_ring_id != I915_EXEC_BSD && 2649 (args->flags & I915_EXEC_BSD_MASK)) { 2650 drm_dbg(&i915->drm, 2651 "execbuf with non bsd ring but with invalid " 2652 "bsd dispatch flags: %d\n", (int)(args->flags)); 2653 return -1; 2654 } 2655 2656 if (user_ring_id == I915_EXEC_BSD && 2657 i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) { 2658 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; 2659 2660 if (bsd_idx == I915_EXEC_BSD_DEFAULT) { 2661 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); 2662 } else if (bsd_idx >= I915_EXEC_BSD_RING1 && 2663 bsd_idx <= I915_EXEC_BSD_RING2) { 2664 bsd_idx >>= I915_EXEC_BSD_SHIFT; 2665 bsd_idx--; 2666 } else { 2667 drm_dbg(&i915->drm, 2668 "execbuf with unknown bsd ring: %u\n", 2669 bsd_idx); 2670 return -1; 2671 } 2672 2673 return _VCS(bsd_idx); 2674 } 2675 2676 if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { 2677 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n", 2678 user_ring_id); 2679 return -1; 2680 } 2681 2682 return user_ring_map[user_ring_id]; 2683 } 2684 2685 static int 2686 eb_select_engine(struct i915_execbuffer *eb) 2687 { 2688 struct intel_context *ce, *child; 2689 struct intel_gt *gt; 2690 unsigned int idx; 2691 int err; 2692 2693 if (i915_gem_context_user_engines(eb->gem_context)) 2694 idx = eb->args->flags & I915_EXEC_RING_MASK; 2695 else 2696 idx = eb_select_legacy_ring(eb); 2697 2698 ce = i915_gem_context_get_engine(eb->gem_context, idx); 2699 if (IS_ERR(ce)) 2700 return PTR_ERR(ce); 2701 2702 if (intel_context_is_parallel(ce)) { 2703 if (eb->buffer_count < ce->parallel.number_children + 1) { 2704 intel_context_put(ce); 2705 return -EINVAL; 2706 } 2707 if (eb->batch_start_offset || eb->args->batch_len) { 2708 intel_context_put(ce); 2709 return -EINVAL; 2710 } 2711 } 2712 eb->num_batches = ce->parallel.number_children + 1; 2713 gt 
= ce->engine->gt; 2714 2715 for_each_child(ce, child) 2716 intel_context_get(child); 2717 eb->wakeref = intel_gt_pm_get(ce->engine->gt); 2718 /* 2719 * Keep GT0 active on MTL so that i915_vma_parked() doesn't 2720 * free VMAs while execbuf ioctl is validating VMAs. 2721 */ 2722 if (gt->info.id) 2723 eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915)); 2724 2725 if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { 2726 err = intel_context_alloc_state(ce); 2727 if (err) 2728 goto err; 2729 } 2730 for_each_child(ce, child) { 2731 if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) { 2732 err = intel_context_alloc_state(child); 2733 if (err) 2734 goto err; 2735 } 2736 } 2737 2738 /* 2739 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2740 * EIO if the GPU is already wedged. 2741 */ 2742 err = intel_gt_terminally_wedged(ce->engine->gt); 2743 if (err) 2744 goto err; 2745 2746 if (!i915_vm_tryget(ce->vm)) { 2747 err = -ENOENT; 2748 goto err; 2749 } 2750 2751 eb->context = ce; 2752 eb->gt = ce->engine->gt; 2753 2754 /* 2755 * Make sure engine pool stays alive even if we call intel_context_put 2756 * during ww handling. The pool is destroyed when last pm reference 2757 * is dropped, which breaks our -EDEADLK handling. 2758 */ 2759 return err; 2760 2761 err: 2762 if (gt->info.id) 2763 intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0); 2764 2765 intel_gt_pm_put(ce->engine->gt, eb->wakeref); 2766 for_each_child(ce, child) 2767 intel_context_put(child); 2768 intel_context_put(ce); 2769 return err; 2770 } 2771 2772 static void 2773 eb_put_engine(struct i915_execbuffer *eb) 2774 { 2775 struct intel_context *child; 2776 2777 i915_vm_put(eb->context->vm); 2778 /* 2779 * This works in conjunction with eb_select_engine() to prevent 2780 * i915_vma_parked() from interfering while execbuf validates vmas. 
2781 */ 2782 if (eb->gt->info.id) 2783 intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0); 2784 intel_gt_pm_put(eb->context->engine->gt, eb->wakeref); 2785 for_each_child(eb->context, child) 2786 intel_context_put(child); 2787 intel_context_put(eb->context); 2788 } 2789 2790 static void 2791 __free_fence_array(struct eb_fence *fences, unsigned int n) 2792 { 2793 while (n--) { 2794 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); 2795 dma_fence_put(fences[n].dma_fence); 2796 dma_fence_chain_free(fences[n].chain_fence); 2797 } 2798 kvfree(fences); 2799 } 2800 2801 static int 2802 add_timeline_fence_array(struct i915_execbuffer *eb, 2803 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) 2804 { 2805 struct drm_i915_gem_exec_fence __user *user_fences; 2806 u64 __user *user_values; 2807 struct eb_fence *f; 2808 u64 nfences; 2809 int err = 0; 2810 2811 nfences = timeline_fences->fence_count; 2812 if (!nfences) 2813 return 0; 2814 2815 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2816 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2817 if (nfences > min_t(unsigned long, 2818 ULONG_MAX / sizeof(*user_fences), 2819 SIZE_MAX / sizeof(*f)) - eb->num_fences) 2820 return -EINVAL; 2821 2822 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); 2823 if (!access_ok(user_fences, nfences * sizeof(*user_fences))) 2824 return -EFAULT; 2825 2826 user_values = u64_to_user_ptr(timeline_fences->values_ptr); 2827 if (!access_ok(user_values, nfences * sizeof(*user_values))) 2828 return -EFAULT; 2829 2830 f = krealloc(eb->fences, 2831 (eb->num_fences + nfences) * sizeof(*f), 2832 __GFP_NOWARN | GFP_KERNEL); 2833 if (!f) 2834 return -ENOMEM; 2835 2836 eb->fences = f; 2837 f += eb->num_fences; 2838 2839 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2840 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2841 2842 while (nfences--) { 2843 struct drm_i915_gem_exec_fence user_fence; 2844 struct drm_syncobj *syncobj; 2845 struct dma_fence *fence = NULL; 2846 u64 point; 2847 2848 if (__copy_from_user(&user_fence, 2849 user_fences++, 2850 sizeof(user_fence))) 2851 return -EFAULT; 2852 2853 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2854 return -EINVAL; 2855 2856 if (__get_user(point, user_values++)) 2857 return -EFAULT; 2858 2859 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2860 if (!syncobj) { 2861 drm_dbg(&eb->i915->drm, 2862 "Invalid syncobj handle provided\n"); 2863 return -ENOENT; 2864 } 2865 2866 fence = drm_syncobj_fence_get(syncobj); 2867 2868 if (!fence && user_fence.flags && 2869 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2870 drm_dbg(&eb->i915->drm, 2871 "Syncobj handle has no fence\n"); 2872 drm_syncobj_put(syncobj); 2873 return -EINVAL; 2874 } 2875 2876 if (fence) 2877 err = dma_fence_chain_find_seqno(&fence, point); 2878 2879 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2880 drm_dbg(&eb->i915->drm, 2881 "Syncobj handle missing requested point %llu\n", 2882 point); 2883 dma_fence_put(fence); 2884 drm_syncobj_put(syncobj); 2885 return err; 2886 } 2887 2888 /* 2889 * A point might have been signaled already and 2890 * garbage collected from the timeline. In this case 2891 * just ignore the point and carry on. 2892 */ 2893 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2894 drm_syncobj_put(syncobj); 2895 continue; 2896 } 2897 2898 /* 2899 * For timeline syncobjs we need to preallocate chains for 2900 * later signaling. 
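 *
 * The points arrive through the two user arrays supplied by the timeline
 * fences extension; a minimal userspace-side sketch (variable names are
 * illustrative, the types and flags are the uapi ones consumed above):
 *
 *   struct drm_i915_gem_exec_fence syncobj_fence = {
 *           .handle = syncobj_handle,
 *           .flags  = I915_EXEC_FENCE_SIGNAL,
 *   };
 *   __u64 point = 2;
 *   struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
 *           .base.name   = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
 *           .fence_count = 1,
 *           .handles_ptr = (uintptr_t)&syncobj_fence,
 *           .values_ptr  = (uintptr_t)&point,
 *   };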
2901 */ 2902 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { 2903 /* 2904 * Waiting and signaling the same point (when point != 2905 * 0) would break the timeline. 2906 */ 2907 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2908 drm_dbg(&eb->i915->drm, 2909 "Trying to wait & signal the same timeline point.\n"); 2910 dma_fence_put(fence); 2911 drm_syncobj_put(syncobj); 2912 return -EINVAL; 2913 } 2914 2915 f->chain_fence = dma_fence_chain_alloc(); 2916 if (!f->chain_fence) { 2917 drm_syncobj_put(syncobj); 2918 dma_fence_put(fence); 2919 return -ENOMEM; 2920 } 2921 } else { 2922 f->chain_fence = NULL; 2923 } 2924 2925 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2926 f->dma_fence = fence; 2927 f->value = point; 2928 f++; 2929 eb->num_fences++; 2930 } 2931 2932 return 0; 2933 } 2934 2935 static int add_fence_array(struct i915_execbuffer *eb) 2936 { 2937 struct drm_i915_gem_execbuffer2 *args = eb->args; 2938 struct drm_i915_gem_exec_fence __user *user; 2939 unsigned long num_fences = args->num_cliprects; 2940 struct eb_fence *f; 2941 2942 if (!(args->flags & I915_EXEC_FENCE_ARRAY)) 2943 return 0; 2944 2945 if (!num_fences) 2946 return 0; 2947 2948 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2949 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2950 if (num_fences > min_t(unsigned long, 2951 ULONG_MAX / sizeof(*user), 2952 SIZE_MAX / sizeof(*f) - eb->num_fences)) 2953 return -EINVAL; 2954 2955 user = u64_to_user_ptr(args->cliprects_ptr); 2956 if (!access_ok(user, num_fences * sizeof(*user))) 2957 return -EFAULT; 2958 2959 f = krealloc(eb->fences, 2960 (eb->num_fences + num_fences) * sizeof(*f), 2961 __GFP_NOWARN | GFP_KERNEL); 2962 if (!f) 2963 return -ENOMEM; 2964 2965 eb->fences = f; 2966 f += eb->num_fences; 2967 while (num_fences--) { 2968 struct drm_i915_gem_exec_fence user_fence; 2969 struct drm_syncobj *syncobj; 2970 struct dma_fence *fence = NULL; 2971 2972 if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) 2973 return -EFAULT; 2974 2975 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2976 return -EINVAL; 2977 2978 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2979 if (!syncobj) { 2980 drm_dbg(&eb->i915->drm, 2981 "Invalid syncobj handle provided\n"); 2982 return -ENOENT; 2983 } 2984 2985 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2986 fence = drm_syncobj_fence_get(syncobj); 2987 if (!fence) { 2988 drm_dbg(&eb->i915->drm, 2989 "Syncobj handle has no fence\n"); 2990 drm_syncobj_put(syncobj); 2991 return -EINVAL; 2992 } 2993 } 2994 2995 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2996 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2997 2998 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2999 f->dma_fence = fence; 3000 f->value = 0; 3001 f->chain_fence = NULL; 3002 f++; 3003 eb->num_fences++; 3004 } 3005 3006 return 0; 3007 } 3008 3009 static void put_fence_array(struct eb_fence *fences, int num_fences) 3010 { 3011 if (fences) 3012 __free_fence_array(fences, num_fences); 3013 } 3014 3015 static int 3016 await_fence_array(struct i915_execbuffer *eb, 3017 struct i915_request *rq) 3018 { 3019 unsigned int n; 3020 int err; 3021 3022 for (n = 0; n < eb->num_fences; n++) { 3023 if (!eb->fences[n].dma_fence) 3024 continue; 3025 3026 err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence); 3027 if (err < 0) 3028 return err; 3029 } 3030 3031 return 0; 3032 } 3033 3034 static void signal_fence_array(const struct i915_execbuffer *eb, 3035 struct dma_fence * const fence) 3036 { 3037 unsigned int n; 
3038 3039 for (n = 0; n < eb->num_fences; n++) { 3040 struct drm_syncobj *syncobj; 3041 unsigned int flags; 3042 3043 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 3044 if (!(flags & I915_EXEC_FENCE_SIGNAL)) 3045 continue; 3046 3047 if (eb->fences[n].chain_fence) { 3048 drm_syncobj_add_point(syncobj, 3049 eb->fences[n].chain_fence, 3050 fence, 3051 eb->fences[n].value); 3052 /* 3053 * The chain's ownership is transferred to the 3054 * timeline. 3055 */ 3056 eb->fences[n].chain_fence = NULL; 3057 } else { 3058 drm_syncobj_replace_fence(syncobj, fence); 3059 } 3060 } 3061 } 3062 3063 static int 3064 parse_timeline_fences(struct i915_user_extension __user *ext, void *data) 3065 { 3066 struct i915_execbuffer *eb = data; 3067 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; 3068 3069 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) 3070 return -EFAULT; 3071 3072 return add_timeline_fence_array(eb, &timeline_fences); 3073 } 3074 3075 static void retire_requests(struct intel_timeline *tl, struct i915_request *end) 3076 { 3077 struct i915_request *rq, *rn; 3078 3079 list_for_each_entry_safe(rq, rn, &tl->requests, link) 3080 if (rq == end || !i915_request_retire(rq)) 3081 break; 3082 } 3083 3084 static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq, 3085 int err, bool last_parallel) 3086 { 3087 struct intel_timeline * const tl = i915_request_timeline(rq); 3088 struct i915_sched_attr attr = {}; 3089 struct i915_request *prev; 3090 3091 lockdep_assert_held(&tl->mutex); 3092 lockdep_unpin_lock(&tl->mutex, rq->cookie); 3093 3094 trace_i915_request_add(rq); 3095 3096 prev = __i915_request_commit(rq); 3097 3098 /* Check that the context wasn't destroyed before submission */ 3099 if (likely(!intel_context_is_closed(eb->context))) { 3100 attr = eb->gem_context->sched; 3101 } else { 3102 /* Serialise with context_close via the add_to_timeline */ 3103 i915_request_set_error_once(rq, -ENOENT); 3104 __i915_request_skip(rq); 3105 err = -ENOENT; /* override any transient errors */ 3106 } 3107 3108 if (intel_context_is_parallel(eb->context)) { 3109 if (err) { 3110 __i915_request_skip(rq); 3111 set_bit(I915_FENCE_FLAG_SKIP_PARALLEL, 3112 &rq->fence.flags); 3113 } 3114 if (last_parallel) 3115 set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, 3116 &rq->fence.flags); 3117 } 3118 3119 __i915_request_queue(rq, &attr); 3120 3121 /* Try to clean up the client's timeline after submitting the request */ 3122 if (prev) 3123 retire_requests(tl, prev); 3124 3125 mutex_unlock(&tl->mutex); 3126 3127 return err; 3128 } 3129 3130 static int eb_requests_add(struct i915_execbuffer *eb, int err) 3131 { 3132 int i; 3133 3134 /* 3135 * We iterate in reverse order of creation to release timeline mutexes in 3136 * same order. 3137 */ 3138 for_each_batch_add_order(eb, i) { 3139 struct i915_request *rq = eb->requests[i]; 3140 3141 if (!rq) 3142 continue; 3143 err |= eb_request_add(eb, rq, err, i == 0); 3144 } 3145 3146 return err; 3147 } 3148 3149 static const i915_user_extension_fn execbuf_extensions[] = { 3150 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, 3151 }; 3152 3153 static int 3154 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, 3155 struct i915_execbuffer *eb) 3156 { 3157 if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) 3158 return 0; 3159 3160 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot 3161 * have another flag also using it at the same time. 
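 *
 * Illustrative userspace fragment (variable names are made up, the flag and
 * fields are the uapi ones checked below): the extension chain is a
 * NULL-terminated list of struct i915_user_extension hung off cliprects_ptr,
 *
 *   execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
 *   execbuf.num_cliprects = 0;
 *   execbuf.cliprects_ptr = (uintptr_t)&ext.base;
 *
 * with ext.base.next_extension == 0 terminating the chain.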
3162 */ 3163 if (eb->args->flags & I915_EXEC_FENCE_ARRAY) 3164 return -EINVAL; 3165 3166 if (args->num_cliprects != 0) 3167 return -EINVAL; 3168 3169 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), 3170 execbuf_extensions, 3171 ARRAY_SIZE(execbuf_extensions), 3172 eb); 3173 } 3174 3175 static void eb_requests_get(struct i915_execbuffer *eb) 3176 { 3177 unsigned int i; 3178 3179 for_each_batch_create_order(eb, i) { 3180 if (!eb->requests[i]) 3181 break; 3182 3183 i915_request_get(eb->requests[i]); 3184 } 3185 } 3186 3187 static void eb_requests_put(struct i915_execbuffer *eb) 3188 { 3189 unsigned int i; 3190 3191 for_each_batch_create_order(eb, i) { 3192 if (!eb->requests[i]) 3193 break; 3194 3195 i915_request_put(eb->requests[i]); 3196 } 3197 } 3198 3199 static struct sync_file * 3200 eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) 3201 { 3202 struct sync_file *out_fence = NULL; 3203 struct dma_fence_array *fence_array; 3204 struct dma_fence **fences; 3205 unsigned int i; 3206 3207 GEM_BUG_ON(!intel_context_is_parent(eb->context)); 3208 3209 fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL); 3210 if (!fences) 3211 return ERR_PTR(-ENOMEM); 3212 3213 for_each_batch_create_order(eb, i) { 3214 fences[i] = &eb->requests[i]->fence; 3215 __set_bit(I915_FENCE_FLAG_COMPOSITE, 3216 &eb->requests[i]->fence.flags); 3217 } 3218 3219 fence_array = dma_fence_array_create(eb->num_batches, 3220 fences, 3221 eb->context->parallel.fence_context, 3222 eb->context->parallel.seqno++, 3223 false); 3224 if (!fence_array) { 3225 kfree(fences); 3226 return ERR_PTR(-ENOMEM); 3227 } 3228 3229 /* Move ownership to the dma_fence_array created above */ 3230 for_each_batch_create_order(eb, i) 3231 dma_fence_get(fences[i]); 3232 3233 if (out_fence_fd != -1) { 3234 out_fence = sync_file_create(&fence_array->base); 3235 /* sync_file now owns fence_arry, drop creation ref */ 3236 dma_fence_put(&fence_array->base); 3237 if (!out_fence) 3238 return ERR_PTR(-ENOMEM); 3239 } 3240 3241 eb->composite_fence = &fence_array->base; 3242 3243 return out_fence; 3244 } 3245 3246 static struct sync_file * 3247 eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq, 3248 struct dma_fence *in_fence, int out_fence_fd) 3249 { 3250 struct sync_file *out_fence = NULL; 3251 int err; 3252 3253 if (unlikely(eb->gem_context->syncobj)) { 3254 struct dma_fence *fence; 3255 3256 fence = drm_syncobj_fence_get(eb->gem_context->syncobj); 3257 err = i915_request_await_dma_fence(rq, fence); 3258 dma_fence_put(fence); 3259 if (err) 3260 return ERR_PTR(err); 3261 } 3262 3263 if (in_fence) { 3264 if (eb->args->flags & I915_EXEC_FENCE_SUBMIT) 3265 err = i915_request_await_execution(rq, in_fence); 3266 else 3267 err = i915_request_await_dma_fence(rq, in_fence); 3268 if (err < 0) 3269 return ERR_PTR(err); 3270 } 3271 3272 if (eb->fences) { 3273 err = await_fence_array(eb, rq); 3274 if (err) 3275 return ERR_PTR(err); 3276 } 3277 3278 if (intel_context_is_parallel(eb->context)) { 3279 out_fence = eb_composite_fence_create(eb, out_fence_fd); 3280 if (IS_ERR(out_fence)) 3281 return ERR_PTR(-ENOMEM); 3282 } else if (out_fence_fd != -1) { 3283 out_fence = sync_file_create(&rq->fence); 3284 if (!out_fence) 3285 return ERR_PTR(-ENOMEM); 3286 } 3287 3288 return out_fence; 3289 } 3290 3291 static struct intel_context * 3292 eb_find_context(struct i915_execbuffer *eb, unsigned int context_number) 3293 { 3294 struct intel_context *child; 3295 3296 if (likely(context_number == 0)) 3297 return 
eb->context; 3298 3299 for_each_child(eb->context, child) 3300 if (!--context_number) 3301 return child; 3302 3303 GEM_BUG_ON("Context not found"); 3304 3305 return NULL; 3306 } 3307 3308 static struct sync_file * 3309 eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence, 3310 int out_fence_fd) 3311 { 3312 struct sync_file *out_fence = NULL; 3313 unsigned int i; 3314 3315 for_each_batch_create_order(eb, i) { 3316 /* Allocate a request for this batch buffer nice and early. */ 3317 eb->requests[i] = i915_request_create(eb_find_context(eb, i)); 3318 if (IS_ERR(eb->requests[i])) { 3319 out_fence = ERR_CAST(eb->requests[i]); 3320 eb->requests[i] = NULL; 3321 return out_fence; 3322 } 3323 3324 /* 3325 * Only the first request added (committed to backend) has to 3326 * take the in fences into account as all subsequent requests 3327 * will have fences inserted inbetween them. 3328 */ 3329 if (i + 1 == eb->num_batches) { 3330 out_fence = eb_fences_add(eb, eb->requests[i], 3331 in_fence, out_fence_fd); 3332 if (IS_ERR(out_fence)) 3333 return out_fence; 3334 } 3335 3336 /* 3337 * Not really on stack, but we don't want to call 3338 * kfree on the batch_snapshot when we put it, so use the 3339 * _onstack interface. 3340 */ 3341 if (eb->batches[i]->vma) 3342 eb->requests[i]->batch_res = 3343 i915_vma_resource_get(eb->batches[i]->vma->resource); 3344 if (eb->batch_pool) { 3345 GEM_BUG_ON(intel_context_is_parallel(eb->context)); 3346 intel_gt_buffer_pool_mark_active(eb->batch_pool, 3347 eb->requests[i]); 3348 } 3349 } 3350 3351 return out_fence; 3352 } 3353 3354 static int 3355 i915_gem_do_execbuffer(struct drm_device *dev, 3356 struct drm_file *file, 3357 struct drm_i915_gem_execbuffer2 *args, 3358 struct drm_i915_gem_exec_object2 *exec) 3359 { 3360 struct drm_i915_private *i915 = to_i915(dev); 3361 struct i915_execbuffer eb; 3362 struct dma_fence *in_fence = NULL; 3363 struct sync_file *out_fence = NULL; 3364 int out_fence_fd = -1; 3365 int err; 3366 3367 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); 3368 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & 3369 ~__EXEC_OBJECT_UNKNOWN_FLAGS); 3370 3371 eb.i915 = i915; 3372 eb.file = file; 3373 eb.args = args; 3374 if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) 3375 args->flags |= __EXEC_HAS_RELOC; 3376 3377 eb.exec = exec; 3378 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); 3379 eb.vma[0].vma = NULL; 3380 eb.batch_pool = NULL; 3381 3382 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 3383 reloc_cache_init(&eb.reloc_cache, eb.i915); 3384 3385 eb.buffer_count = args->buffer_count; 3386 eb.batch_start_offset = args->batch_start_offset; 3387 eb.trampoline = NULL; 3388 3389 eb.fences = NULL; 3390 eb.num_fences = 0; 3391 3392 eb_capture_list_clear(&eb); 3393 3394 memset(eb.requests, 0, sizeof(struct i915_request *) * 3395 ARRAY_SIZE(eb.requests)); 3396 eb.composite_fence = NULL; 3397 3398 eb.batch_flags = 0; 3399 if (args->flags & I915_EXEC_SECURE) { 3400 if (GRAPHICS_VER(i915) >= 11) 3401 return -ENODEV; 3402 3403 /* Return -EPERM to trigger fallback code on old binaries. 
*/ 3404 if (!HAS_SECURE_BATCHES(i915)) 3405 return -EPERM; 3406 3407 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 3408 return -EPERM; 3409 3410 eb.batch_flags |= I915_DISPATCH_SECURE; 3411 } 3412 if (args->flags & I915_EXEC_IS_PINNED) 3413 eb.batch_flags |= I915_DISPATCH_PINNED; 3414 3415 err = parse_execbuf2_extensions(args, &eb); 3416 if (err) 3417 goto err_ext; 3418 3419 err = add_fence_array(&eb); 3420 if (err) 3421 goto err_ext; 3422 3423 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) 3424 if (args->flags & IN_FENCES) { 3425 if ((args->flags & IN_FENCES) == IN_FENCES) 3426 return -EINVAL; 3427 3428 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); 3429 if (!in_fence) { 3430 err = -EINVAL; 3431 goto err_ext; 3432 } 3433 } 3434 #undef IN_FENCES 3435 3436 if (args->flags & I915_EXEC_FENCE_OUT) { 3437 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 3438 if (out_fence_fd < 0) { 3439 err = out_fence_fd; 3440 goto err_in_fence; 3441 } 3442 } 3443 3444 err = eb_create(&eb); 3445 if (err) 3446 goto err_out_fence; 3447 3448 GEM_BUG_ON(!eb.lut_size); 3449 3450 err = eb_select_context(&eb); 3451 if (unlikely(err)) 3452 goto err_destroy; 3453 3454 err = eb_select_engine(&eb); 3455 if (unlikely(err)) 3456 goto err_context; 3457 3458 err = eb_lookup_vmas(&eb); 3459 if (err) { 3460 eb_release_vmas(&eb, true); 3461 goto err_engine; 3462 } 3463 3464 i915_gem_ww_ctx_init(&eb.ww, true); 3465 3466 err = eb_relocate_parse(&eb); 3467 if (err) { 3468 /* 3469 * If the user expects the execobject.offset and 3470 * reloc.presumed_offset to be an exact match, 3471 * as for using NO_RELOC, then we cannot update 3472 * the execobject.offset until we have completed 3473 * relocation. 3474 */ 3475 args->flags &= ~__EXEC_HAS_RELOC; 3476 goto err_vma; 3477 } 3478 3479 ww_acquire_done(&eb.ww.ctx); 3480 err = eb_capture_stage(&eb); 3481 if (err) 3482 goto err_vma; 3483 3484 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); 3485 if (IS_ERR(out_fence)) { 3486 err = PTR_ERR(out_fence); 3487 out_fence = NULL; 3488 if (eb.requests[0]) 3489 goto err_request; 3490 else 3491 goto err_vma; 3492 } 3493 3494 err = eb_submit(&eb); 3495 3496 err_request: 3497 eb_requests_get(&eb); 3498 err = eb_requests_add(&eb, err); 3499 3500 if (eb.fences) 3501 signal_fence_array(&eb, eb.composite_fence ? 3502 eb.composite_fence : 3503 &eb.requests[0]->fence); 3504 3505 if (unlikely(eb.gem_context->syncobj)) { 3506 drm_syncobj_replace_fence(eb.gem_context->syncobj, 3507 eb.composite_fence ? 
3508 eb.composite_fence : 3509 &eb.requests[0]->fence); 3510 } 3511 3512 if (out_fence) { 3513 if (err == 0) { 3514 fd_install(out_fence_fd, out_fence->file); 3515 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ 3516 args->rsvd2 |= (u64)out_fence_fd << 32; 3517 out_fence_fd = -1; 3518 } else { 3519 fput(out_fence->file); 3520 } 3521 } 3522 3523 if (!out_fence && eb.composite_fence) 3524 dma_fence_put(eb.composite_fence); 3525 3526 eb_requests_put(&eb); 3527 3528 err_vma: 3529 eb_release_vmas(&eb, true); 3530 WARN_ON(err == -EDEADLK); 3531 i915_gem_ww_ctx_fini(&eb.ww); 3532 3533 if (eb.batch_pool) 3534 intel_gt_buffer_pool_put(eb.batch_pool); 3535 err_engine: 3536 eb_put_engine(&eb); 3537 err_context: 3538 i915_gem_context_put(eb.gem_context); 3539 err_destroy: 3540 eb_destroy(&eb); 3541 err_out_fence: 3542 if (out_fence_fd != -1) 3543 put_unused_fd(out_fence_fd); 3544 err_in_fence: 3545 dma_fence_put(in_fence); 3546 err_ext: 3547 put_fence_array(eb.fences, eb.num_fences); 3548 return err; 3549 } 3550 3551 static size_t eb_element_size(void) 3552 { 3553 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); 3554 } 3555 3556 static bool check_buffer_count(size_t count) 3557 { 3558 const size_t sz = eb_element_size(); 3559 3560 /* 3561 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup 3562 * array size (see eb_create()). Otherwise, we can accept an array as 3563 * large as can be addressed (though use large arrays at your peril)! 3564 */ 3565 3566 return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); 3567 } 3568 3569 int 3570 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, 3571 struct drm_file *file) 3572 { 3573 struct drm_i915_private *i915 = to_i915(dev); 3574 struct drm_i915_gem_execbuffer2 *args = data; 3575 struct drm_i915_gem_exec_object2 *exec2_list; 3576 const size_t count = args->buffer_count; 3577 int err; 3578 3579 if (!check_buffer_count(count)) { 3580 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); 3581 return -EINVAL; 3582 } 3583 3584 err = i915_gem_check_execbuffer(i915, args); 3585 if (err) 3586 return err; 3587 3588 /* Allocate extra slots for use by the command parser */ 3589 exec2_list = kvmalloc_array(count + 2, eb_element_size(), 3590 __GFP_NOWARN | GFP_KERNEL); 3591 if (exec2_list == NULL) { 3592 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", 3593 count); 3594 return -ENOMEM; 3595 } 3596 if (copy_from_user(exec2_list, 3597 u64_to_user_ptr(args->buffers_ptr), 3598 sizeof(*exec2_list) * count)) { 3599 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count); 3600 kvfree(exec2_list); 3601 return -EFAULT; 3602 } 3603 3604 err = i915_gem_do_execbuffer(dev, file, args, exec2_list); 3605 3606 /* 3607 * Now that we have begun execution of the batchbuffer, we ignore 3608 * any new error after this point. Also given that we have already 3609 * updated the associated relocations, we try to write out the current 3610 * object locations irrespective of any error. 3611 */ 3612 if (args->flags & __EXEC_HAS_RELOC) { 3613 struct drm_i915_gem_exec_object2 __user *user_exec_list = 3614 u64_to_user_ptr(args->buffers_ptr); 3615 unsigned int i; 3616 3617 /* Copy the new buffer offsets back to the user's exec list. */ 3618 /* 3619 * Note: count * sizeof(*user_exec_list) does not overflow, 3620 * because we checked 'count' in check_buffer_count(). 3621 * 3622 * And this range already got effectively checked earlier 3623 * when we did the "copy_from_user()" above. 
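 *
 * The offsets written back here are canonical addresses with the UPDATE
 * (PIN_OFFSET_FIXED) marker stripped; they are what userspace would cache
 * and feed back as execobject.offset and reloc.presumed_offset on a later
 * NO_RELOC submission. A rough consumer-side sketch (cached_offset being
 * userspace's own bookkeeping):
 *
 *   for (i = 0; i < execbuf.buffer_count; i++)
 *           cached_offset[i] = exec_objects[i].offset;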
3624 */ 3625 if (!user_write_access_begin(user_exec_list, 3626 count * sizeof(*user_exec_list))) 3627 goto end; 3628 3629 for (i = 0; i < args->buffer_count; i++) { 3630 if (!(exec2_list[i].offset & UPDATE)) 3631 continue; 3632 3633 exec2_list[i].offset = 3634 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); 3635 unsafe_put_user(exec2_list[i].offset, 3636 &user_exec_list[i].offset, 3637 end_user); 3638 } 3639 end_user: 3640 user_write_access_end(); 3641 end:; 3642 } 3643 3644 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; 3645 kvfree(exec2_list); 3646 return err; 3647 } 3648
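/*
 * Illustrative userspace usage (not part of the driver; error handling and
 * buffer setup omitted, batch_handle and fd assumed to exist): a minimal
 * single-batch submission through DRM_IOCTL_I915_GEM_EXECBUFFER2 might look
 * like
 *
 *   struct drm_i915_gem_exec_object2 obj = { .handle = batch_handle };
 *   struct drm_i915_gem_execbuffer2 execbuf = {
 *           .buffers_ptr  = (uintptr_t)&obj,
 *           .buffer_count = 1,
 *           .batch_len    = batch_bytes,   (a multiple of 8, see
 *                                           i915_gem_check_execbuffer())
 *           .flags        = I915_EXEC_RENDER,
 *   };
 *
 *   drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * where the batchbuffer is, by default, the last object in buffers_ptr
 * (unless I915_EXEC_BATCH_FIRST is set).
 */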