1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include "i915_gemfs.h" 39 #include <linux/dma-fence-array.h> 40 #include <linux/kthread.h> 41 #include <linux/reservation.h> 42 #include <linux/shmem_fs.h> 43 #include <linux/slab.h> 44 #include <linux/stop_machine.h> 45 #include <linux/swap.h> 46 #include <linux/pci.h> 47 #include <linux/dma-buf.h> 48 49 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 50 51 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 52 { 53 if (obj->cache_dirty) 54 return false; 55 56 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) 57 return true; 58 59 return obj->pin_global; /* currently in use by HW, keep flushed */ 60 } 61 62 static int 63 insert_mappable_node(struct i915_ggtt *ggtt, 64 struct drm_mm_node *node, u32 size) 65 { 66 memset(node, 0, sizeof(*node)); 67 return drm_mm_insert_node_in_range(&ggtt->base.mm, node, 68 size, 0, I915_COLOR_UNEVICTABLE, 69 0, ggtt->mappable_end, 70 DRM_MM_INSERT_LOW); 71 } 72 73 static void 74 remove_mappable_node(struct drm_mm_node *node) 75 { 76 drm_mm_remove_node(node); 77 } 78 79 /* some bookkeeping */ 80 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 81 u64 size) 82 { 83 spin_lock(&dev_priv->mm.object_stat_lock); 84 dev_priv->mm.object_count++; 85 dev_priv->mm.object_memory += size; 86 spin_unlock(&dev_priv->mm.object_stat_lock); 87 } 88 89 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 90 u64 size) 91 { 92 spin_lock(&dev_priv->mm.object_stat_lock); 93 dev_priv->mm.object_count--; 94 dev_priv->mm.object_memory -= size; 95 spin_unlock(&dev_priv->mm.object_stat_lock); 96 } 97 98 static int 99 i915_gem_wait_for_error(struct i915_gpu_error *error) 100 { 101 int ret; 102 103 might_sleep(); 104 105 /* 106 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 107 * userspace. If it takes that long something really bad is going on and 108 * we should simply try to bail out and fail as gracefully as possible. 109 */ 110 ret = wait_event_interruptible_timeout(error->reset_queue, 111 !i915_reset_backoff(error), 112 I915_RESET_TIMEOUT); 113 if (ret == 0) { 114 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 115 return -EIO; 116 } else if (ret < 0) { 117 return ret; 118 } else { 119 return 0; 120 } 121 } 122 123 int i915_mutex_lock_interruptible(struct drm_device *dev) 124 { 125 struct drm_i915_private *dev_priv = to_i915(dev); 126 int ret; 127 128 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 129 if (ret) 130 return ret; 131 132 ret = mutex_lock_interruptible(&dev->struct_mutex); 133 if (ret) 134 return ret; 135 136 return 0; 137 } 138 139 int 140 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 141 struct drm_file *file) 142 { 143 struct drm_i915_private *dev_priv = to_i915(dev); 144 struct i915_ggtt *ggtt = &dev_priv->ggtt; 145 struct drm_i915_gem_get_aperture *args = data; 146 struct i915_vma *vma; 147 u64 pinned; 148 149 pinned = ggtt->base.reserved; 150 mutex_lock(&dev->struct_mutex); 151 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 152 if (i915_vma_is_pinned(vma)) 153 pinned += vma->node.size; 154 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 155 if (i915_vma_is_pinned(vma)) 156 pinned += vma->node.size; 157 mutex_unlock(&dev->struct_mutex); 158 159 args->aper_size = ggtt->base.total; 160 args->aper_available_size = args->aper_size - pinned; 161 162 return 0; 163 } 164 165 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 166 { 167 struct address_space *mapping = obj->base.filp->f_mapping; 168 drm_dma_handle_t *phys; 169 struct sg_table *st; 170 struct scatterlist *sg; 171 char *vaddr; 172 int i; 173 int err; 174 175 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 176 return -EINVAL; 177 178 /* Always aligning to the object size, allows a single allocation 179 * to handle all possible callers, and given typical object sizes, 180 * the alignment of the buddy allocation will naturally match. 181 */ 182 phys = drm_pci_alloc(obj->base.dev, 183 roundup_pow_of_two(obj->base.size), 184 roundup_pow_of_two(obj->base.size)); 185 if (!phys) 186 return -ENOMEM; 187 188 vaddr = phys->vaddr; 189 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 190 struct page *page; 191 char *src; 192 193 page = shmem_read_mapping_page(mapping, i); 194 if (IS_ERR(page)) { 195 err = PTR_ERR(page); 196 goto err_phys; 197 } 198 199 src = kmap_atomic(page); 200 memcpy(vaddr, src, PAGE_SIZE); 201 drm_clflush_virt_range(vaddr, PAGE_SIZE); 202 kunmap_atomic(src); 203 204 put_page(page); 205 vaddr += PAGE_SIZE; 206 } 207 208 i915_gem_chipset_flush(to_i915(obj->base.dev)); 209 210 st = kmalloc(sizeof(*st), GFP_KERNEL); 211 if (!st) { 212 err = -ENOMEM; 213 goto err_phys; 214 } 215 216 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 217 kfree(st); 218 err = -ENOMEM; 219 goto err_phys; 220 } 221 222 sg = st->sgl; 223 sg->offset = 0; 224 sg->length = obj->base.size; 225 226 sg_dma_address(sg) = phys->busaddr; 227 sg_dma_len(sg) = obj->base.size; 228 229 obj->phys_handle = phys; 230 231 __i915_gem_object_set_pages(obj, st, sg->length); 232 233 return 0; 234 235 err_phys: 236 drm_pci_free(obj->base.dev, phys); 237 238 return err; 239 } 240 241 static void __start_cpu_write(struct drm_i915_gem_object *obj) 242 { 243 obj->read_domains = I915_GEM_DOMAIN_CPU; 244 obj->write_domain = I915_GEM_DOMAIN_CPU; 245 if (cpu_write_needs_clflush(obj)) 246 obj->cache_dirty = true; 247 } 248 249 static void 250 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 251 struct sg_table *pages, 252 bool needs_clflush) 253 { 254 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 255 256 if (obj->mm.madv == I915_MADV_DONTNEED) 257 obj->mm.dirty = false; 258 259 if (needs_clflush && 260 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 261 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 262 drm_clflush_sg(pages); 263 264 __start_cpu_write(obj); 265 } 266 267 static void 268 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 269 struct sg_table *pages) 270 { 271 __i915_gem_object_release_shmem(obj, pages, false); 272 273 if (obj->mm.dirty) { 274 struct address_space *mapping = obj->base.filp->f_mapping; 275 char *vaddr = obj->phys_handle->vaddr; 276 int i; 277 278 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 279 struct page *page; 280 char *dst; 281 282 page = shmem_read_mapping_page(mapping, i); 283 if (IS_ERR(page)) 284 continue; 285 286 dst = kmap_atomic(page); 287 drm_clflush_virt_range(vaddr, PAGE_SIZE); 288 memcpy(dst, vaddr, PAGE_SIZE); 289 kunmap_atomic(dst); 290 291 set_page_dirty(page); 292 if (obj->mm.madv == I915_MADV_WILLNEED) 293 mark_page_accessed(page); 294 put_page(page); 295 vaddr += PAGE_SIZE; 296 } 297 obj->mm.dirty = false; 298 } 299 300 sg_free_table(pages); 301 kfree(pages); 302 303 drm_pci_free(obj->base.dev, obj->phys_handle); 304 } 305 306 static void 307 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 308 { 309 i915_gem_object_unpin_pages(obj); 310 } 311 312 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 313 .get_pages = i915_gem_object_get_pages_phys, 314 .put_pages = i915_gem_object_put_pages_phys, 315 .release = i915_gem_object_release_phys, 316 }; 317 318 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 319 320 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 321 { 322 struct i915_vma *vma; 323 LIST_HEAD(still_in_list); 324 int ret; 325 326 lockdep_assert_held(&obj->base.dev->struct_mutex); 327 328 /* Closed vma are removed from the obj->vma_list - but they may 329 * still have an active binding on the object. To remove those we 330 * must wait for all rendering to complete to the object (as unbinding 331 * must anyway), and retire the requests. 332 */ 333 ret = i915_gem_object_set_to_cpu_domain(obj, false); 334 if (ret) 335 return ret; 336 337 while ((vma = list_first_entry_or_null(&obj->vma_list, 338 struct i915_vma, 339 obj_link))) { 340 list_move_tail(&vma->obj_link, &still_in_list); 341 ret = i915_vma_unbind(vma); 342 if (ret) 343 break; 344 } 345 list_splice(&still_in_list, &obj->vma_list); 346 347 return ret; 348 } 349 350 static long 351 i915_gem_object_wait_fence(struct dma_fence *fence, 352 unsigned int flags, 353 long timeout, 354 struct intel_rps_client *rps_client) 355 { 356 struct i915_request *rq; 357 358 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 359 360 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 361 return timeout; 362 363 if (!dma_fence_is_i915(fence)) 364 return dma_fence_wait_timeout(fence, 365 flags & I915_WAIT_INTERRUPTIBLE, 366 timeout); 367 368 rq = to_request(fence); 369 if (i915_request_completed(rq)) 370 goto out; 371 372 /* 373 * This client is about to stall waiting for the GPU. In many cases 374 * this is undesirable and limits the throughput of the system, as 375 * many clients cannot continue processing user input/output whilst 376 * blocked. RPS autotuning may take tens of milliseconds to respond 377 * to the GPU load and thus incurs additional latency for the client. 378 * We can circumvent that by promoting the GPU frequency to maximum 379 * before we wait. This makes the GPU throttle up much more quickly 380 * (good for benchmarks and user experience, e.g. window animations), 381 * but at a cost of spending more power processing the workload 382 * (bad for battery). Not all clients even want their results 383 * immediately and for them we should just let the GPU select its own 384 * frequency to maximise efficiency. To prevent a single client from 385 * forcing the clocks too high for the whole system, we only allow 386 * each client to waitboost once in a busy period. 387 */ 388 if (rps_client && !i915_request_started(rq)) { 389 if (INTEL_GEN(rq->i915) >= 6) 390 gen6_rps_boost(rq, rps_client); 391 } 392 393 timeout = i915_request_wait(rq, flags, timeout); 394 395 out: 396 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 397 i915_request_retire_upto(rq); 398 399 return timeout; 400 } 401 402 static long 403 i915_gem_object_wait_reservation(struct reservation_object *resv, 404 unsigned int flags, 405 long timeout, 406 struct intel_rps_client *rps_client) 407 { 408 unsigned int seq = __read_seqcount_begin(&resv->seq); 409 struct dma_fence *excl; 410 bool prune_fences = false; 411 412 if (flags & I915_WAIT_ALL) { 413 struct dma_fence **shared; 414 unsigned int count, i; 415 int ret; 416 417 ret = reservation_object_get_fences_rcu(resv, 418 &excl, &count, &shared); 419 if (ret) 420 return ret; 421 422 for (i = 0; i < count; i++) { 423 timeout = i915_gem_object_wait_fence(shared[i], 424 flags, timeout, 425 rps_client); 426 if (timeout < 0) 427 break; 428 429 dma_fence_put(shared[i]); 430 } 431 432 for (; i < count; i++) 433 dma_fence_put(shared[i]); 434 kfree(shared); 435 436 /* 437 * If both shared fences and an exclusive fence exist, 438 * then by construction the shared fences must be later 439 * than the exclusive fence. If we successfully wait for 440 * all the shared fences, we know that the exclusive fence 441 * must all be signaled. If all the shared fences are 442 * signaled, we can prune the array and recover the 443 * floating references on the fences/requests. 444 */ 445 prune_fences = count && timeout >= 0; 446 } else { 447 excl = reservation_object_get_excl_rcu(resv); 448 } 449 450 if (excl && timeout >= 0) 451 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 452 rps_client); 453 454 dma_fence_put(excl); 455 456 /* 457 * Opportunistically prune the fences iff we know they have *all* been 458 * signaled and that the reservation object has not been changed (i.e. 459 * no new fences have been added). 460 */ 461 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 462 if (reservation_object_trylock(resv)) { 463 if (!__read_seqcount_retry(&resv->seq, seq)) 464 reservation_object_add_excl_fence(resv, NULL); 465 reservation_object_unlock(resv); 466 } 467 } 468 469 return timeout; 470 } 471 472 static void __fence_set_priority(struct dma_fence *fence, int prio) 473 { 474 struct i915_request *rq; 475 struct intel_engine_cs *engine; 476 477 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 478 return; 479 480 rq = to_request(fence); 481 engine = rq->engine; 482 483 rcu_read_lock(); 484 if (engine->schedule) 485 engine->schedule(rq, prio); 486 rcu_read_unlock(); 487 } 488 489 static void fence_set_priority(struct dma_fence *fence, int prio) 490 { 491 /* Recurse once into a fence-array */ 492 if (dma_fence_is_array(fence)) { 493 struct dma_fence_array *array = to_dma_fence_array(fence); 494 int i; 495 496 for (i = 0; i < array->num_fences; i++) 497 __fence_set_priority(array->fences[i], prio); 498 } else { 499 __fence_set_priority(fence, prio); 500 } 501 } 502 503 int 504 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 505 unsigned int flags, 506 int prio) 507 { 508 struct dma_fence *excl; 509 510 if (flags & I915_WAIT_ALL) { 511 struct dma_fence **shared; 512 unsigned int count, i; 513 int ret; 514 515 ret = reservation_object_get_fences_rcu(obj->resv, 516 &excl, &count, &shared); 517 if (ret) 518 return ret; 519 520 for (i = 0; i < count; i++) { 521 fence_set_priority(shared[i], prio); 522 dma_fence_put(shared[i]); 523 } 524 525 kfree(shared); 526 } else { 527 excl = reservation_object_get_excl_rcu(obj->resv); 528 } 529 530 if (excl) { 531 fence_set_priority(excl, prio); 532 dma_fence_put(excl); 533 } 534 return 0; 535 } 536 537 /** 538 * Waits for rendering to the object to be completed 539 * @obj: i915 gem object 540 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 541 * @timeout: how long to wait 542 * @rps_client: client (user process) to charge for any waitboosting 543 */ 544 int 545 i915_gem_object_wait(struct drm_i915_gem_object *obj, 546 unsigned int flags, 547 long timeout, 548 struct intel_rps_client *rps_client) 549 { 550 might_sleep(); 551 #if IS_ENABLED(CONFIG_LOCKDEP) 552 GEM_BUG_ON(debug_locks && 553 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 554 !!(flags & I915_WAIT_LOCKED)); 555 #endif 556 GEM_BUG_ON(timeout < 0); 557 558 timeout = i915_gem_object_wait_reservation(obj->resv, 559 flags, timeout, 560 rps_client); 561 return timeout < 0 ? timeout : 0; 562 } 563 564 static struct intel_rps_client *to_rps_client(struct drm_file *file) 565 { 566 struct drm_i915_file_private *fpriv = file->driver_priv; 567 568 return &fpriv->rps_client; 569 } 570 571 static int 572 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 573 struct drm_i915_gem_pwrite *args, 574 struct drm_file *file) 575 { 576 void *vaddr = obj->phys_handle->vaddr + args->offset; 577 char __user *user_data = u64_to_user_ptr(args->data_ptr); 578 579 /* We manually control the domain here and pretend that it 580 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 581 */ 582 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 583 if (copy_from_user(vaddr, user_data, args->size)) 584 return -EFAULT; 585 586 drm_clflush_virt_range(vaddr, args->size); 587 i915_gem_chipset_flush(to_i915(obj->base.dev)); 588 589 intel_fb_obj_flush(obj, ORIGIN_CPU); 590 return 0; 591 } 592 593 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 594 { 595 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 596 } 597 598 void i915_gem_object_free(struct drm_i915_gem_object *obj) 599 { 600 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 601 kmem_cache_free(dev_priv->objects, obj); 602 } 603 604 static int 605 i915_gem_create(struct drm_file *file, 606 struct drm_i915_private *dev_priv, 607 uint64_t size, 608 uint32_t *handle_p) 609 { 610 struct drm_i915_gem_object *obj; 611 int ret; 612 u32 handle; 613 614 size = roundup(size, PAGE_SIZE); 615 if (size == 0) 616 return -EINVAL; 617 618 /* Allocate the new object */ 619 obj = i915_gem_object_create(dev_priv, size); 620 if (IS_ERR(obj)) 621 return PTR_ERR(obj); 622 623 ret = drm_gem_handle_create(file, &obj->base, &handle); 624 /* drop reference from allocate - handle holds it now */ 625 i915_gem_object_put(obj); 626 if (ret) 627 return ret; 628 629 *handle_p = handle; 630 return 0; 631 } 632 633 int 634 i915_gem_dumb_create(struct drm_file *file, 635 struct drm_device *dev, 636 struct drm_mode_create_dumb *args) 637 { 638 /* have to work out size/pitch and return them */ 639 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 640 args->size = args->pitch * args->height; 641 return i915_gem_create(file, to_i915(dev), 642 args->size, &args->handle); 643 } 644 645 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 646 { 647 return !(obj->cache_level == I915_CACHE_NONE || 648 obj->cache_level == I915_CACHE_WT); 649 } 650 651 /** 652 * Creates a new mm object and returns a handle to it. 653 * @dev: drm device pointer 654 * @data: ioctl data blob 655 * @file: drm file pointer 656 */ 657 int 658 i915_gem_create_ioctl(struct drm_device *dev, void *data, 659 struct drm_file *file) 660 { 661 struct drm_i915_private *dev_priv = to_i915(dev); 662 struct drm_i915_gem_create *args = data; 663 664 i915_gem_flush_free_objects(dev_priv); 665 666 return i915_gem_create(file, dev_priv, 667 args->size, &args->handle); 668 } 669 670 static inline enum fb_op_origin 671 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 672 { 673 return (domain == I915_GEM_DOMAIN_GTT ? 674 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 675 } 676 677 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 678 { 679 /* 680 * No actual flushing is required for the GTT write domain for reads 681 * from the GTT domain. Writes to it "immediately" go to main memory 682 * as far as we know, so there's no chipset flush. It also doesn't 683 * land in the GPU render cache. 684 * 685 * However, we do have to enforce the order so that all writes through 686 * the GTT land before any writes to the device, such as updates to 687 * the GATT itself. 688 * 689 * We also have to wait a bit for the writes to land from the GTT. 690 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 691 * timing. This issue has only been observed when switching quickly 692 * between GTT writes and CPU reads from inside the kernel on recent hw, 693 * and it appears to only affect discrete GTT blocks (i.e. on LLC 694 * system agents we cannot reproduce this behaviour, until Cannonlake 695 * that was!). 696 */ 697 698 wmb(); 699 700 intel_runtime_pm_get(dev_priv); 701 spin_lock_irq(&dev_priv->uncore.lock); 702 703 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 704 705 spin_unlock_irq(&dev_priv->uncore.lock); 706 intel_runtime_pm_put(dev_priv); 707 } 708 709 static void 710 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 711 { 712 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 713 struct i915_vma *vma; 714 715 if (!(obj->write_domain & flush_domains)) 716 return; 717 718 switch (obj->write_domain) { 719 case I915_GEM_DOMAIN_GTT: 720 i915_gem_flush_ggtt_writes(dev_priv); 721 722 intel_fb_obj_flush(obj, 723 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 724 725 for_each_ggtt_vma(vma, obj) { 726 if (vma->iomap) 727 continue; 728 729 i915_vma_unset_ggtt_write(vma); 730 } 731 break; 732 733 case I915_GEM_DOMAIN_CPU: 734 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 735 break; 736 737 case I915_GEM_DOMAIN_RENDER: 738 if (gpu_write_needs_clflush(obj)) 739 obj->cache_dirty = true; 740 break; 741 } 742 743 obj->write_domain = 0; 744 } 745 746 static inline int 747 __copy_to_user_swizzled(char __user *cpu_vaddr, 748 const char *gpu_vaddr, int gpu_offset, 749 int length) 750 { 751 int ret, cpu_offset = 0; 752 753 while (length > 0) { 754 int cacheline_end = ALIGN(gpu_offset + 1, 64); 755 int this_length = min(cacheline_end - gpu_offset, length); 756 int swizzled_gpu_offset = gpu_offset ^ 64; 757 758 ret = __copy_to_user(cpu_vaddr + cpu_offset, 759 gpu_vaddr + swizzled_gpu_offset, 760 this_length); 761 if (ret) 762 return ret + length; 763 764 cpu_offset += this_length; 765 gpu_offset += this_length; 766 length -= this_length; 767 } 768 769 return 0; 770 } 771 772 static inline int 773 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 774 const char __user *cpu_vaddr, 775 int length) 776 { 777 int ret, cpu_offset = 0; 778 779 while (length > 0) { 780 int cacheline_end = ALIGN(gpu_offset + 1, 64); 781 int this_length = min(cacheline_end - gpu_offset, length); 782 int swizzled_gpu_offset = gpu_offset ^ 64; 783 784 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 785 cpu_vaddr + cpu_offset, 786 this_length); 787 if (ret) 788 return ret + length; 789 790 cpu_offset += this_length; 791 gpu_offset += this_length; 792 length -= this_length; 793 } 794 795 return 0; 796 } 797 798 /* 799 * Pins the specified object's pages and synchronizes the object with 800 * GPU accesses. Sets needs_clflush to non-zero if the caller should 801 * flush the object from the CPU cache. 802 */ 803 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 804 unsigned int *needs_clflush) 805 { 806 int ret; 807 808 lockdep_assert_held(&obj->base.dev->struct_mutex); 809 810 *needs_clflush = 0; 811 if (!i915_gem_object_has_struct_page(obj)) 812 return -ENODEV; 813 814 ret = i915_gem_object_wait(obj, 815 I915_WAIT_INTERRUPTIBLE | 816 I915_WAIT_LOCKED, 817 MAX_SCHEDULE_TIMEOUT, 818 NULL); 819 if (ret) 820 return ret; 821 822 ret = i915_gem_object_pin_pages(obj); 823 if (ret) 824 return ret; 825 826 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 827 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 828 ret = i915_gem_object_set_to_cpu_domain(obj, false); 829 if (ret) 830 goto err_unpin; 831 else 832 goto out; 833 } 834 835 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 836 837 /* If we're not in the cpu read domain, set ourself into the gtt 838 * read domain and manually flush cachelines (if required). This 839 * optimizes for the case when the gpu will dirty the data 840 * anyway again before the next pread happens. 841 */ 842 if (!obj->cache_dirty && 843 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 844 *needs_clflush = CLFLUSH_BEFORE; 845 846 out: 847 /* return with the pages pinned */ 848 return 0; 849 850 err_unpin: 851 i915_gem_object_unpin_pages(obj); 852 return ret; 853 } 854 855 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 856 unsigned int *needs_clflush) 857 { 858 int ret; 859 860 lockdep_assert_held(&obj->base.dev->struct_mutex); 861 862 *needs_clflush = 0; 863 if (!i915_gem_object_has_struct_page(obj)) 864 return -ENODEV; 865 866 ret = i915_gem_object_wait(obj, 867 I915_WAIT_INTERRUPTIBLE | 868 I915_WAIT_LOCKED | 869 I915_WAIT_ALL, 870 MAX_SCHEDULE_TIMEOUT, 871 NULL); 872 if (ret) 873 return ret; 874 875 ret = i915_gem_object_pin_pages(obj); 876 if (ret) 877 return ret; 878 879 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 880 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 881 ret = i915_gem_object_set_to_cpu_domain(obj, true); 882 if (ret) 883 goto err_unpin; 884 else 885 goto out; 886 } 887 888 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 889 890 /* If we're not in the cpu write domain, set ourself into the 891 * gtt write domain and manually flush cachelines (as required). 892 * This optimizes for the case when the gpu will use the data 893 * right away and we therefore have to clflush anyway. 894 */ 895 if (!obj->cache_dirty) { 896 *needs_clflush |= CLFLUSH_AFTER; 897 898 /* 899 * Same trick applies to invalidate partially written 900 * cachelines read before writing. 901 */ 902 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 903 *needs_clflush |= CLFLUSH_BEFORE; 904 } 905 906 out: 907 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 908 obj->mm.dirty = true; 909 /* return with the pages pinned */ 910 return 0; 911 912 err_unpin: 913 i915_gem_object_unpin_pages(obj); 914 return ret; 915 } 916 917 static void 918 shmem_clflush_swizzled_range(char *addr, unsigned long length, 919 bool swizzled) 920 { 921 if (unlikely(swizzled)) { 922 unsigned long start = (unsigned long) addr; 923 unsigned long end = (unsigned long) addr + length; 924 925 /* For swizzling simply ensure that we always flush both 926 * channels. Lame, but simple and it works. Swizzled 927 * pwrite/pread is far from a hotpath - current userspace 928 * doesn't use it at all. */ 929 start = round_down(start, 128); 930 end = round_up(end, 128); 931 932 drm_clflush_virt_range((void *)start, end - start); 933 } else { 934 drm_clflush_virt_range(addr, length); 935 } 936 937 } 938 939 /* Only difference to the fast-path function is that this can handle bit17 940 * and uses non-atomic copy and kmap functions. */ 941 static int 942 shmem_pread_slow(struct page *page, int offset, int length, 943 char __user *user_data, 944 bool page_do_bit17_swizzling, bool needs_clflush) 945 { 946 char *vaddr; 947 int ret; 948 949 vaddr = kmap(page); 950 if (needs_clflush) 951 shmem_clflush_swizzled_range(vaddr + offset, length, 952 page_do_bit17_swizzling); 953 954 if (page_do_bit17_swizzling) 955 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 956 else 957 ret = __copy_to_user(user_data, vaddr + offset, length); 958 kunmap(page); 959 960 return ret ? - EFAULT : 0; 961 } 962 963 static int 964 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 965 bool page_do_bit17_swizzling, bool needs_clflush) 966 { 967 int ret; 968 969 ret = -ENODEV; 970 if (!page_do_bit17_swizzling) { 971 char *vaddr = kmap_atomic(page); 972 973 if (needs_clflush) 974 drm_clflush_virt_range(vaddr + offset, length); 975 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 976 kunmap_atomic(vaddr); 977 } 978 if (ret == 0) 979 return 0; 980 981 return shmem_pread_slow(page, offset, length, user_data, 982 page_do_bit17_swizzling, needs_clflush); 983 } 984 985 static int 986 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 987 struct drm_i915_gem_pread *args) 988 { 989 char __user *user_data; 990 u64 remain; 991 unsigned int obj_do_bit17_swizzling; 992 unsigned int needs_clflush; 993 unsigned int idx, offset; 994 int ret; 995 996 obj_do_bit17_swizzling = 0; 997 if (i915_gem_object_needs_bit17_swizzle(obj)) 998 obj_do_bit17_swizzling = BIT(17); 999 1000 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1001 if (ret) 1002 return ret; 1003 1004 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1005 mutex_unlock(&obj->base.dev->struct_mutex); 1006 if (ret) 1007 return ret; 1008 1009 remain = args->size; 1010 user_data = u64_to_user_ptr(args->data_ptr); 1011 offset = offset_in_page(args->offset); 1012 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1013 struct page *page = i915_gem_object_get_page(obj, idx); 1014 int length; 1015 1016 length = remain; 1017 if (offset + length > PAGE_SIZE) 1018 length = PAGE_SIZE - offset; 1019 1020 ret = shmem_pread(page, offset, length, user_data, 1021 page_to_phys(page) & obj_do_bit17_swizzling, 1022 needs_clflush); 1023 if (ret) 1024 break; 1025 1026 remain -= length; 1027 user_data += length; 1028 offset = 0; 1029 } 1030 1031 i915_gem_obj_finish_shmem_access(obj); 1032 return ret; 1033 } 1034 1035 static inline bool 1036 gtt_user_read(struct io_mapping *mapping, 1037 loff_t base, int offset, 1038 char __user *user_data, int length) 1039 { 1040 void __iomem *vaddr; 1041 unsigned long unwritten; 1042 1043 /* We can use the cpu mem copy function because this is X86. */ 1044 vaddr = io_mapping_map_atomic_wc(mapping, base); 1045 unwritten = __copy_to_user_inatomic(user_data, 1046 (void __force *)vaddr + offset, 1047 length); 1048 io_mapping_unmap_atomic(vaddr); 1049 if (unwritten) { 1050 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1051 unwritten = copy_to_user(user_data, 1052 (void __force *)vaddr + offset, 1053 length); 1054 io_mapping_unmap(vaddr); 1055 } 1056 return unwritten; 1057 } 1058 1059 static int 1060 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1061 const struct drm_i915_gem_pread *args) 1062 { 1063 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1064 struct i915_ggtt *ggtt = &i915->ggtt; 1065 struct drm_mm_node node; 1066 struct i915_vma *vma; 1067 void __user *user_data; 1068 u64 remain, offset; 1069 int ret; 1070 1071 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1072 if (ret) 1073 return ret; 1074 1075 intel_runtime_pm_get(i915); 1076 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1077 PIN_MAPPABLE | 1078 PIN_NONFAULT | 1079 PIN_NONBLOCK); 1080 if (!IS_ERR(vma)) { 1081 node.start = i915_ggtt_offset(vma); 1082 node.allocated = false; 1083 ret = i915_vma_put_fence(vma); 1084 if (ret) { 1085 i915_vma_unpin(vma); 1086 vma = ERR_PTR(ret); 1087 } 1088 } 1089 if (IS_ERR(vma)) { 1090 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1091 if (ret) 1092 goto out_unlock; 1093 GEM_BUG_ON(!node.allocated); 1094 } 1095 1096 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1097 if (ret) 1098 goto out_unpin; 1099 1100 mutex_unlock(&i915->drm.struct_mutex); 1101 1102 user_data = u64_to_user_ptr(args->data_ptr); 1103 remain = args->size; 1104 offset = args->offset; 1105 1106 while (remain > 0) { 1107 /* Operation in this page 1108 * 1109 * page_base = page offset within aperture 1110 * page_offset = offset within page 1111 * page_length = bytes to copy for this page 1112 */ 1113 u32 page_base = node.start; 1114 unsigned page_offset = offset_in_page(offset); 1115 unsigned page_length = PAGE_SIZE - page_offset; 1116 page_length = remain < page_length ? remain : page_length; 1117 if (node.allocated) { 1118 wmb(); 1119 ggtt->base.insert_page(&ggtt->base, 1120 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1121 node.start, I915_CACHE_NONE, 0); 1122 wmb(); 1123 } else { 1124 page_base += offset & PAGE_MASK; 1125 } 1126 1127 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1128 user_data, page_length)) { 1129 ret = -EFAULT; 1130 break; 1131 } 1132 1133 remain -= page_length; 1134 user_data += page_length; 1135 offset += page_length; 1136 } 1137 1138 mutex_lock(&i915->drm.struct_mutex); 1139 out_unpin: 1140 if (node.allocated) { 1141 wmb(); 1142 ggtt->base.clear_range(&ggtt->base, 1143 node.start, node.size); 1144 remove_mappable_node(&node); 1145 } else { 1146 i915_vma_unpin(vma); 1147 } 1148 out_unlock: 1149 intel_runtime_pm_put(i915); 1150 mutex_unlock(&i915->drm.struct_mutex); 1151 1152 return ret; 1153 } 1154 1155 /** 1156 * Reads data from the object referenced by handle. 1157 * @dev: drm device pointer 1158 * @data: ioctl data blob 1159 * @file: drm file pointer 1160 * 1161 * On error, the contents of *data are undefined. 1162 */ 1163 int 1164 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1165 struct drm_file *file) 1166 { 1167 struct drm_i915_gem_pread *args = data; 1168 struct drm_i915_gem_object *obj; 1169 int ret; 1170 1171 if (args->size == 0) 1172 return 0; 1173 1174 if (!access_ok(VERIFY_WRITE, 1175 u64_to_user_ptr(args->data_ptr), 1176 args->size)) 1177 return -EFAULT; 1178 1179 obj = i915_gem_object_lookup(file, args->handle); 1180 if (!obj) 1181 return -ENOENT; 1182 1183 /* Bounds check source. */ 1184 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1185 ret = -EINVAL; 1186 goto out; 1187 } 1188 1189 trace_i915_gem_object_pread(obj, args->offset, args->size); 1190 1191 ret = i915_gem_object_wait(obj, 1192 I915_WAIT_INTERRUPTIBLE, 1193 MAX_SCHEDULE_TIMEOUT, 1194 to_rps_client(file)); 1195 if (ret) 1196 goto out; 1197 1198 ret = i915_gem_object_pin_pages(obj); 1199 if (ret) 1200 goto out; 1201 1202 ret = i915_gem_shmem_pread(obj, args); 1203 if (ret == -EFAULT || ret == -ENODEV) 1204 ret = i915_gem_gtt_pread(obj, args); 1205 1206 i915_gem_object_unpin_pages(obj); 1207 out: 1208 i915_gem_object_put(obj); 1209 return ret; 1210 } 1211 1212 /* This is the fast write path which cannot handle 1213 * page faults in the source data 1214 */ 1215 1216 static inline bool 1217 ggtt_write(struct io_mapping *mapping, 1218 loff_t base, int offset, 1219 char __user *user_data, int length) 1220 { 1221 void __iomem *vaddr; 1222 unsigned long unwritten; 1223 1224 /* We can use the cpu mem copy function because this is X86. */ 1225 vaddr = io_mapping_map_atomic_wc(mapping, base); 1226 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1227 user_data, length); 1228 io_mapping_unmap_atomic(vaddr); 1229 if (unwritten) { 1230 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1231 unwritten = copy_from_user((void __force *)vaddr + offset, 1232 user_data, length); 1233 io_mapping_unmap(vaddr); 1234 } 1235 1236 return unwritten; 1237 } 1238 1239 /** 1240 * This is the fast pwrite path, where we copy the data directly from the 1241 * user into the GTT, uncached. 1242 * @obj: i915 GEM object 1243 * @args: pwrite arguments structure 1244 */ 1245 static int 1246 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1247 const struct drm_i915_gem_pwrite *args) 1248 { 1249 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1250 struct i915_ggtt *ggtt = &i915->ggtt; 1251 struct drm_mm_node node; 1252 struct i915_vma *vma; 1253 u64 remain, offset; 1254 void __user *user_data; 1255 int ret; 1256 1257 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1258 if (ret) 1259 return ret; 1260 1261 if (i915_gem_object_has_struct_page(obj)) { 1262 /* 1263 * Avoid waking the device up if we can fallback, as 1264 * waking/resuming is very slow (worst-case 10-100 ms 1265 * depending on PCI sleeps and our own resume time). 1266 * This easily dwarfs any performance advantage from 1267 * using the cache bypass of indirect GGTT access. 1268 */ 1269 if (!intel_runtime_pm_get_if_in_use(i915)) { 1270 ret = -EFAULT; 1271 goto out_unlock; 1272 } 1273 } else { 1274 /* No backing pages, no fallback, we must force GGTT access */ 1275 intel_runtime_pm_get(i915); 1276 } 1277 1278 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1279 PIN_MAPPABLE | 1280 PIN_NONFAULT | 1281 PIN_NONBLOCK); 1282 if (!IS_ERR(vma)) { 1283 node.start = i915_ggtt_offset(vma); 1284 node.allocated = false; 1285 ret = i915_vma_put_fence(vma); 1286 if (ret) { 1287 i915_vma_unpin(vma); 1288 vma = ERR_PTR(ret); 1289 } 1290 } 1291 if (IS_ERR(vma)) { 1292 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1293 if (ret) 1294 goto out_rpm; 1295 GEM_BUG_ON(!node.allocated); 1296 } 1297 1298 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1299 if (ret) 1300 goto out_unpin; 1301 1302 mutex_unlock(&i915->drm.struct_mutex); 1303 1304 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1305 1306 user_data = u64_to_user_ptr(args->data_ptr); 1307 offset = args->offset; 1308 remain = args->size; 1309 while (remain) { 1310 /* Operation in this page 1311 * 1312 * page_base = page offset within aperture 1313 * page_offset = offset within page 1314 * page_length = bytes to copy for this page 1315 */ 1316 u32 page_base = node.start; 1317 unsigned int page_offset = offset_in_page(offset); 1318 unsigned int page_length = PAGE_SIZE - page_offset; 1319 page_length = remain < page_length ? remain : page_length; 1320 if (node.allocated) { 1321 wmb(); /* flush the write before we modify the GGTT */ 1322 ggtt->base.insert_page(&ggtt->base, 1323 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1324 node.start, I915_CACHE_NONE, 0); 1325 wmb(); /* flush modifications to the GGTT (insert_page) */ 1326 } else { 1327 page_base += offset & PAGE_MASK; 1328 } 1329 /* If we get a fault while copying data, then (presumably) our 1330 * source page isn't available. Return the error and we'll 1331 * retry in the slow path. 1332 * If the object is non-shmem backed, we retry again with the 1333 * path that handles page fault. 1334 */ 1335 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1336 user_data, page_length)) { 1337 ret = -EFAULT; 1338 break; 1339 } 1340 1341 remain -= page_length; 1342 user_data += page_length; 1343 offset += page_length; 1344 } 1345 intel_fb_obj_flush(obj, ORIGIN_CPU); 1346 1347 mutex_lock(&i915->drm.struct_mutex); 1348 out_unpin: 1349 if (node.allocated) { 1350 wmb(); 1351 ggtt->base.clear_range(&ggtt->base, 1352 node.start, node.size); 1353 remove_mappable_node(&node); 1354 } else { 1355 i915_vma_unpin(vma); 1356 } 1357 out_rpm: 1358 intel_runtime_pm_put(i915); 1359 out_unlock: 1360 mutex_unlock(&i915->drm.struct_mutex); 1361 return ret; 1362 } 1363 1364 static int 1365 shmem_pwrite_slow(struct page *page, int offset, int length, 1366 char __user *user_data, 1367 bool page_do_bit17_swizzling, 1368 bool needs_clflush_before, 1369 bool needs_clflush_after) 1370 { 1371 char *vaddr; 1372 int ret; 1373 1374 vaddr = kmap(page); 1375 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1376 shmem_clflush_swizzled_range(vaddr + offset, length, 1377 page_do_bit17_swizzling); 1378 if (page_do_bit17_swizzling) 1379 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1380 length); 1381 else 1382 ret = __copy_from_user(vaddr + offset, user_data, length); 1383 if (needs_clflush_after) 1384 shmem_clflush_swizzled_range(vaddr + offset, length, 1385 page_do_bit17_swizzling); 1386 kunmap(page); 1387 1388 return ret ? -EFAULT : 0; 1389 } 1390 1391 /* Per-page copy function for the shmem pwrite fastpath. 1392 * Flushes invalid cachelines before writing to the target if 1393 * needs_clflush_before is set and flushes out any written cachelines after 1394 * writing if needs_clflush is set. 1395 */ 1396 static int 1397 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, 1398 bool page_do_bit17_swizzling, 1399 bool needs_clflush_before, 1400 bool needs_clflush_after) 1401 { 1402 int ret; 1403 1404 ret = -ENODEV; 1405 if (!page_do_bit17_swizzling) { 1406 char *vaddr = kmap_atomic(page); 1407 1408 if (needs_clflush_before) 1409 drm_clflush_virt_range(vaddr + offset, len); 1410 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); 1411 if (needs_clflush_after) 1412 drm_clflush_virt_range(vaddr + offset, len); 1413 1414 kunmap_atomic(vaddr); 1415 } 1416 if (ret == 0) 1417 return ret; 1418 1419 return shmem_pwrite_slow(page, offset, len, user_data, 1420 page_do_bit17_swizzling, 1421 needs_clflush_before, 1422 needs_clflush_after); 1423 } 1424 1425 static int 1426 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 1427 const struct drm_i915_gem_pwrite *args) 1428 { 1429 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1430 void __user *user_data; 1431 u64 remain; 1432 unsigned int obj_do_bit17_swizzling; 1433 unsigned int partial_cacheline_write; 1434 unsigned int needs_clflush; 1435 unsigned int offset, idx; 1436 int ret; 1437 1438 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1439 if (ret) 1440 return ret; 1441 1442 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); 1443 mutex_unlock(&i915->drm.struct_mutex); 1444 if (ret) 1445 return ret; 1446 1447 obj_do_bit17_swizzling = 0; 1448 if (i915_gem_object_needs_bit17_swizzle(obj)) 1449 obj_do_bit17_swizzling = BIT(17); 1450 1451 /* If we don't overwrite a cacheline completely we need to be 1452 * careful to have up-to-date data by first clflushing. Don't 1453 * overcomplicate things and flush the entire patch. 1454 */ 1455 partial_cacheline_write = 0; 1456 if (needs_clflush & CLFLUSH_BEFORE) 1457 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1458 1459 user_data = u64_to_user_ptr(args->data_ptr); 1460 remain = args->size; 1461 offset = offset_in_page(args->offset); 1462 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1463 struct page *page = i915_gem_object_get_page(obj, idx); 1464 int length; 1465 1466 length = remain; 1467 if (offset + length > PAGE_SIZE) 1468 length = PAGE_SIZE - offset; 1469 1470 ret = shmem_pwrite(page, offset, length, user_data, 1471 page_to_phys(page) & obj_do_bit17_swizzling, 1472 (offset | length) & partial_cacheline_write, 1473 needs_clflush & CLFLUSH_AFTER); 1474 if (ret) 1475 break; 1476 1477 remain -= length; 1478 user_data += length; 1479 offset = 0; 1480 } 1481 1482 intel_fb_obj_flush(obj, ORIGIN_CPU); 1483 i915_gem_obj_finish_shmem_access(obj); 1484 return ret; 1485 } 1486 1487 /** 1488 * Writes data to the object referenced by handle. 1489 * @dev: drm device 1490 * @data: ioctl data blob 1491 * @file: drm file 1492 * 1493 * On error, the contents of the buffer that were to be modified are undefined. 1494 */ 1495 int 1496 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1497 struct drm_file *file) 1498 { 1499 struct drm_i915_gem_pwrite *args = data; 1500 struct drm_i915_gem_object *obj; 1501 int ret; 1502 1503 if (args->size == 0) 1504 return 0; 1505 1506 if (!access_ok(VERIFY_READ, 1507 u64_to_user_ptr(args->data_ptr), 1508 args->size)) 1509 return -EFAULT; 1510 1511 obj = i915_gem_object_lookup(file, args->handle); 1512 if (!obj) 1513 return -ENOENT; 1514 1515 /* Bounds check destination. */ 1516 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1517 ret = -EINVAL; 1518 goto err; 1519 } 1520 1521 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1522 1523 ret = -ENODEV; 1524 if (obj->ops->pwrite) 1525 ret = obj->ops->pwrite(obj, args); 1526 if (ret != -ENODEV) 1527 goto err; 1528 1529 ret = i915_gem_object_wait(obj, 1530 I915_WAIT_INTERRUPTIBLE | 1531 I915_WAIT_ALL, 1532 MAX_SCHEDULE_TIMEOUT, 1533 to_rps_client(file)); 1534 if (ret) 1535 goto err; 1536 1537 ret = i915_gem_object_pin_pages(obj); 1538 if (ret) 1539 goto err; 1540 1541 ret = -EFAULT; 1542 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1543 * it would end up going through the fenced access, and we'll get 1544 * different detiling behavior between reading and writing. 1545 * pread/pwrite currently are reading and writing from the CPU 1546 * perspective, requiring manual detiling by the client. 1547 */ 1548 if (!i915_gem_object_has_struct_page(obj) || 1549 cpu_write_needs_clflush(obj)) 1550 /* Note that the gtt paths might fail with non-page-backed user 1551 * pointers (e.g. gtt mappings when moving data between 1552 * textures). Fallback to the shmem path in that case. 1553 */ 1554 ret = i915_gem_gtt_pwrite_fast(obj, args); 1555 1556 if (ret == -EFAULT || ret == -ENOSPC) { 1557 if (obj->phys_handle) 1558 ret = i915_gem_phys_pwrite(obj, args, file); 1559 else 1560 ret = i915_gem_shmem_pwrite(obj, args); 1561 } 1562 1563 i915_gem_object_unpin_pages(obj); 1564 err: 1565 i915_gem_object_put(obj); 1566 return ret; 1567 } 1568 1569 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1570 { 1571 struct drm_i915_private *i915; 1572 struct list_head *list; 1573 struct i915_vma *vma; 1574 1575 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1576 1577 for_each_ggtt_vma(vma, obj) { 1578 if (i915_vma_is_active(vma)) 1579 continue; 1580 1581 if (!drm_mm_node_allocated(&vma->node)) 1582 continue; 1583 1584 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1585 } 1586 1587 i915 = to_i915(obj->base.dev); 1588 spin_lock(&i915->mm.obj_lock); 1589 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1590 list_move_tail(&obj->mm.link, list); 1591 spin_unlock(&i915->mm.obj_lock); 1592 } 1593 1594 /** 1595 * Called when user space prepares to use an object with the CPU, either 1596 * through the mmap ioctl's mapping or a GTT mapping. 1597 * @dev: drm device 1598 * @data: ioctl data blob 1599 * @file: drm file 1600 */ 1601 int 1602 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1603 struct drm_file *file) 1604 { 1605 struct drm_i915_gem_set_domain *args = data; 1606 struct drm_i915_gem_object *obj; 1607 uint32_t read_domains = args->read_domains; 1608 uint32_t write_domain = args->write_domain; 1609 int err; 1610 1611 /* Only handle setting domains to types used by the CPU. */ 1612 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1613 return -EINVAL; 1614 1615 /* Having something in the write domain implies it's in the read 1616 * domain, and only that read domain. Enforce that in the request. 1617 */ 1618 if (write_domain != 0 && read_domains != write_domain) 1619 return -EINVAL; 1620 1621 obj = i915_gem_object_lookup(file, args->handle); 1622 if (!obj) 1623 return -ENOENT; 1624 1625 /* Try to flush the object off the GPU without holding the lock. 1626 * We will repeat the flush holding the lock in the normal manner 1627 * to catch cases where we are gazumped. 1628 */ 1629 err = i915_gem_object_wait(obj, 1630 I915_WAIT_INTERRUPTIBLE | 1631 (write_domain ? I915_WAIT_ALL : 0), 1632 MAX_SCHEDULE_TIMEOUT, 1633 to_rps_client(file)); 1634 if (err) 1635 goto out; 1636 1637 /* 1638 * Proxy objects do not control access to the backing storage, ergo 1639 * they cannot be used as a means to manipulate the cache domain 1640 * tracking for that backing storage. The proxy object is always 1641 * considered to be outside of any cache domain. 1642 */ 1643 if (i915_gem_object_is_proxy(obj)) { 1644 err = -ENXIO; 1645 goto out; 1646 } 1647 1648 /* 1649 * Flush and acquire obj->pages so that we are coherent through 1650 * direct access in memory with previous cached writes through 1651 * shmemfs and that our cache domain tracking remains valid. 1652 * For example, if the obj->filp was moved to swap without us 1653 * being notified and releasing the pages, we would mistakenly 1654 * continue to assume that the obj remained out of the CPU cached 1655 * domain. 1656 */ 1657 err = i915_gem_object_pin_pages(obj); 1658 if (err) 1659 goto out; 1660 1661 err = i915_mutex_lock_interruptible(dev); 1662 if (err) 1663 goto out_unpin; 1664 1665 if (read_domains & I915_GEM_DOMAIN_WC) 1666 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1667 else if (read_domains & I915_GEM_DOMAIN_GTT) 1668 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1669 else 1670 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1671 1672 /* And bump the LRU for this access */ 1673 i915_gem_object_bump_inactive_ggtt(obj); 1674 1675 mutex_unlock(&dev->struct_mutex); 1676 1677 if (write_domain != 0) 1678 intel_fb_obj_invalidate(obj, 1679 fb_write_origin(obj, write_domain)); 1680 1681 out_unpin: 1682 i915_gem_object_unpin_pages(obj); 1683 out: 1684 i915_gem_object_put(obj); 1685 return err; 1686 } 1687 1688 /** 1689 * Called when user space has done writes to this buffer 1690 * @dev: drm device 1691 * @data: ioctl data blob 1692 * @file: drm file 1693 */ 1694 int 1695 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1696 struct drm_file *file) 1697 { 1698 struct drm_i915_gem_sw_finish *args = data; 1699 struct drm_i915_gem_object *obj; 1700 1701 obj = i915_gem_object_lookup(file, args->handle); 1702 if (!obj) 1703 return -ENOENT; 1704 1705 /* 1706 * Proxy objects are barred from CPU access, so there is no 1707 * need to ban sw_finish as it is a nop. 1708 */ 1709 1710 /* Pinned buffers may be scanout, so flush the cache */ 1711 i915_gem_object_flush_if_display(obj); 1712 i915_gem_object_put(obj); 1713 1714 return 0; 1715 } 1716 1717 /** 1718 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1719 * it is mapped to. 1720 * @dev: drm device 1721 * @data: ioctl data blob 1722 * @file: drm file 1723 * 1724 * While the mapping holds a reference on the contents of the object, it doesn't 1725 * imply a ref on the object itself. 1726 * 1727 * IMPORTANT: 1728 * 1729 * DRM driver writers who look a this function as an example for how to do GEM 1730 * mmap support, please don't implement mmap support like here. The modern way 1731 * to implement DRM mmap support is with an mmap offset ioctl (like 1732 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1733 * That way debug tooling like valgrind will understand what's going on, hiding 1734 * the mmap call in a driver private ioctl will break that. The i915 driver only 1735 * does cpu mmaps this way because we didn't know better. 1736 */ 1737 int 1738 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1739 struct drm_file *file) 1740 { 1741 struct drm_i915_gem_mmap *args = data; 1742 struct drm_i915_gem_object *obj; 1743 unsigned long addr; 1744 1745 if (args->flags & ~(I915_MMAP_WC)) 1746 return -EINVAL; 1747 1748 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1749 return -ENODEV; 1750 1751 obj = i915_gem_object_lookup(file, args->handle); 1752 if (!obj) 1753 return -ENOENT; 1754 1755 /* prime objects have no backing filp to GEM mmap 1756 * pages from. 1757 */ 1758 if (!obj->base.filp) { 1759 i915_gem_object_put(obj); 1760 return -ENXIO; 1761 } 1762 1763 addr = vm_mmap(obj->base.filp, 0, args->size, 1764 PROT_READ | PROT_WRITE, MAP_SHARED, 1765 args->offset); 1766 if (args->flags & I915_MMAP_WC) { 1767 struct mm_struct *mm = current->mm; 1768 struct vm_area_struct *vma; 1769 1770 if (down_write_killable(&mm->mmap_sem)) { 1771 i915_gem_object_put(obj); 1772 return -EINTR; 1773 } 1774 vma = find_vma(mm, addr); 1775 if (vma) 1776 vma->vm_page_prot = 1777 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1778 else 1779 addr = -ENOMEM; 1780 up_write(&mm->mmap_sem); 1781 1782 /* This may race, but that's ok, it only gets set */ 1783 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1784 } 1785 i915_gem_object_put(obj); 1786 if (IS_ERR((void *)addr)) 1787 return addr; 1788 1789 args->addr_ptr = (uint64_t) addr; 1790 1791 return 0; 1792 } 1793 1794 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) 1795 { 1796 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1797 } 1798 1799 /** 1800 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1801 * 1802 * A history of the GTT mmap interface: 1803 * 1804 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1805 * aligned and suitable for fencing, and still fit into the available 1806 * mappable space left by the pinned display objects. A classic problem 1807 * we called the page-fault-of-doom where we would ping-pong between 1808 * two objects that could not fit inside the GTT and so the memcpy 1809 * would page one object in at the expense of the other between every 1810 * single byte. 1811 * 1812 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1813 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the 1814 * object is too large for the available space (or simply too large 1815 * for the mappable aperture!), a view is created instead and faulted 1816 * into userspace. (This view is aligned and sized appropriately for 1817 * fenced access.) 1818 * 1819 * 2 - Recognise WC as a separate cache domain so that we can flush the 1820 * delayed writes via GTT before performing direct access via WC. 1821 * 1822 * Restrictions: 1823 * 1824 * * snoopable objects cannot be accessed via the GTT. It can cause machine 1825 * hangs on some architectures, corruption on others. An attempt to service 1826 * a GTT page fault from a snoopable object will generate a SIGBUS. 1827 * 1828 * * the object must be able to fit into RAM (physical memory, though no 1829 * limited to the mappable aperture). 1830 * 1831 * 1832 * Caveats: 1833 * 1834 * * a new GTT page fault will synchronize rendering from the GPU and flush 1835 * all data to system memory. Subsequent access will not be synchronized. 1836 * 1837 * * all mappings are revoked on runtime device suspend. 1838 * 1839 * * there are only 8, 16 or 32 fence registers to share between all users 1840 * (older machines require fence register for display and blitter access 1841 * as well). Contention of the fence registers will cause the previous users 1842 * to be unmapped and any new access will generate new page faults. 1843 * 1844 * * running out of memory while servicing a fault may generate a SIGBUS, 1845 * rather than the expected SIGSEGV. 1846 */ 1847 int i915_gem_mmap_gtt_version(void) 1848 { 1849 return 2; 1850 } 1851 1852 static inline struct i915_ggtt_view 1853 compute_partial_view(struct drm_i915_gem_object *obj, 1854 pgoff_t page_offset, 1855 unsigned int chunk) 1856 { 1857 struct i915_ggtt_view view; 1858 1859 if (i915_gem_object_is_tiled(obj)) 1860 chunk = roundup(chunk, tile_row_pages(obj)); 1861 1862 view.type = I915_GGTT_VIEW_PARTIAL; 1863 view.partial.offset = rounddown(page_offset, chunk); 1864 view.partial.size = 1865 min_t(unsigned int, chunk, 1866 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1867 1868 /* If the partial covers the entire object, just create a normal VMA. */ 1869 if (chunk >= obj->base.size >> PAGE_SHIFT) 1870 view.type = I915_GGTT_VIEW_NORMAL; 1871 1872 return view; 1873 } 1874 1875 /** 1876 * i915_gem_fault - fault a page into the GTT 1877 * @vmf: fault info 1878 * 1879 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1880 * from userspace. The fault handler takes care of binding the object to 1881 * the GTT (if needed), allocating and programming a fence register (again, 1882 * only if needed based on whether the old reg is still valid or the object 1883 * is tiled) and inserting a new PTE into the faulting process. 1884 * 1885 * Note that the faulting process may involve evicting existing objects 1886 * from the GTT and/or fence registers to make room. So performance may 1887 * suffer if the GTT working set is large or there are few fence registers 1888 * left. 1889 * 1890 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 1891 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 1892 */ 1893 int i915_gem_fault(struct vm_fault *vmf) 1894 { 1895 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ 1896 struct vm_area_struct *area = vmf->vma; 1897 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 1898 struct drm_device *dev = obj->base.dev; 1899 struct drm_i915_private *dev_priv = to_i915(dev); 1900 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1901 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1902 struct i915_vma *vma; 1903 pgoff_t page_offset; 1904 unsigned int flags; 1905 int ret; 1906 1907 /* We don't use vmf->pgoff since that has the fake offset */ 1908 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 1909 1910 trace_i915_gem_object_fault(obj, page_offset, true, write); 1911 1912 /* Try to flush the object off the GPU first without holding the lock. 1913 * Upon acquiring the lock, we will perform our sanity checks and then 1914 * repeat the flush holding the lock in the normal manner to catch cases 1915 * where we are gazumped. 1916 */ 1917 ret = i915_gem_object_wait(obj, 1918 I915_WAIT_INTERRUPTIBLE, 1919 MAX_SCHEDULE_TIMEOUT, 1920 NULL); 1921 if (ret) 1922 goto err; 1923 1924 ret = i915_gem_object_pin_pages(obj); 1925 if (ret) 1926 goto err; 1927 1928 intel_runtime_pm_get(dev_priv); 1929 1930 ret = i915_mutex_lock_interruptible(dev); 1931 if (ret) 1932 goto err_rpm; 1933 1934 /* Access to snoopable pages through the GTT is incoherent. */ 1935 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1936 ret = -EFAULT; 1937 goto err_unlock; 1938 } 1939 1940 /* If the object is smaller than a couple of partial vma, it is 1941 * not worth only creating a single partial vma - we may as well 1942 * clear enough space for the full object. 1943 */ 1944 flags = PIN_MAPPABLE; 1945 if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) 1946 flags |= PIN_NONBLOCK | PIN_NONFAULT; 1947 1948 /* Now pin it into the GTT as needed */ 1949 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); 1950 if (IS_ERR(vma)) { 1951 /* Use a partial view if it is bigger than available space */ 1952 struct i915_ggtt_view view = 1953 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 1954 1955 /* Userspace is now writing through an untracked VMA, abandon 1956 * all hope that the hardware is able to track future writes. 1957 */ 1958 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 1959 1960 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 1961 } 1962 if (IS_ERR(vma)) { 1963 ret = PTR_ERR(vma); 1964 goto err_unlock; 1965 } 1966 1967 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1968 if (ret) 1969 goto err_unpin; 1970 1971 ret = i915_vma_pin_fence(vma); 1972 if (ret) 1973 goto err_unpin; 1974 1975 /* Finally, remap it using the new GTT offset */ 1976 ret = remap_io_mapping(area, 1977 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 1978 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 1979 min_t(u64, vma->size, area->vm_end - area->vm_start), 1980 &ggtt->iomap); 1981 if (ret) 1982 goto err_fence; 1983 1984 /* Mark as being mmapped into userspace for later revocation */ 1985 assert_rpm_wakelock_held(dev_priv); 1986 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 1987 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 1988 GEM_BUG_ON(!obj->userfault_count); 1989 1990 i915_vma_set_ggtt_write(vma); 1991 1992 err_fence: 1993 i915_vma_unpin_fence(vma); 1994 err_unpin: 1995 __i915_vma_unpin(vma); 1996 err_unlock: 1997 mutex_unlock(&dev->struct_mutex); 1998 err_rpm: 1999 intel_runtime_pm_put(dev_priv); 2000 i915_gem_object_unpin_pages(obj); 2001 err: 2002 switch (ret) { 2003 case -EIO: 2004 /* 2005 * We eat errors when the gpu is terminally wedged to avoid 2006 * userspace unduly crashing (gl has no provisions for mmaps to 2007 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2008 * and so needs to be reported. 2009 */ 2010 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2011 ret = VM_FAULT_SIGBUS; 2012 break; 2013 } 2014 case -EAGAIN: 2015 /* 2016 * EAGAIN means the gpu is hung and we'll wait for the error 2017 * handler to reset everything when re-faulting in 2018 * i915_mutex_lock_interruptible. 2019 */ 2020 case 0: 2021 case -ERESTARTSYS: 2022 case -EINTR: 2023 case -EBUSY: 2024 /* 2025 * EBUSY is ok: this just means that another thread 2026 * already did the job. 2027 */ 2028 ret = VM_FAULT_NOPAGE; 2029 break; 2030 case -ENOMEM: 2031 ret = VM_FAULT_OOM; 2032 break; 2033 case -ENOSPC: 2034 case -EFAULT: 2035 ret = VM_FAULT_SIGBUS; 2036 break; 2037 default: 2038 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2039 ret = VM_FAULT_SIGBUS; 2040 break; 2041 } 2042 return ret; 2043 } 2044 2045 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2046 { 2047 struct i915_vma *vma; 2048 2049 GEM_BUG_ON(!obj->userfault_count); 2050 2051 obj->userfault_count = 0; 2052 list_del(&obj->userfault_link); 2053 drm_vma_node_unmap(&obj->base.vma_node, 2054 obj->base.dev->anon_inode->i_mapping); 2055 2056 for_each_ggtt_vma(vma, obj) 2057 i915_vma_unset_userfault(vma); 2058 } 2059 2060 /** 2061 * i915_gem_release_mmap - remove physical page mappings 2062 * @obj: obj in question 2063 * 2064 * Preserve the reservation of the mmapping with the DRM core code, but 2065 * relinquish ownership of the pages back to the system. 2066 * 2067 * It is vital that we remove the page mapping if we have mapped a tiled 2068 * object through the GTT and then lose the fence register due to 2069 * resource pressure. Similarly if the object has been moved out of the 2070 * aperture, than pages mapped into userspace must be revoked. Removing the 2071 * mapping will then trigger a page fault on the next user access, allowing 2072 * fixup by i915_gem_fault(). 2073 */ 2074 void 2075 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2076 { 2077 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2078 2079 /* Serialisation between user GTT access and our code depends upon 2080 * revoking the CPU's PTE whilst the mutex is held. The next user 2081 * pagefault then has to wait until we release the mutex. 2082 * 2083 * Note that RPM complicates somewhat by adding an additional 2084 * requirement that operations to the GGTT be made holding the RPM 2085 * wakeref. 2086 */ 2087 lockdep_assert_held(&i915->drm.struct_mutex); 2088 intel_runtime_pm_get(i915); 2089 2090 if (!obj->userfault_count) 2091 goto out; 2092 2093 __i915_gem_object_release_mmap(obj); 2094 2095 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2096 * memory transactions from userspace before we return. The TLB 2097 * flushing implied above by changing the PTE above *should* be 2098 * sufficient, an extra barrier here just provides us with a bit 2099 * of paranoid documentation about our requirement to serialise 2100 * memory writes before touching registers / GSM. 2101 */ 2102 wmb(); 2103 2104 out: 2105 intel_runtime_pm_put(i915); 2106 } 2107 2108 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2109 { 2110 struct drm_i915_gem_object *obj, *on; 2111 int i; 2112 2113 /* 2114 * Only called during RPM suspend. All users of the userfault_list 2115 * must be holding an RPM wakeref to ensure that this can not 2116 * run concurrently with themselves (and use the struct_mutex for 2117 * protection between themselves). 2118 */ 2119 2120 list_for_each_entry_safe(obj, on, 2121 &dev_priv->mm.userfault_list, userfault_link) 2122 __i915_gem_object_release_mmap(obj); 2123 2124 /* The fence will be lost when the device powers down. If any were 2125 * in use by hardware (i.e. they are pinned), we should not be powering 2126 * down! All other fences will be reacquired by the user upon waking. 2127 */ 2128 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2129 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2130 2131 /* Ideally we want to assert that the fence register is not 2132 * live at this point (i.e. that no piece of code will be 2133 * trying to write through fence + GTT, as that both violates 2134 * our tracking of activity and associated locking/barriers, 2135 * but also is illegal given that the hw is powered down). 2136 * 2137 * Previously we used reg->pin_count as a "liveness" indicator. 2138 * That is not sufficient, and we need a more fine-grained 2139 * tool if we want to have a sanity check here. 2140 */ 2141 2142 if (!reg->vma) 2143 continue; 2144 2145 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2146 reg->dirty = true; 2147 } 2148 } 2149 2150 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2151 { 2152 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2153 int err; 2154 2155 err = drm_gem_create_mmap_offset(&obj->base); 2156 if (likely(!err)) 2157 return 0; 2158 2159 /* Attempt to reap some mmap space from dead objects */ 2160 do { 2161 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2162 if (err) 2163 break; 2164 2165 i915_gem_drain_freed_objects(dev_priv); 2166 err = drm_gem_create_mmap_offset(&obj->base); 2167 if (!err) 2168 break; 2169 2170 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2171 2172 return err; 2173 } 2174 2175 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2176 { 2177 drm_gem_free_mmap_offset(&obj->base); 2178 } 2179 2180 int 2181 i915_gem_mmap_gtt(struct drm_file *file, 2182 struct drm_device *dev, 2183 uint32_t handle, 2184 uint64_t *offset) 2185 { 2186 struct drm_i915_gem_object *obj; 2187 int ret; 2188 2189 obj = i915_gem_object_lookup(file, handle); 2190 if (!obj) 2191 return -ENOENT; 2192 2193 ret = i915_gem_object_create_mmap_offset(obj); 2194 if (ret == 0) 2195 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2196 2197 i915_gem_object_put(obj); 2198 return ret; 2199 } 2200 2201 /** 2202 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2203 * @dev: DRM device 2204 * @data: GTT mapping ioctl data 2205 * @file: GEM object info 2206 * 2207 * Simply returns the fake offset to userspace so it can mmap it. 2208 * The mmap call will end up in drm_gem_mmap(), which will set things 2209 * up so we can get faults in the handler above. 2210 * 2211 * The fault handler will take care of binding the object into the GTT 2212 * (since it may have been evicted to make room for something), allocating 2213 * a fence register, and mapping the appropriate aperture address into 2214 * userspace. 2215 */ 2216 int 2217 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2218 struct drm_file *file) 2219 { 2220 struct drm_i915_gem_mmap_gtt *args = data; 2221 2222 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2223 } 2224 2225 /* Immediately discard the backing storage */ 2226 static void 2227 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2228 { 2229 i915_gem_object_free_mmap_offset(obj); 2230 2231 if (obj->base.filp == NULL) 2232 return; 2233 2234 /* Our goal here is to return as much of the memory as 2235 * is possible back to the system as we are called from OOM. 2236 * To do this we must instruct the shmfs to drop all of its 2237 * backing pages, *now*. 2238 */ 2239 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2240 obj->mm.madv = __I915_MADV_PURGED; 2241 obj->mm.pages = ERR_PTR(-EFAULT); 2242 } 2243 2244 /* Try to discard unwanted pages */ 2245 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2246 { 2247 struct address_space *mapping; 2248 2249 lockdep_assert_held(&obj->mm.lock); 2250 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2251 2252 switch (obj->mm.madv) { 2253 case I915_MADV_DONTNEED: 2254 i915_gem_object_truncate(obj); 2255 case __I915_MADV_PURGED: 2256 return; 2257 } 2258 2259 if (obj->base.filp == NULL) 2260 return; 2261 2262 mapping = obj->base.filp->f_mapping, 2263 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2264 } 2265 2266 static void 2267 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2268 struct sg_table *pages) 2269 { 2270 struct sgt_iter sgt_iter; 2271 struct page *page; 2272 2273 __i915_gem_object_release_shmem(obj, pages, true); 2274 2275 i915_gem_gtt_finish_pages(obj, pages); 2276 2277 if (i915_gem_object_needs_bit17_swizzle(obj)) 2278 i915_gem_object_save_bit_17_swizzle(obj, pages); 2279 2280 for_each_sgt_page(page, sgt_iter, pages) { 2281 if (obj->mm.dirty) 2282 set_page_dirty(page); 2283 2284 if (obj->mm.madv == I915_MADV_WILLNEED) 2285 mark_page_accessed(page); 2286 2287 put_page(page); 2288 } 2289 obj->mm.dirty = false; 2290 2291 sg_free_table(pages); 2292 kfree(pages); 2293 } 2294 2295 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2296 { 2297 struct radix_tree_iter iter; 2298 void __rcu **slot; 2299 2300 rcu_read_lock(); 2301 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2302 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2303 rcu_read_unlock(); 2304 } 2305 2306 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2307 enum i915_mm_subclass subclass) 2308 { 2309 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2310 struct sg_table *pages; 2311 2312 if (i915_gem_object_has_pinned_pages(obj)) 2313 return; 2314 2315 GEM_BUG_ON(obj->bind_count); 2316 if (!i915_gem_object_has_pages(obj)) 2317 return; 2318 2319 /* May be called by shrinker from within get_pages() (on another bo) */ 2320 mutex_lock_nested(&obj->mm.lock, subclass); 2321 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2322 goto unlock; 2323 2324 /* ->put_pages might need to allocate memory for the bit17 swizzle 2325 * array, hence protect them from being reaped by removing them from gtt 2326 * lists early. */ 2327 pages = fetch_and_zero(&obj->mm.pages); 2328 GEM_BUG_ON(!pages); 2329 2330 spin_lock(&i915->mm.obj_lock); 2331 list_del(&obj->mm.link); 2332 spin_unlock(&i915->mm.obj_lock); 2333 2334 if (obj->mm.mapping) { 2335 void *ptr; 2336 2337 ptr = page_mask_bits(obj->mm.mapping); 2338 if (is_vmalloc_addr(ptr)) 2339 vunmap(ptr); 2340 else 2341 kunmap(kmap_to_page(ptr)); 2342 2343 obj->mm.mapping = NULL; 2344 } 2345 2346 __i915_gem_object_reset_page_iter(obj); 2347 2348 if (!IS_ERR(pages)) 2349 obj->ops->put_pages(obj, pages); 2350 2351 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2352 2353 unlock: 2354 mutex_unlock(&obj->mm.lock); 2355 } 2356 2357 static bool i915_sg_trim(struct sg_table *orig_st) 2358 { 2359 struct sg_table new_st; 2360 struct scatterlist *sg, *new_sg; 2361 unsigned int i; 2362 2363 if (orig_st->nents == orig_st->orig_nents) 2364 return false; 2365 2366 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2367 return false; 2368 2369 new_sg = new_st.sgl; 2370 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2371 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2372 /* called before being DMA mapped, no need to copy sg->dma_* */ 2373 new_sg = sg_next(new_sg); 2374 } 2375 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2376 2377 sg_free_table(orig_st); 2378 2379 *orig_st = new_st; 2380 return true; 2381 } 2382 2383 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2384 { 2385 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2386 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2387 unsigned long i; 2388 struct address_space *mapping; 2389 struct sg_table *st; 2390 struct scatterlist *sg; 2391 struct sgt_iter sgt_iter; 2392 struct page *page; 2393 unsigned long last_pfn = 0; /* suppress gcc warning */ 2394 unsigned int max_segment = i915_sg_segment_size(); 2395 unsigned int sg_page_sizes; 2396 gfp_t noreclaim; 2397 int ret; 2398 2399 /* Assert that the object is not currently in any GPU domain. As it 2400 * wasn't in the GTT, there shouldn't be any way it could have been in 2401 * a GPU cache 2402 */ 2403 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2404 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2405 2406 st = kmalloc(sizeof(*st), GFP_KERNEL); 2407 if (st == NULL) 2408 return -ENOMEM; 2409 2410 rebuild_st: 2411 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2412 kfree(st); 2413 return -ENOMEM; 2414 } 2415 2416 /* Get the list of pages out of our struct file. They'll be pinned 2417 * at this point until we release them. 2418 * 2419 * Fail silently without starting the shrinker 2420 */ 2421 mapping = obj->base.filp->f_mapping; 2422 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2423 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2424 2425 sg = st->sgl; 2426 st->nents = 0; 2427 sg_page_sizes = 0; 2428 for (i = 0; i < page_count; i++) { 2429 const unsigned int shrink[] = { 2430 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2431 0, 2432 }, *s = shrink; 2433 gfp_t gfp = noreclaim; 2434 2435 do { 2436 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2437 if (likely(!IS_ERR(page))) 2438 break; 2439 2440 if (!*s) { 2441 ret = PTR_ERR(page); 2442 goto err_sg; 2443 } 2444 2445 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2446 cond_resched(); 2447 2448 /* We've tried hard to allocate the memory by reaping 2449 * our own buffer, now let the real VM do its job and 2450 * go down in flames if truly OOM. 2451 * 2452 * However, since graphics tend to be disposable, 2453 * defer the oom here by reporting the ENOMEM back 2454 * to userspace. 2455 */ 2456 if (!*s) { 2457 /* reclaim and warn, but no oom */ 2458 gfp = mapping_gfp_mask(mapping); 2459 2460 /* Our bo are always dirty and so we require 2461 * kswapd to reclaim our pages (direct reclaim 2462 * does not effectively begin pageout of our 2463 * buffers on its own). However, direct reclaim 2464 * only waits for kswapd when under allocation 2465 * congestion. So as a result __GFP_RECLAIM is 2466 * unreliable and fails to actually reclaim our 2467 * dirty pages -- unless you try over and over 2468 * again with !__GFP_NORETRY. However, we still 2469 * want to fail this allocation rather than 2470 * trigger the out-of-memory killer and for 2471 * this we want __GFP_RETRY_MAYFAIL. 2472 */ 2473 gfp |= __GFP_RETRY_MAYFAIL; 2474 } 2475 } while (1); 2476 2477 if (!i || 2478 sg->length >= max_segment || 2479 page_to_pfn(page) != last_pfn + 1) { 2480 if (i) { 2481 sg_page_sizes |= sg->length; 2482 sg = sg_next(sg); 2483 } 2484 st->nents++; 2485 sg_set_page(sg, page, PAGE_SIZE, 0); 2486 } else { 2487 sg->length += PAGE_SIZE; 2488 } 2489 last_pfn = page_to_pfn(page); 2490 2491 /* Check that the i965g/gm workaround works. */ 2492 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2493 } 2494 if (sg) { /* loop terminated early; short sg table */ 2495 sg_page_sizes |= sg->length; 2496 sg_mark_end(sg); 2497 } 2498 2499 /* Trim unused sg entries to avoid wasting memory. */ 2500 i915_sg_trim(st); 2501 2502 ret = i915_gem_gtt_prepare_pages(obj, st); 2503 if (ret) { 2504 /* DMA remapping failed? One possible cause is that 2505 * it could not reserve enough large entries, asking 2506 * for PAGE_SIZE chunks instead may be helpful. 2507 */ 2508 if (max_segment > PAGE_SIZE) { 2509 for_each_sgt_page(page, sgt_iter, st) 2510 put_page(page); 2511 sg_free_table(st); 2512 2513 max_segment = PAGE_SIZE; 2514 goto rebuild_st; 2515 } else { 2516 dev_warn(&dev_priv->drm.pdev->dev, 2517 "Failed to DMA remap %lu pages\n", 2518 page_count); 2519 goto err_pages; 2520 } 2521 } 2522 2523 if (i915_gem_object_needs_bit17_swizzle(obj)) 2524 i915_gem_object_do_bit_17_swizzle(obj, st); 2525 2526 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2527 2528 return 0; 2529 2530 err_sg: 2531 sg_mark_end(sg); 2532 err_pages: 2533 for_each_sgt_page(page, sgt_iter, st) 2534 put_page(page); 2535 sg_free_table(st); 2536 kfree(st); 2537 2538 /* shmemfs first checks if there is enough memory to allocate the page 2539 * and reports ENOSPC should there be insufficient, along with the usual 2540 * ENOMEM for a genuine allocation failure. 2541 * 2542 * We use ENOSPC in our driver to mean that we have run out of aperture 2543 * space and so want to translate the error from shmemfs back to our 2544 * usual understanding of ENOMEM. 2545 */ 2546 if (ret == -ENOSPC) 2547 ret = -ENOMEM; 2548 2549 return ret; 2550 } 2551 2552 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2553 struct sg_table *pages, 2554 unsigned int sg_page_sizes) 2555 { 2556 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2557 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2558 int i; 2559 2560 lockdep_assert_held(&obj->mm.lock); 2561 2562 obj->mm.get_page.sg_pos = pages->sgl; 2563 obj->mm.get_page.sg_idx = 0; 2564 2565 obj->mm.pages = pages; 2566 2567 if (i915_gem_object_is_tiled(obj) && 2568 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2569 GEM_BUG_ON(obj->mm.quirked); 2570 __i915_gem_object_pin_pages(obj); 2571 obj->mm.quirked = true; 2572 } 2573 2574 GEM_BUG_ON(!sg_page_sizes); 2575 obj->mm.page_sizes.phys = sg_page_sizes; 2576 2577 /* 2578 * Calculate the supported page-sizes which fit into the given 2579 * sg_page_sizes. This will give us the page-sizes which we may be able 2580 * to use opportunistically when later inserting into the GTT. For 2581 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2582 * 64K or 4K pages, although in practice this will depend on a number of 2583 * other factors. 2584 */ 2585 obj->mm.page_sizes.sg = 0; 2586 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2587 if (obj->mm.page_sizes.phys & ~0u << i) 2588 obj->mm.page_sizes.sg |= BIT(i); 2589 } 2590 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2591 2592 spin_lock(&i915->mm.obj_lock); 2593 list_add(&obj->mm.link, &i915->mm.unbound_list); 2594 spin_unlock(&i915->mm.obj_lock); 2595 } 2596 2597 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2598 { 2599 int err; 2600 2601 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2602 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2603 return -EFAULT; 2604 } 2605 2606 err = obj->ops->get_pages(obj); 2607 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2608 2609 return err; 2610 } 2611 2612 /* Ensure that the associated pages are gathered from the backing storage 2613 * and pinned into our object. i915_gem_object_pin_pages() may be called 2614 * multiple times before they are released by a single call to 2615 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2616 * either as a result of memory pressure (reaping pages under the shrinker) 2617 * or as the object is itself released. 2618 */ 2619 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2620 { 2621 int err; 2622 2623 err = mutex_lock_interruptible(&obj->mm.lock); 2624 if (err) 2625 return err; 2626 2627 if (unlikely(!i915_gem_object_has_pages(obj))) { 2628 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2629 2630 err = ____i915_gem_object_get_pages(obj); 2631 if (err) 2632 goto unlock; 2633 2634 smp_mb__before_atomic(); 2635 } 2636 atomic_inc(&obj->mm.pages_pin_count); 2637 2638 unlock: 2639 mutex_unlock(&obj->mm.lock); 2640 return err; 2641 } 2642 2643 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2644 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2645 enum i915_map_type type) 2646 { 2647 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2648 struct sg_table *sgt = obj->mm.pages; 2649 struct sgt_iter sgt_iter; 2650 struct page *page; 2651 struct page *stack_pages[32]; 2652 struct page **pages = stack_pages; 2653 unsigned long i = 0; 2654 pgprot_t pgprot; 2655 void *addr; 2656 2657 /* A single page can always be kmapped */ 2658 if (n_pages == 1 && type == I915_MAP_WB) 2659 return kmap(sg_page(sgt->sgl)); 2660 2661 if (n_pages > ARRAY_SIZE(stack_pages)) { 2662 /* Too big for stack -- allocate temporary array instead */ 2663 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2664 if (!pages) 2665 return NULL; 2666 } 2667 2668 for_each_sgt_page(page, sgt_iter, sgt) 2669 pages[i++] = page; 2670 2671 /* Check that we have the expected number of pages */ 2672 GEM_BUG_ON(i != n_pages); 2673 2674 switch (type) { 2675 default: 2676 MISSING_CASE(type); 2677 /* fallthrough to use PAGE_KERNEL anyway */ 2678 case I915_MAP_WB: 2679 pgprot = PAGE_KERNEL; 2680 break; 2681 case I915_MAP_WC: 2682 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2683 break; 2684 } 2685 addr = vmap(pages, n_pages, 0, pgprot); 2686 2687 if (pages != stack_pages) 2688 kvfree(pages); 2689 2690 return addr; 2691 } 2692 2693 /* get, pin, and map the pages of the object into kernel space */ 2694 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2695 enum i915_map_type type) 2696 { 2697 enum i915_map_type has_type; 2698 bool pinned; 2699 void *ptr; 2700 int ret; 2701 2702 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2703 return ERR_PTR(-ENXIO); 2704 2705 ret = mutex_lock_interruptible(&obj->mm.lock); 2706 if (ret) 2707 return ERR_PTR(ret); 2708 2709 pinned = !(type & I915_MAP_OVERRIDE); 2710 type &= ~I915_MAP_OVERRIDE; 2711 2712 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2713 if (unlikely(!i915_gem_object_has_pages(obj))) { 2714 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2715 2716 ret = ____i915_gem_object_get_pages(obj); 2717 if (ret) 2718 goto err_unlock; 2719 2720 smp_mb__before_atomic(); 2721 } 2722 atomic_inc(&obj->mm.pages_pin_count); 2723 pinned = false; 2724 } 2725 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2726 2727 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2728 if (ptr && has_type != type) { 2729 if (pinned) { 2730 ret = -EBUSY; 2731 goto err_unpin; 2732 } 2733 2734 if (is_vmalloc_addr(ptr)) 2735 vunmap(ptr); 2736 else 2737 kunmap(kmap_to_page(ptr)); 2738 2739 ptr = obj->mm.mapping = NULL; 2740 } 2741 2742 if (!ptr) { 2743 ptr = i915_gem_object_map(obj, type); 2744 if (!ptr) { 2745 ret = -ENOMEM; 2746 goto err_unpin; 2747 } 2748 2749 obj->mm.mapping = page_pack_bits(ptr, type); 2750 } 2751 2752 out_unlock: 2753 mutex_unlock(&obj->mm.lock); 2754 return ptr; 2755 2756 err_unpin: 2757 atomic_dec(&obj->mm.pages_pin_count); 2758 err_unlock: 2759 ptr = ERR_PTR(ret); 2760 goto out_unlock; 2761 } 2762 2763 static int 2764 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2765 const struct drm_i915_gem_pwrite *arg) 2766 { 2767 struct address_space *mapping = obj->base.filp->f_mapping; 2768 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2769 u64 remain, offset; 2770 unsigned int pg; 2771 2772 /* Before we instantiate/pin the backing store for our use, we 2773 * can prepopulate the shmemfs filp efficiently using a write into 2774 * the pagecache. We avoid the penalty of instantiating all the 2775 * pages, important if the user is just writing to a few and never 2776 * uses the object on the GPU, and using a direct write into shmemfs 2777 * allows it to avoid the cost of retrieving a page (either swapin 2778 * or clearing-before-use) before it is overwritten. 2779 */ 2780 if (i915_gem_object_has_pages(obj)) 2781 return -ENODEV; 2782 2783 if (obj->mm.madv != I915_MADV_WILLNEED) 2784 return -EFAULT; 2785 2786 /* Before the pages are instantiated the object is treated as being 2787 * in the CPU domain. The pages will be clflushed as required before 2788 * use, and we can freely write into the pages directly. If userspace 2789 * races pwrite with any other operation; corruption will ensue - 2790 * that is userspace's prerogative! 2791 */ 2792 2793 remain = arg->size; 2794 offset = arg->offset; 2795 pg = offset_in_page(offset); 2796 2797 do { 2798 unsigned int len, unwritten; 2799 struct page *page; 2800 void *data, *vaddr; 2801 int err; 2802 2803 len = PAGE_SIZE - pg; 2804 if (len > remain) 2805 len = remain; 2806 2807 err = pagecache_write_begin(obj->base.filp, mapping, 2808 offset, len, 0, 2809 &page, &data); 2810 if (err < 0) 2811 return err; 2812 2813 vaddr = kmap(page); 2814 unwritten = copy_from_user(vaddr + pg, user_data, len); 2815 kunmap(page); 2816 2817 err = pagecache_write_end(obj->base.filp, mapping, 2818 offset, len, len - unwritten, 2819 page, data); 2820 if (err < 0) 2821 return err; 2822 2823 if (unwritten) 2824 return -EFAULT; 2825 2826 remain -= len; 2827 user_data += len; 2828 offset += len; 2829 pg = 0; 2830 } while (remain); 2831 2832 return 0; 2833 } 2834 2835 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2836 { 2837 bool banned; 2838 2839 atomic_inc(&ctx->guilty_count); 2840 2841 banned = false; 2842 if (i915_gem_context_is_bannable(ctx)) { 2843 unsigned int score; 2844 2845 score = atomic_add_return(CONTEXT_SCORE_GUILTY, 2846 &ctx->ban_score); 2847 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 2848 2849 DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", 2850 ctx->name, score, yesno(banned)); 2851 } 2852 if (!banned) 2853 return; 2854 2855 i915_gem_context_set_banned(ctx); 2856 if (!IS_ERR_OR_NULL(ctx->file_priv)) { 2857 atomic_inc(&ctx->file_priv->context_bans); 2858 DRM_DEBUG_DRIVER("client %s has had %d context banned\n", 2859 ctx->name, atomic_read(&ctx->file_priv->context_bans)); 2860 } 2861 } 2862 2863 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2864 { 2865 atomic_inc(&ctx->active_count); 2866 } 2867 2868 struct i915_request * 2869 i915_gem_find_active_request(struct intel_engine_cs *engine) 2870 { 2871 struct i915_request *request, *active = NULL; 2872 unsigned long flags; 2873 2874 /* We are called by the error capture and reset at a random 2875 * point in time. In particular, note that neither is crucially 2876 * ordered with an interrupt. After a hang, the GPU is dead and we 2877 * assume that no more writes can happen (we waited long enough for 2878 * all writes that were in transaction to be flushed) - adding an 2879 * extra delay for a recent interrupt is pointless. Hence, we do 2880 * not need an engine->irq_seqno_barrier() before the seqno reads. 2881 */ 2882 spin_lock_irqsave(&engine->timeline->lock, flags); 2883 list_for_each_entry(request, &engine->timeline->requests, link) { 2884 if (__i915_request_completed(request, request->global_seqno)) 2885 continue; 2886 2887 GEM_BUG_ON(request->engine != engine); 2888 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 2889 &request->fence.flags)); 2890 2891 active = request; 2892 break; 2893 } 2894 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2895 2896 return active; 2897 } 2898 2899 static bool engine_stalled(struct intel_engine_cs *engine) 2900 { 2901 if (!engine->hangcheck.stalled) 2902 return false; 2903 2904 /* Check for possible seqno movement after hang declaration */ 2905 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { 2906 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); 2907 return false; 2908 } 2909 2910 return true; 2911 } 2912 2913 /* 2914 * Ensure irq handler finishes, and not run again. 2915 * Also return the active request so that we only search for it once. 2916 */ 2917 struct i915_request * 2918 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 2919 { 2920 struct i915_request *request = NULL; 2921 2922 /* 2923 * During the reset sequence, we must prevent the engine from 2924 * entering RC6. As the context state is undefined until we restart 2925 * the engine, if it does enter RC6 during the reset, the state 2926 * written to the powercontext is undefined and so we may lose 2927 * GPU state upon resume, i.e. fail to restart after a reset. 2928 */ 2929 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 2930 2931 /* 2932 * Prevent the signaler thread from updating the request 2933 * state (by calling dma_fence_signal) as we are processing 2934 * the reset. The write from the GPU of the seqno is 2935 * asynchronous and the signaler thread may see a different 2936 * value to us and declare the request complete, even though 2937 * the reset routine have picked that request as the active 2938 * (incomplete) request. This conflict is not handled 2939 * gracefully! 2940 */ 2941 kthread_park(engine->breadcrumbs.signaler); 2942 2943 /* 2944 * Prevent request submission to the hardware until we have 2945 * completed the reset in i915_gem_reset_finish(). If a request 2946 * is completed by one engine, it may then queue a request 2947 * to a second via its execlists->tasklet *just* as we are 2948 * calling engine->init_hw() and also writing the ELSP. 2949 * Turning off the execlists->tasklet until the reset is over 2950 * prevents the race. 2951 * 2952 * Note that this needs to be a single atomic operation on the 2953 * tasklet (flush existing tasks, prevent new tasks) to prevent 2954 * a race between reset and set-wedged. It is not, so we do the best 2955 * we can atm and make sure we don't lock the machine up in the more 2956 * common case of recursively being called from set-wedged from inside 2957 * i915_reset. 2958 */ 2959 if (!atomic_read(&engine->execlists.tasklet.count)) 2960 tasklet_kill(&engine->execlists.tasklet); 2961 tasklet_disable(&engine->execlists.tasklet); 2962 2963 /* 2964 * We're using worker to queue preemption requests from the tasklet in 2965 * GuC submission mode. 2966 * Even though tasklet was disabled, we may still have a worker queued. 2967 * Let's make sure that all workers scheduled before disabling the 2968 * tasklet are completed before continuing with the reset. 2969 */ 2970 if (engine->i915->guc.preempt_wq) 2971 flush_workqueue(engine->i915->guc.preempt_wq); 2972 2973 if (engine->irq_seqno_barrier) 2974 engine->irq_seqno_barrier(engine); 2975 2976 request = i915_gem_find_active_request(engine); 2977 if (request && request->fence.error == -EIO) 2978 request = ERR_PTR(-EIO); /* Previous reset failed! */ 2979 2980 return request; 2981 } 2982 2983 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 2984 { 2985 struct intel_engine_cs *engine; 2986 struct i915_request *request; 2987 enum intel_engine_id id; 2988 int err = 0; 2989 2990 for_each_engine(engine, dev_priv, id) { 2991 request = i915_gem_reset_prepare_engine(engine); 2992 if (IS_ERR(request)) { 2993 err = PTR_ERR(request); 2994 continue; 2995 } 2996 2997 engine->hangcheck.active_request = request; 2998 } 2999 3000 i915_gem_revoke_fences(dev_priv); 3001 3002 return err; 3003 } 3004 3005 static void skip_request(struct i915_request *request) 3006 { 3007 void *vaddr = request->ring->vaddr; 3008 u32 head; 3009 3010 /* As this request likely depends on state from the lost 3011 * context, clear out all the user operations leaving the 3012 * breadcrumb at the end (so we get the fence notifications). 3013 */ 3014 head = request->head; 3015 if (request->postfix < head) { 3016 memset(vaddr + head, 0, request->ring->size - head); 3017 head = 0; 3018 } 3019 memset(vaddr + head, 0, request->postfix - head); 3020 3021 dma_fence_set_error(&request->fence, -EIO); 3022 } 3023 3024 static void engine_skip_context(struct i915_request *request) 3025 { 3026 struct intel_engine_cs *engine = request->engine; 3027 struct i915_gem_context *hung_ctx = request->ctx; 3028 struct intel_timeline *timeline; 3029 unsigned long flags; 3030 3031 timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); 3032 3033 spin_lock_irqsave(&engine->timeline->lock, flags); 3034 spin_lock(&timeline->lock); 3035 3036 list_for_each_entry_continue(request, &engine->timeline->requests, link) 3037 if (request->ctx == hung_ctx) 3038 skip_request(request); 3039 3040 list_for_each_entry(request, &timeline->requests, link) 3041 skip_request(request); 3042 3043 spin_unlock(&timeline->lock); 3044 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3045 } 3046 3047 /* Returns the request if it was guilty of the hang */ 3048 static struct i915_request * 3049 i915_gem_reset_request(struct intel_engine_cs *engine, 3050 struct i915_request *request) 3051 { 3052 /* The guilty request will get skipped on a hung engine. 3053 * 3054 * Users of client default contexts do not rely on logical 3055 * state preserved between batches so it is safe to execute 3056 * queued requests following the hang. Non default contexts 3057 * rely on preserved state, so skipping a batch loses the 3058 * evolution of the state and it needs to be considered corrupted. 3059 * Executing more queued batches on top of corrupted state is 3060 * risky. But we take the risk by trying to advance through 3061 * the queued requests in order to make the client behaviour 3062 * more predictable around resets, by not throwing away random 3063 * amount of batches it has prepared for execution. Sophisticated 3064 * clients can use gem_reset_stats_ioctl and dma fence status 3065 * (exported via sync_file info ioctl on explicit fences) to observe 3066 * when it loses the context state and should rebuild accordingly. 3067 * 3068 * The context ban, and ultimately the client ban, mechanism are safety 3069 * valves if client submission ends up resulting in nothing more than 3070 * subsequent hangs. 3071 */ 3072 3073 if (engine_stalled(engine)) { 3074 i915_gem_context_mark_guilty(request->ctx); 3075 skip_request(request); 3076 3077 /* If this context is now banned, skip all pending requests. */ 3078 if (i915_gem_context_is_banned(request->ctx)) 3079 engine_skip_context(request); 3080 } else { 3081 /* 3082 * Since this is not the hung engine, it may have advanced 3083 * since the hang declaration. Double check by refinding 3084 * the active request at the time of the reset. 3085 */ 3086 request = i915_gem_find_active_request(engine); 3087 if (request) { 3088 i915_gem_context_mark_innocent(request->ctx); 3089 dma_fence_set_error(&request->fence, -EAGAIN); 3090 3091 /* Rewind the engine to replay the incomplete rq */ 3092 spin_lock_irq(&engine->timeline->lock); 3093 request = list_prev_entry(request, link); 3094 if (&request->link == &engine->timeline->requests) 3095 request = NULL; 3096 spin_unlock_irq(&engine->timeline->lock); 3097 } 3098 } 3099 3100 return request; 3101 } 3102 3103 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3104 struct i915_request *request) 3105 { 3106 /* 3107 * Make sure this write is visible before we re-enable the interrupt 3108 * handlers on another CPU, as tasklet_enable() resolves to just 3109 * a compiler barrier which is insufficient for our purpose here. 3110 */ 3111 smp_store_mb(engine->irq_posted, 0); 3112 3113 if (request) 3114 request = i915_gem_reset_request(engine, request); 3115 3116 if (request) { 3117 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3118 engine->name, request->global_seqno); 3119 } 3120 3121 /* Setup the CS to resume from the breadcrumb of the hung request */ 3122 engine->reset_hw(engine, request); 3123 } 3124 3125 void i915_gem_reset(struct drm_i915_private *dev_priv) 3126 { 3127 struct intel_engine_cs *engine; 3128 enum intel_engine_id id; 3129 3130 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3131 3132 i915_retire_requests(dev_priv); 3133 3134 for_each_engine(engine, dev_priv, id) { 3135 struct i915_gem_context *ctx; 3136 3137 i915_gem_reset_engine(engine, engine->hangcheck.active_request); 3138 ctx = fetch_and_zero(&engine->last_retired_context); 3139 if (ctx) 3140 engine->context_unpin(engine, ctx); 3141 3142 /* 3143 * Ostensibily, we always want a context loaded for powersaving, 3144 * so if the engine is idle after the reset, send a request 3145 * to load our scratch kernel_context. 3146 * 3147 * More mysteriously, if we leave the engine idle after a reset, 3148 * the next userspace batch may hang, with what appears to be 3149 * an incoherent read by the CS (presumably stale TLB). An 3150 * empty request appears sufficient to paper over the glitch. 3151 */ 3152 if (intel_engine_is_idle(engine)) { 3153 struct i915_request *rq; 3154 3155 rq = i915_request_alloc(engine, 3156 dev_priv->kernel_context); 3157 if (!IS_ERR(rq)) 3158 __i915_request_add(rq, false); 3159 } 3160 } 3161 3162 i915_gem_restore_fences(dev_priv); 3163 3164 if (dev_priv->gt.awake) { 3165 intel_sanitize_gt_powersave(dev_priv); 3166 intel_enable_gt_powersave(dev_priv); 3167 if (INTEL_GEN(dev_priv) >= 6) 3168 gen6_rps_busy(dev_priv); 3169 } 3170 } 3171 3172 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3173 { 3174 tasklet_enable(&engine->execlists.tasklet); 3175 kthread_unpark(engine->breadcrumbs.signaler); 3176 3177 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3178 } 3179 3180 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3181 { 3182 struct intel_engine_cs *engine; 3183 enum intel_engine_id id; 3184 3185 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3186 3187 for_each_engine(engine, dev_priv, id) { 3188 engine->hangcheck.active_request = NULL; 3189 i915_gem_reset_finish_engine(engine); 3190 } 3191 } 3192 3193 static void nop_submit_request(struct i915_request *request) 3194 { 3195 dma_fence_set_error(&request->fence, -EIO); 3196 3197 i915_request_submit(request); 3198 } 3199 3200 static void nop_complete_submit_request(struct i915_request *request) 3201 { 3202 unsigned long flags; 3203 3204 dma_fence_set_error(&request->fence, -EIO); 3205 3206 spin_lock_irqsave(&request->engine->timeline->lock, flags); 3207 __i915_request_submit(request); 3208 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3209 spin_unlock_irqrestore(&request->engine->timeline->lock, flags); 3210 } 3211 3212 void i915_gem_set_wedged(struct drm_i915_private *i915) 3213 { 3214 struct intel_engine_cs *engine; 3215 enum intel_engine_id id; 3216 3217 if (drm_debug & DRM_UT_DRIVER) { 3218 struct drm_printer p = drm_debug_printer(__func__); 3219 3220 for_each_engine(engine, i915, id) 3221 intel_engine_dump(engine, &p, "%s\n", engine->name); 3222 } 3223 3224 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3225 smp_mb__after_atomic(); 3226 3227 /* 3228 * First, stop submission to hw, but do not yet complete requests by 3229 * rolling the global seqno forward (since this would complete requests 3230 * for which we haven't set the fence error to EIO yet). 3231 */ 3232 for_each_engine(engine, i915, id) { 3233 i915_gem_reset_prepare_engine(engine); 3234 3235 engine->submit_request = nop_submit_request; 3236 engine->schedule = NULL; 3237 } 3238 i915->caps.scheduler = 0; 3239 3240 /* 3241 * Make sure no one is running the old callback before we proceed with 3242 * cancelling requests and resetting the completion tracking. Otherwise 3243 * we might submit a request to the hardware which never completes. 3244 */ 3245 synchronize_rcu(); 3246 3247 for_each_engine(engine, i915, id) { 3248 /* Mark all executing requests as skipped */ 3249 engine->cancel_requests(engine); 3250 3251 /* 3252 * Only once we've force-cancelled all in-flight requests can we 3253 * start to complete all requests. 3254 */ 3255 engine->submit_request = nop_complete_submit_request; 3256 } 3257 3258 /* 3259 * Make sure no request can slip through without getting completed by 3260 * either this call here to intel_engine_init_global_seqno, or the one 3261 * in nop_complete_submit_request. 3262 */ 3263 synchronize_rcu(); 3264 3265 for_each_engine(engine, i915, id) { 3266 unsigned long flags; 3267 3268 /* 3269 * Mark all pending requests as complete so that any concurrent 3270 * (lockless) lookup doesn't try and wait upon the request as we 3271 * reset it. 3272 */ 3273 spin_lock_irqsave(&engine->timeline->lock, flags); 3274 intel_engine_init_global_seqno(engine, 3275 intel_engine_last_submit(engine)); 3276 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3277 3278 i915_gem_reset_finish_engine(engine); 3279 } 3280 3281 wake_up_all(&i915->gpu_error.reset_queue); 3282 } 3283 3284 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3285 { 3286 struct i915_gem_timeline *tl; 3287 int i; 3288 3289 lockdep_assert_held(&i915->drm.struct_mutex); 3290 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3291 return true; 3292 3293 /* Before unwedging, make sure that all pending operations 3294 * are flushed and errored out - we may have requests waiting upon 3295 * third party fences. We marked all inflight requests as EIO, and 3296 * every execbuf since returned EIO, for consistency we want all 3297 * the currently pending requests to also be marked as EIO, which 3298 * is done inside our nop_submit_request - and so we must wait. 3299 * 3300 * No more can be submitted until we reset the wedged bit. 3301 */ 3302 list_for_each_entry(tl, &i915->gt.timelines, link) { 3303 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3304 struct i915_request *rq; 3305 3306 rq = i915_gem_active_peek(&tl->engine[i].last_request, 3307 &i915->drm.struct_mutex); 3308 if (!rq) 3309 continue; 3310 3311 /* We can't use our normal waiter as we want to 3312 * avoid recursively trying to handle the current 3313 * reset. The basic dma_fence_default_wait() installs 3314 * a callback for dma_fence_signal(), which is 3315 * triggered by our nop handler (indirectly, the 3316 * callback enables the signaler thread which is 3317 * woken by the nop_submit_request() advancing the seqno 3318 * and when the seqno passes the fence, the signaler 3319 * then signals the fence waking us up). 3320 */ 3321 if (dma_fence_default_wait(&rq->fence, true, 3322 MAX_SCHEDULE_TIMEOUT) < 0) 3323 return false; 3324 } 3325 } 3326 3327 /* Undo nop_submit_request. We prevent all new i915 requests from 3328 * being queued (by disallowing execbuf whilst wedged) so having 3329 * waited for all active requests above, we know the system is idle 3330 * and do not have to worry about a thread being inside 3331 * engine->submit_request() as we swap over. So unlike installing 3332 * the nop_submit_request on reset, we can do this from normal 3333 * context and do not require stop_machine(). 3334 */ 3335 intel_engines_reset_default_submission(i915); 3336 i915_gem_contexts_lost(i915); 3337 3338 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3339 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3340 3341 return true; 3342 } 3343 3344 static void 3345 i915_gem_retire_work_handler(struct work_struct *work) 3346 { 3347 struct drm_i915_private *dev_priv = 3348 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3349 struct drm_device *dev = &dev_priv->drm; 3350 3351 /* Come back later if the device is busy... */ 3352 if (mutex_trylock(&dev->struct_mutex)) { 3353 i915_retire_requests(dev_priv); 3354 mutex_unlock(&dev->struct_mutex); 3355 } 3356 3357 /* 3358 * Keep the retire handler running until we are finally idle. 3359 * We do not need to do this test under locking as in the worst-case 3360 * we queue the retire worker once too often. 3361 */ 3362 if (READ_ONCE(dev_priv->gt.awake)) 3363 queue_delayed_work(dev_priv->wq, 3364 &dev_priv->gt.retire_work, 3365 round_jiffies_up_relative(HZ)); 3366 } 3367 3368 static void shrink_caches(struct drm_i915_private *i915) 3369 { 3370 /* 3371 * kmem_cache_shrink() discards empty slabs and reorders partially 3372 * filled slabs to prioritise allocating from the mostly full slabs, 3373 * with the aim of reducing fragmentation. 3374 */ 3375 kmem_cache_shrink(i915->priorities); 3376 kmem_cache_shrink(i915->dependencies); 3377 kmem_cache_shrink(i915->requests); 3378 kmem_cache_shrink(i915->luts); 3379 kmem_cache_shrink(i915->vmas); 3380 kmem_cache_shrink(i915->objects); 3381 } 3382 3383 struct sleep_rcu_work { 3384 union { 3385 struct rcu_head rcu; 3386 struct work_struct work; 3387 }; 3388 struct drm_i915_private *i915; 3389 unsigned int epoch; 3390 }; 3391 3392 static inline bool 3393 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3394 { 3395 /* 3396 * There is a small chance that the epoch wrapped since we started 3397 * sleeping. If we assume that epoch is at least a u32, then it will 3398 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3399 */ 3400 return epoch == READ_ONCE(i915->gt.epoch); 3401 } 3402 3403 static void __sleep_work(struct work_struct *work) 3404 { 3405 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3406 struct drm_i915_private *i915 = s->i915; 3407 unsigned int epoch = s->epoch; 3408 3409 kfree(s); 3410 if (same_epoch(i915, epoch)) 3411 shrink_caches(i915); 3412 } 3413 3414 static void __sleep_rcu(struct rcu_head *rcu) 3415 { 3416 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3417 struct drm_i915_private *i915 = s->i915; 3418 3419 if (same_epoch(i915, s->epoch)) { 3420 INIT_WORK(&s->work, __sleep_work); 3421 queue_work(i915->wq, &s->work); 3422 } else { 3423 kfree(s); 3424 } 3425 } 3426 3427 static inline bool 3428 new_requests_since_last_retire(const struct drm_i915_private *i915) 3429 { 3430 return (READ_ONCE(i915->gt.active_requests) || 3431 work_pending(&i915->gt.idle_work.work)); 3432 } 3433 3434 static void 3435 i915_gem_idle_work_handler(struct work_struct *work) 3436 { 3437 struct drm_i915_private *dev_priv = 3438 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3439 unsigned int epoch = I915_EPOCH_INVALID; 3440 bool rearm_hangcheck; 3441 3442 if (!READ_ONCE(dev_priv->gt.awake)) 3443 return; 3444 3445 /* 3446 * Wait for last execlists context complete, but bail out in case a 3447 * new request is submitted. As we don't trust the hardware, we 3448 * continue on if the wait times out. This is necessary to allow 3449 * the machine to suspend even if the hardware dies, and we will 3450 * try to recover in resume (after depriving the hardware of power, 3451 * it may be in a better mmod). 3452 */ 3453 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3454 intel_engines_are_idle(dev_priv), 3455 I915_IDLE_ENGINES_TIMEOUT * 1000, 3456 10, 500); 3457 3458 rearm_hangcheck = 3459 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3460 3461 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3462 /* Currently busy, come back later */ 3463 mod_delayed_work(dev_priv->wq, 3464 &dev_priv->gt.idle_work, 3465 msecs_to_jiffies(50)); 3466 goto out_rearm; 3467 } 3468 3469 /* 3470 * New request retired after this work handler started, extend active 3471 * period until next instance of the work. 3472 */ 3473 if (new_requests_since_last_retire(dev_priv)) 3474 goto out_unlock; 3475 3476 /* 3477 * Be paranoid and flush a concurrent interrupt to make sure 3478 * we don't reactivate any irq tasklets after parking. 3479 * 3480 * FIXME: Note that even though we have waited for execlists to be idle, 3481 * there may still be an in-flight interrupt even though the CSB 3482 * is now empty. synchronize_irq() makes sure that a residual interrupt 3483 * is completed before we continue, but it doesn't prevent the HW from 3484 * raising a spurious interrupt later. To complete the shield we should 3485 * coordinate disabling the CS irq with flushing the interrupts. 3486 */ 3487 synchronize_irq(dev_priv->drm.irq); 3488 3489 intel_engines_park(dev_priv); 3490 i915_gem_timelines_park(dev_priv); 3491 3492 i915_pmu_gt_parked(dev_priv); 3493 3494 GEM_BUG_ON(!dev_priv->gt.awake); 3495 dev_priv->gt.awake = false; 3496 epoch = dev_priv->gt.epoch; 3497 GEM_BUG_ON(epoch == I915_EPOCH_INVALID); 3498 rearm_hangcheck = false; 3499 3500 if (INTEL_GEN(dev_priv) >= 6) 3501 gen6_rps_idle(dev_priv); 3502 3503 intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); 3504 3505 intel_runtime_pm_put(dev_priv); 3506 out_unlock: 3507 mutex_unlock(&dev_priv->drm.struct_mutex); 3508 3509 out_rearm: 3510 if (rearm_hangcheck) { 3511 GEM_BUG_ON(!dev_priv->gt.awake); 3512 i915_queue_hangcheck(dev_priv); 3513 } 3514 3515 /* 3516 * When we are idle, it is an opportune time to reap our caches. 3517 * However, we have many objects that utilise RCU and the ordered 3518 * i915->wq that this work is executing on. To try and flush any 3519 * pending frees now we are idle, we first wait for an RCU grace 3520 * period, and then queue a task (that will run last on the wq) to 3521 * shrink and re-optimize the caches. 3522 */ 3523 if (same_epoch(dev_priv, epoch)) { 3524 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3525 if (s) { 3526 s->i915 = dev_priv; 3527 s->epoch = epoch; 3528 call_rcu(&s->rcu, __sleep_rcu); 3529 } 3530 } 3531 } 3532 3533 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3534 { 3535 struct drm_i915_private *i915 = to_i915(gem->dev); 3536 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3537 struct drm_i915_file_private *fpriv = file->driver_priv; 3538 struct i915_lut_handle *lut, *ln; 3539 3540 mutex_lock(&i915->drm.struct_mutex); 3541 3542 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3543 struct i915_gem_context *ctx = lut->ctx; 3544 struct i915_vma *vma; 3545 3546 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3547 if (ctx->file_priv != fpriv) 3548 continue; 3549 3550 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3551 GEM_BUG_ON(vma->obj != obj); 3552 3553 /* We allow the process to have multiple handles to the same 3554 * vma, in the same fd namespace, by virtue of flink/open. 3555 */ 3556 GEM_BUG_ON(!vma->open_count); 3557 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3558 i915_vma_close(vma); 3559 3560 list_del(&lut->obj_link); 3561 list_del(&lut->ctx_link); 3562 3563 kmem_cache_free(i915->luts, lut); 3564 __i915_gem_object_release_unless_active(obj); 3565 } 3566 3567 mutex_unlock(&i915->drm.struct_mutex); 3568 } 3569 3570 static unsigned long to_wait_timeout(s64 timeout_ns) 3571 { 3572 if (timeout_ns < 0) 3573 return MAX_SCHEDULE_TIMEOUT; 3574 3575 if (timeout_ns == 0) 3576 return 0; 3577 3578 return nsecs_to_jiffies_timeout(timeout_ns); 3579 } 3580 3581 /** 3582 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3583 * @dev: drm device pointer 3584 * @data: ioctl data blob 3585 * @file: drm file pointer 3586 * 3587 * Returns 0 if successful, else an error is returned with the remaining time in 3588 * the timeout parameter. 3589 * -ETIME: object is still busy after timeout 3590 * -ERESTARTSYS: signal interrupted the wait 3591 * -ENONENT: object doesn't exist 3592 * Also possible, but rare: 3593 * -EAGAIN: incomplete, restart syscall 3594 * -ENOMEM: damn 3595 * -ENODEV: Internal IRQ fail 3596 * -E?: The add request failed 3597 * 3598 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3599 * non-zero timeout parameter the wait ioctl will wait for the given number of 3600 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3601 * without holding struct_mutex the object may become re-busied before this 3602 * function completes. A similar but shorter * race condition exists in the busy 3603 * ioctl 3604 */ 3605 int 3606 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3607 { 3608 struct drm_i915_gem_wait *args = data; 3609 struct drm_i915_gem_object *obj; 3610 ktime_t start; 3611 long ret; 3612 3613 if (args->flags != 0) 3614 return -EINVAL; 3615 3616 obj = i915_gem_object_lookup(file, args->bo_handle); 3617 if (!obj) 3618 return -ENOENT; 3619 3620 start = ktime_get(); 3621 3622 ret = i915_gem_object_wait(obj, 3623 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3624 to_wait_timeout(args->timeout_ns), 3625 to_rps_client(file)); 3626 3627 if (args->timeout_ns > 0) { 3628 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3629 if (args->timeout_ns < 0) 3630 args->timeout_ns = 0; 3631 3632 /* 3633 * Apparently ktime isn't accurate enough and occasionally has a 3634 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3635 * things up to make the test happy. We allow up to 1 jiffy. 3636 * 3637 * This is a regression from the timespec->ktime conversion. 3638 */ 3639 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3640 args->timeout_ns = 0; 3641 3642 /* Asked to wait beyond the jiffie/scheduler precision? */ 3643 if (ret == -ETIME && args->timeout_ns) 3644 ret = -EAGAIN; 3645 } 3646 3647 i915_gem_object_put(obj); 3648 return ret; 3649 } 3650 3651 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) 3652 { 3653 int ret, i; 3654 3655 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3656 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); 3657 if (ret) 3658 return ret; 3659 } 3660 3661 return 0; 3662 } 3663 3664 static int wait_for_engines(struct drm_i915_private *i915) 3665 { 3666 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3667 dev_err(i915->drm.dev, 3668 "Failed to idle engines, declaring wedged!\n"); 3669 if (drm_debug & DRM_UT_DRIVER) { 3670 struct drm_printer p = drm_debug_printer(__func__); 3671 struct intel_engine_cs *engine; 3672 enum intel_engine_id id; 3673 3674 for_each_engine(engine, i915, id) 3675 intel_engine_dump(engine, &p, 3676 "%s\n", engine->name); 3677 } 3678 3679 i915_gem_set_wedged(i915); 3680 return -EIO; 3681 } 3682 3683 return 0; 3684 } 3685 3686 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3687 { 3688 int ret; 3689 3690 /* If the device is asleep, we have no requests outstanding */ 3691 if (!READ_ONCE(i915->gt.awake)) 3692 return 0; 3693 3694 if (flags & I915_WAIT_LOCKED) { 3695 struct i915_gem_timeline *tl; 3696 3697 lockdep_assert_held(&i915->drm.struct_mutex); 3698 3699 list_for_each_entry(tl, &i915->gt.timelines, link) { 3700 ret = wait_for_timeline(tl, flags); 3701 if (ret) 3702 return ret; 3703 } 3704 i915_retire_requests(i915); 3705 3706 ret = wait_for_engines(i915); 3707 } else { 3708 ret = wait_for_timeline(&i915->gt.global_timeline, flags); 3709 } 3710 3711 return ret; 3712 } 3713 3714 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3715 { 3716 /* 3717 * We manually flush the CPU domain so that we can override and 3718 * force the flush for the display, and perform it asyncrhonously. 3719 */ 3720 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3721 if (obj->cache_dirty) 3722 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3723 obj->write_domain = 0; 3724 } 3725 3726 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3727 { 3728 if (!READ_ONCE(obj->pin_global)) 3729 return; 3730 3731 mutex_lock(&obj->base.dev->struct_mutex); 3732 __i915_gem_object_flush_for_display(obj); 3733 mutex_unlock(&obj->base.dev->struct_mutex); 3734 } 3735 3736 /** 3737 * Moves a single object to the WC read, and possibly write domain. 3738 * @obj: object to act on 3739 * @write: ask for write access or read only 3740 * 3741 * This function returns when the move is complete, including waiting on 3742 * flushes to occur. 3743 */ 3744 int 3745 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3746 { 3747 int ret; 3748 3749 lockdep_assert_held(&obj->base.dev->struct_mutex); 3750 3751 ret = i915_gem_object_wait(obj, 3752 I915_WAIT_INTERRUPTIBLE | 3753 I915_WAIT_LOCKED | 3754 (write ? I915_WAIT_ALL : 0), 3755 MAX_SCHEDULE_TIMEOUT, 3756 NULL); 3757 if (ret) 3758 return ret; 3759 3760 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3761 return 0; 3762 3763 /* Flush and acquire obj->pages so that we are coherent through 3764 * direct access in memory with previous cached writes through 3765 * shmemfs and that our cache domain tracking remains valid. 3766 * For example, if the obj->filp was moved to swap without us 3767 * being notified and releasing the pages, we would mistakenly 3768 * continue to assume that the obj remained out of the CPU cached 3769 * domain. 3770 */ 3771 ret = i915_gem_object_pin_pages(obj); 3772 if (ret) 3773 return ret; 3774 3775 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3776 3777 /* Serialise direct access to this object with the barriers for 3778 * coherent writes from the GPU, by effectively invalidating the 3779 * WC domain upon first access. 3780 */ 3781 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3782 mb(); 3783 3784 /* It should now be out of any other write domains, and we can update 3785 * the domain values for our changes. 3786 */ 3787 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3788 obj->read_domains |= I915_GEM_DOMAIN_WC; 3789 if (write) { 3790 obj->read_domains = I915_GEM_DOMAIN_WC; 3791 obj->write_domain = I915_GEM_DOMAIN_WC; 3792 obj->mm.dirty = true; 3793 } 3794 3795 i915_gem_object_unpin_pages(obj); 3796 return 0; 3797 } 3798 3799 /** 3800 * Moves a single object to the GTT read, and possibly write domain. 3801 * @obj: object to act on 3802 * @write: ask for write access or read only 3803 * 3804 * This function returns when the move is complete, including waiting on 3805 * flushes to occur. 3806 */ 3807 int 3808 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3809 { 3810 int ret; 3811 3812 lockdep_assert_held(&obj->base.dev->struct_mutex); 3813 3814 ret = i915_gem_object_wait(obj, 3815 I915_WAIT_INTERRUPTIBLE | 3816 I915_WAIT_LOCKED | 3817 (write ? I915_WAIT_ALL : 0), 3818 MAX_SCHEDULE_TIMEOUT, 3819 NULL); 3820 if (ret) 3821 return ret; 3822 3823 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3824 return 0; 3825 3826 /* Flush and acquire obj->pages so that we are coherent through 3827 * direct access in memory with previous cached writes through 3828 * shmemfs and that our cache domain tracking remains valid. 3829 * For example, if the obj->filp was moved to swap without us 3830 * being notified and releasing the pages, we would mistakenly 3831 * continue to assume that the obj remained out of the CPU cached 3832 * domain. 3833 */ 3834 ret = i915_gem_object_pin_pages(obj); 3835 if (ret) 3836 return ret; 3837 3838 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3839 3840 /* Serialise direct access to this object with the barriers for 3841 * coherent writes from the GPU, by effectively invalidating the 3842 * GTT domain upon first access. 3843 */ 3844 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3845 mb(); 3846 3847 /* It should now be out of any other write domains, and we can update 3848 * the domain values for our changes. 3849 */ 3850 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3851 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3852 if (write) { 3853 obj->read_domains = I915_GEM_DOMAIN_GTT; 3854 obj->write_domain = I915_GEM_DOMAIN_GTT; 3855 obj->mm.dirty = true; 3856 } 3857 3858 i915_gem_object_unpin_pages(obj); 3859 return 0; 3860 } 3861 3862 /** 3863 * Changes the cache-level of an object across all VMA. 3864 * @obj: object to act on 3865 * @cache_level: new cache level to set for the object 3866 * 3867 * After this function returns, the object will be in the new cache-level 3868 * across all GTT and the contents of the backing storage will be coherent, 3869 * with respect to the new cache-level. In order to keep the backing storage 3870 * coherent for all users, we only allow a single cache level to be set 3871 * globally on the object and prevent it from being changed whilst the 3872 * hardware is reading from the object. That is if the object is currently 3873 * on the scanout it will be set to uncached (or equivalent display 3874 * cache coherency) and all non-MOCS GPU access will also be uncached so 3875 * that all direct access to the scanout remains coherent. 3876 */ 3877 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3878 enum i915_cache_level cache_level) 3879 { 3880 struct i915_vma *vma; 3881 int ret; 3882 3883 lockdep_assert_held(&obj->base.dev->struct_mutex); 3884 3885 if (obj->cache_level == cache_level) 3886 return 0; 3887 3888 /* Inspect the list of currently bound VMA and unbind any that would 3889 * be invalid given the new cache-level. This is principally to 3890 * catch the issue of the CS prefetch crossing page boundaries and 3891 * reading an invalid PTE on older architectures. 3892 */ 3893 restart: 3894 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3895 if (!drm_mm_node_allocated(&vma->node)) 3896 continue; 3897 3898 if (i915_vma_is_pinned(vma)) { 3899 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3900 return -EBUSY; 3901 } 3902 3903 if (!i915_vma_is_closed(vma) && 3904 i915_gem_valid_gtt_space(vma, cache_level)) 3905 continue; 3906 3907 ret = i915_vma_unbind(vma); 3908 if (ret) 3909 return ret; 3910 3911 /* As unbinding may affect other elements in the 3912 * obj->vma_list (due to side-effects from retiring 3913 * an active vma), play safe and restart the iterator. 3914 */ 3915 goto restart; 3916 } 3917 3918 /* We can reuse the existing drm_mm nodes but need to change the 3919 * cache-level on the PTE. We could simply unbind them all and 3920 * rebind with the correct cache-level on next use. However since 3921 * we already have a valid slot, dma mapping, pages etc, we may as 3922 * rewrite the PTE in the belief that doing so tramples upon less 3923 * state and so involves less work. 3924 */ 3925 if (obj->bind_count) { 3926 /* Before we change the PTE, the GPU must not be accessing it. 3927 * If we wait upon the object, we know that all the bound 3928 * VMA are no longer active. 3929 */ 3930 ret = i915_gem_object_wait(obj, 3931 I915_WAIT_INTERRUPTIBLE | 3932 I915_WAIT_LOCKED | 3933 I915_WAIT_ALL, 3934 MAX_SCHEDULE_TIMEOUT, 3935 NULL); 3936 if (ret) 3937 return ret; 3938 3939 if (!HAS_LLC(to_i915(obj->base.dev)) && 3940 cache_level != I915_CACHE_NONE) { 3941 /* Access to snoopable pages through the GTT is 3942 * incoherent and on some machines causes a hard 3943 * lockup. Relinquish the CPU mmaping to force 3944 * userspace to refault in the pages and we can 3945 * then double check if the GTT mapping is still 3946 * valid for that pointer access. 3947 */ 3948 i915_gem_release_mmap(obj); 3949 3950 /* As we no longer need a fence for GTT access, 3951 * we can relinquish it now (and so prevent having 3952 * to steal a fence from someone else on the next 3953 * fence request). Note GPU activity would have 3954 * dropped the fence as all snoopable access is 3955 * supposed to be linear. 3956 */ 3957 for_each_ggtt_vma(vma, obj) { 3958 ret = i915_vma_put_fence(vma); 3959 if (ret) 3960 return ret; 3961 } 3962 } else { 3963 /* We either have incoherent backing store and 3964 * so no GTT access or the architecture is fully 3965 * coherent. In such cases, existing GTT mmaps 3966 * ignore the cache bit in the PTE and we can 3967 * rewrite it without confusing the GPU or having 3968 * to force userspace to fault back in its mmaps. 3969 */ 3970 } 3971 3972 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3973 if (!drm_mm_node_allocated(&vma->node)) 3974 continue; 3975 3976 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3977 if (ret) 3978 return ret; 3979 } 3980 } 3981 3982 list_for_each_entry(vma, &obj->vma_list, obj_link) 3983 vma->node.color = cache_level; 3984 i915_gem_object_set_cache_coherency(obj, cache_level); 3985 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 3986 3987 return 0; 3988 } 3989 3990 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3991 struct drm_file *file) 3992 { 3993 struct drm_i915_gem_caching *args = data; 3994 struct drm_i915_gem_object *obj; 3995 int err = 0; 3996 3997 rcu_read_lock(); 3998 obj = i915_gem_object_lookup_rcu(file, args->handle); 3999 if (!obj) { 4000 err = -ENOENT; 4001 goto out; 4002 } 4003 4004 switch (obj->cache_level) { 4005 case I915_CACHE_LLC: 4006 case I915_CACHE_L3_LLC: 4007 args->caching = I915_CACHING_CACHED; 4008 break; 4009 4010 case I915_CACHE_WT: 4011 args->caching = I915_CACHING_DISPLAY; 4012 break; 4013 4014 default: 4015 args->caching = I915_CACHING_NONE; 4016 break; 4017 } 4018 out: 4019 rcu_read_unlock(); 4020 return err; 4021 } 4022 4023 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4024 struct drm_file *file) 4025 { 4026 struct drm_i915_private *i915 = to_i915(dev); 4027 struct drm_i915_gem_caching *args = data; 4028 struct drm_i915_gem_object *obj; 4029 enum i915_cache_level level; 4030 int ret = 0; 4031 4032 switch (args->caching) { 4033 case I915_CACHING_NONE: 4034 level = I915_CACHE_NONE; 4035 break; 4036 case I915_CACHING_CACHED: 4037 /* 4038 * Due to a HW issue on BXT A stepping, GPU stores via a 4039 * snooped mapping may leave stale data in a corresponding CPU 4040 * cacheline, whereas normally such cachelines would get 4041 * invalidated. 4042 */ 4043 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4044 return -ENODEV; 4045 4046 level = I915_CACHE_LLC; 4047 break; 4048 case I915_CACHING_DISPLAY: 4049 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4050 break; 4051 default: 4052 return -EINVAL; 4053 } 4054 4055 obj = i915_gem_object_lookup(file, args->handle); 4056 if (!obj) 4057 return -ENOENT; 4058 4059 /* 4060 * The caching mode of proxy object is handled by its generator, and 4061 * not allowed to be changed by userspace. 4062 */ 4063 if (i915_gem_object_is_proxy(obj)) { 4064 ret = -ENXIO; 4065 goto out; 4066 } 4067 4068 if (obj->cache_level == level) 4069 goto out; 4070 4071 ret = i915_gem_object_wait(obj, 4072 I915_WAIT_INTERRUPTIBLE, 4073 MAX_SCHEDULE_TIMEOUT, 4074 to_rps_client(file)); 4075 if (ret) 4076 goto out; 4077 4078 ret = i915_mutex_lock_interruptible(dev); 4079 if (ret) 4080 goto out; 4081 4082 ret = i915_gem_object_set_cache_level(obj, level); 4083 mutex_unlock(&dev->struct_mutex); 4084 4085 out: 4086 i915_gem_object_put(obj); 4087 return ret; 4088 } 4089 4090 /* 4091 * Prepare buffer for display plane (scanout, cursors, etc). 4092 * Can be called from an uninterruptible phase (modesetting) and allows 4093 * any flushes to be pipelined (for pageflips). 4094 */ 4095 struct i915_vma * 4096 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4097 u32 alignment, 4098 const struct i915_ggtt_view *view, 4099 unsigned int flags) 4100 { 4101 struct i915_vma *vma; 4102 int ret; 4103 4104 lockdep_assert_held(&obj->base.dev->struct_mutex); 4105 4106 /* Mark the global pin early so that we account for the 4107 * display coherency whilst setting up the cache domains. 4108 */ 4109 obj->pin_global++; 4110 4111 /* The display engine is not coherent with the LLC cache on gen6. As 4112 * a result, we make sure that the pinning that is about to occur is 4113 * done with uncached PTEs. This is lowest common denominator for all 4114 * chipsets. 4115 * 4116 * However for gen6+, we could do better by using the GFDT bit instead 4117 * of uncaching, which would allow us to flush all the LLC-cached data 4118 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4119 */ 4120 ret = i915_gem_object_set_cache_level(obj, 4121 HAS_WT(to_i915(obj->base.dev)) ? 4122 I915_CACHE_WT : I915_CACHE_NONE); 4123 if (ret) { 4124 vma = ERR_PTR(ret); 4125 goto err_unpin_global; 4126 } 4127 4128 /* As the user may map the buffer once pinned in the display plane 4129 * (e.g. libkms for the bootup splash), we have to ensure that we 4130 * always use map_and_fenceable for all scanout buffers. However, 4131 * it may simply be too big to fit into mappable, in which case 4132 * put it anyway and hope that userspace can cope (but always first 4133 * try to preserve the existing ABI). 4134 */ 4135 vma = ERR_PTR(-ENOSPC); 4136 if ((flags & PIN_MAPPABLE) == 0 && 4137 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4138 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4139 flags | 4140 PIN_MAPPABLE | 4141 PIN_NONBLOCK); 4142 if (IS_ERR(vma)) 4143 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4144 if (IS_ERR(vma)) 4145 goto err_unpin_global; 4146 4147 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4148 4149 /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ 4150 __i915_gem_object_flush_for_display(obj); 4151 intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); 4152 4153 /* It should now be out of any other write domains, and we can update 4154 * the domain values for our changes. 4155 */ 4156 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4157 4158 return vma; 4159 4160 err_unpin_global: 4161 obj->pin_global--; 4162 return vma; 4163 } 4164 4165 void 4166 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4167 { 4168 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4169 4170 if (WARN_ON(vma->obj->pin_global == 0)) 4171 return; 4172 4173 if (--vma->obj->pin_global == 0) 4174 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4175 4176 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4177 i915_gem_object_bump_inactive_ggtt(vma->obj); 4178 4179 i915_vma_unpin(vma); 4180 } 4181 4182 /** 4183 * Moves a single object to the CPU read, and possibly write domain. 4184 * @obj: object to act on 4185 * @write: requesting write or read-only access 4186 * 4187 * This function returns when the move is complete, including waiting on 4188 * flushes to occur. 4189 */ 4190 int 4191 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4192 { 4193 int ret; 4194 4195 lockdep_assert_held(&obj->base.dev->struct_mutex); 4196 4197 ret = i915_gem_object_wait(obj, 4198 I915_WAIT_INTERRUPTIBLE | 4199 I915_WAIT_LOCKED | 4200 (write ? I915_WAIT_ALL : 0), 4201 MAX_SCHEDULE_TIMEOUT, 4202 NULL); 4203 if (ret) 4204 return ret; 4205 4206 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4207 4208 /* Flush the CPU cache if it's still invalid. */ 4209 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4210 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4211 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4212 } 4213 4214 /* It should now be out of any other write domains, and we can update 4215 * the domain values for our changes. 4216 */ 4217 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4218 4219 /* If we're writing through the CPU, then the GPU read domains will 4220 * need to be invalidated at next use. 4221 */ 4222 if (write) 4223 __start_cpu_write(obj); 4224 4225 return 0; 4226 } 4227 4228 /* Throttle our rendering by waiting until the ring has completed our requests 4229 * emitted over 20 msec ago. 4230 * 4231 * Note that if we were to use the current jiffies each time around the loop, 4232 * we wouldn't escape the function with any frames outstanding if the time to 4233 * render a frame was over 20ms. 4234 * 4235 * This should get us reasonable parallelism between CPU and GPU but also 4236 * relatively low latency when blocking on a particular request to finish. 4237 */ 4238 static int 4239 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4240 { 4241 struct drm_i915_private *dev_priv = to_i915(dev); 4242 struct drm_i915_file_private *file_priv = file->driver_priv; 4243 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4244 struct i915_request *request, *target = NULL; 4245 long ret; 4246 4247 /* ABI: return -EIO if already wedged */ 4248 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4249 return -EIO; 4250 4251 spin_lock(&file_priv->mm.lock); 4252 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4253 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4254 break; 4255 4256 if (target) { 4257 list_del(&target->client_link); 4258 target->file_priv = NULL; 4259 } 4260 4261 target = request; 4262 } 4263 if (target) 4264 i915_request_get(target); 4265 spin_unlock(&file_priv->mm.lock); 4266 4267 if (target == NULL) 4268 return 0; 4269 4270 ret = i915_request_wait(target, 4271 I915_WAIT_INTERRUPTIBLE, 4272 MAX_SCHEDULE_TIMEOUT); 4273 i915_request_put(target); 4274 4275 return ret < 0 ? ret : 0; 4276 } 4277 4278 struct i915_vma * 4279 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4280 const struct i915_ggtt_view *view, 4281 u64 size, 4282 u64 alignment, 4283 u64 flags) 4284 { 4285 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4286 struct i915_address_space *vm = &dev_priv->ggtt.base; 4287 struct i915_vma *vma; 4288 int ret; 4289 4290 lockdep_assert_held(&obj->base.dev->struct_mutex); 4291 4292 if (flags & PIN_MAPPABLE && 4293 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4294 /* If the required space is larger than the available 4295 * aperture, we will not able to find a slot for the 4296 * object and unbinding the object now will be in 4297 * vain. Worse, doing so may cause us to ping-pong 4298 * the object in and out of the Global GTT and 4299 * waste a lot of cycles under the mutex. 4300 */ 4301 if (obj->base.size > dev_priv->ggtt.mappable_end) 4302 return ERR_PTR(-E2BIG); 4303 4304 /* If NONBLOCK is set the caller is optimistically 4305 * trying to cache the full object within the mappable 4306 * aperture, and *must* have a fallback in place for 4307 * situations where we cannot bind the object. We 4308 * can be a little more lax here and use the fallback 4309 * more often to avoid costly migrations of ourselves 4310 * and other objects within the aperture. 4311 * 4312 * Half-the-aperture is used as a simple heuristic. 4313 * More interesting would to do search for a free 4314 * block prior to making the commitment to unbind. 4315 * That caters for the self-harm case, and with a 4316 * little more heuristics (e.g. NOFAULT, NOEVICT) 4317 * we could try to minimise harm to others. 4318 */ 4319 if (flags & PIN_NONBLOCK && 4320 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4321 return ERR_PTR(-ENOSPC); 4322 } 4323 4324 vma = i915_vma_instance(obj, vm, view); 4325 if (unlikely(IS_ERR(vma))) 4326 return vma; 4327 4328 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4329 if (flags & PIN_NONBLOCK) { 4330 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4331 return ERR_PTR(-ENOSPC); 4332 4333 if (flags & PIN_MAPPABLE && 4334 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4335 return ERR_PTR(-ENOSPC); 4336 } 4337 4338 WARN(i915_vma_is_pinned(vma), 4339 "bo is already pinned in ggtt with incorrect alignment:" 4340 " offset=%08x, req.alignment=%llx," 4341 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4342 i915_ggtt_offset(vma), alignment, 4343 !!(flags & PIN_MAPPABLE), 4344 i915_vma_is_map_and_fenceable(vma)); 4345 ret = i915_vma_unbind(vma); 4346 if (ret) 4347 return ERR_PTR(ret); 4348 } 4349 4350 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4351 if (ret) 4352 return ERR_PTR(ret); 4353 4354 return vma; 4355 } 4356 4357 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4358 { 4359 /* Note that we could alias engines in the execbuf API, but 4360 * that would be very unwise as it prevents userspace from 4361 * fine control over engine selection. Ahem. 4362 * 4363 * This should be something like EXEC_MAX_ENGINE instead of 4364 * I915_NUM_ENGINES. 4365 */ 4366 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4367 return 0x10000 << id; 4368 } 4369 4370 static __always_inline unsigned int __busy_write_id(unsigned int id) 4371 { 4372 /* The uABI guarantees an active writer is also amongst the read 4373 * engines. This would be true if we accessed the activity tracking 4374 * under the lock, but as we perform the lookup of the object and 4375 * its activity locklessly we can not guarantee that the last_write 4376 * being active implies that we have set the same engine flag from 4377 * last_read - hence we always set both read and write busy for 4378 * last_write. 4379 */ 4380 return id | __busy_read_flag(id); 4381 } 4382 4383 static __always_inline unsigned int 4384 __busy_set_if_active(const struct dma_fence *fence, 4385 unsigned int (*flag)(unsigned int id)) 4386 { 4387 struct i915_request *rq; 4388 4389 /* We have to check the current hw status of the fence as the uABI 4390 * guarantees forward progress. We could rely on the idle worker 4391 * to eventually flush us, but to minimise latency just ask the 4392 * hardware. 4393 * 4394 * Note we only report on the status of native fences. 4395 */ 4396 if (!dma_fence_is_i915(fence)) 4397 return 0; 4398 4399 /* opencode to_request() in order to avoid const warnings */ 4400 rq = container_of(fence, struct i915_request, fence); 4401 if (i915_request_completed(rq)) 4402 return 0; 4403 4404 return flag(rq->engine->uabi_id); 4405 } 4406 4407 static __always_inline unsigned int 4408 busy_check_reader(const struct dma_fence *fence) 4409 { 4410 return __busy_set_if_active(fence, __busy_read_flag); 4411 } 4412 4413 static __always_inline unsigned int 4414 busy_check_writer(const struct dma_fence *fence) 4415 { 4416 if (!fence) 4417 return 0; 4418 4419 return __busy_set_if_active(fence, __busy_write_id); 4420 } 4421 4422 int 4423 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4424 struct drm_file *file) 4425 { 4426 struct drm_i915_gem_busy *args = data; 4427 struct drm_i915_gem_object *obj; 4428 struct reservation_object_list *list; 4429 unsigned int seq; 4430 int err; 4431 4432 err = -ENOENT; 4433 rcu_read_lock(); 4434 obj = i915_gem_object_lookup_rcu(file, args->handle); 4435 if (!obj) 4436 goto out; 4437 4438 /* A discrepancy here is that we do not report the status of 4439 * non-i915 fences, i.e. even though we may report the object as idle, 4440 * a call to set-domain may still stall waiting for foreign rendering. 4441 * This also means that wait-ioctl may report an object as busy, 4442 * where busy-ioctl considers it idle. 4443 * 4444 * We trade the ability to warn of foreign fences to report on which 4445 * i915 engines are active for the object. 4446 * 4447 * Alternatively, we can trade that extra information on read/write 4448 * activity with 4449 * args->busy = 4450 * !reservation_object_test_signaled_rcu(obj->resv, true); 4451 * to report the overall busyness. This is what the wait-ioctl does. 4452 * 4453 */ 4454 retry: 4455 seq = raw_read_seqcount(&obj->resv->seq); 4456 4457 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4458 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4459 4460 /* Translate shared fences to READ set of engines */ 4461 list = rcu_dereference(obj->resv->fence); 4462 if (list) { 4463 unsigned int shared_count = list->shared_count, i; 4464 4465 for (i = 0; i < shared_count; ++i) { 4466 struct dma_fence *fence = 4467 rcu_dereference(list->shared[i]); 4468 4469 args->busy |= busy_check_reader(fence); 4470 } 4471 } 4472 4473 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4474 goto retry; 4475 4476 err = 0; 4477 out: 4478 rcu_read_unlock(); 4479 return err; 4480 } 4481 4482 int 4483 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4484 struct drm_file *file_priv) 4485 { 4486 return i915_gem_ring_throttle(dev, file_priv); 4487 } 4488 4489 int 4490 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4491 struct drm_file *file_priv) 4492 { 4493 struct drm_i915_private *dev_priv = to_i915(dev); 4494 struct drm_i915_gem_madvise *args = data; 4495 struct drm_i915_gem_object *obj; 4496 int err; 4497 4498 switch (args->madv) { 4499 case I915_MADV_DONTNEED: 4500 case I915_MADV_WILLNEED: 4501 break; 4502 default: 4503 return -EINVAL; 4504 } 4505 4506 obj = i915_gem_object_lookup(file_priv, args->handle); 4507 if (!obj) 4508 return -ENOENT; 4509 4510 err = mutex_lock_interruptible(&obj->mm.lock); 4511 if (err) 4512 goto out; 4513 4514 if (i915_gem_object_has_pages(obj) && 4515 i915_gem_object_is_tiled(obj) && 4516 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4517 if (obj->mm.madv == I915_MADV_WILLNEED) { 4518 GEM_BUG_ON(!obj->mm.quirked); 4519 __i915_gem_object_unpin_pages(obj); 4520 obj->mm.quirked = false; 4521 } 4522 if (args->madv == I915_MADV_WILLNEED) { 4523 GEM_BUG_ON(obj->mm.quirked); 4524 __i915_gem_object_pin_pages(obj); 4525 obj->mm.quirked = true; 4526 } 4527 } 4528 4529 if (obj->mm.madv != __I915_MADV_PURGED) 4530 obj->mm.madv = args->madv; 4531 4532 /* if the object is no longer attached, discard its backing storage */ 4533 if (obj->mm.madv == I915_MADV_DONTNEED && 4534 !i915_gem_object_has_pages(obj)) 4535 i915_gem_object_truncate(obj); 4536 4537 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4538 mutex_unlock(&obj->mm.lock); 4539 4540 out: 4541 i915_gem_object_put(obj); 4542 return err; 4543 } 4544 4545 static void 4546 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4547 { 4548 struct drm_i915_gem_object *obj = 4549 container_of(active, typeof(*obj), frontbuffer_write); 4550 4551 intel_fb_obj_flush(obj, ORIGIN_CS); 4552 } 4553 4554 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4555 const struct drm_i915_gem_object_ops *ops) 4556 { 4557 mutex_init(&obj->mm.lock); 4558 4559 INIT_LIST_HEAD(&obj->vma_list); 4560 INIT_LIST_HEAD(&obj->lut_list); 4561 INIT_LIST_HEAD(&obj->batch_pool_link); 4562 4563 obj->ops = ops; 4564 4565 reservation_object_init(&obj->__builtin_resv); 4566 obj->resv = &obj->__builtin_resv; 4567 4568 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4569 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4570 4571 obj->mm.madv = I915_MADV_WILLNEED; 4572 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4573 mutex_init(&obj->mm.get_page.lock); 4574 4575 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4576 } 4577 4578 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4579 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4580 I915_GEM_OBJECT_IS_SHRINKABLE, 4581 4582 .get_pages = i915_gem_object_get_pages_gtt, 4583 .put_pages = i915_gem_object_put_pages_gtt, 4584 4585 .pwrite = i915_gem_object_pwrite_gtt, 4586 }; 4587 4588 static int i915_gem_object_create_shmem(struct drm_device *dev, 4589 struct drm_gem_object *obj, 4590 size_t size) 4591 { 4592 struct drm_i915_private *i915 = to_i915(dev); 4593 unsigned long flags = VM_NORESERVE; 4594 struct file *filp; 4595 4596 drm_gem_private_object_init(dev, obj, size); 4597 4598 if (i915->mm.gemfs) 4599 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4600 flags); 4601 else 4602 filp = shmem_file_setup("i915", size, flags); 4603 4604 if (IS_ERR(filp)) 4605 return PTR_ERR(filp); 4606 4607 obj->filp = filp; 4608 4609 return 0; 4610 } 4611 4612 struct drm_i915_gem_object * 4613 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4614 { 4615 struct drm_i915_gem_object *obj; 4616 struct address_space *mapping; 4617 unsigned int cache_level; 4618 gfp_t mask; 4619 int ret; 4620 4621 /* There is a prevalence of the assumption that we fit the object's 4622 * page count inside a 32bit _signed_ variable. Let's document this and 4623 * catch if we ever need to fix it. In the meantime, if you do spot 4624 * such a local variable, please consider fixing! 4625 */ 4626 if (size >> PAGE_SHIFT > INT_MAX) 4627 return ERR_PTR(-E2BIG); 4628 4629 if (overflows_type(size, obj->base.size)) 4630 return ERR_PTR(-E2BIG); 4631 4632 obj = i915_gem_object_alloc(dev_priv); 4633 if (obj == NULL) 4634 return ERR_PTR(-ENOMEM); 4635 4636 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4637 if (ret) 4638 goto fail; 4639 4640 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4641 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4642 /* 965gm cannot relocate objects above 4GiB. */ 4643 mask &= ~__GFP_HIGHMEM; 4644 mask |= __GFP_DMA32; 4645 } 4646 4647 mapping = obj->base.filp->f_mapping; 4648 mapping_set_gfp_mask(mapping, mask); 4649 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4650 4651 i915_gem_object_init(obj, &i915_gem_object_ops); 4652 4653 obj->write_domain = I915_GEM_DOMAIN_CPU; 4654 obj->read_domains = I915_GEM_DOMAIN_CPU; 4655 4656 if (HAS_LLC(dev_priv)) 4657 /* On some devices, we can have the GPU use the LLC (the CPU 4658 * cache) for about a 10% performance improvement 4659 * compared to uncached. Graphics requests other than 4660 * display scanout are coherent with the CPU in 4661 * accessing this cache. This means in this mode we 4662 * don't need to clflush on the CPU side, and on the 4663 * GPU side we only need to flush internal caches to 4664 * get data visible to the CPU. 4665 * 4666 * However, we maintain the display planes as UC, and so 4667 * need to rebind when first used as such. 4668 */ 4669 cache_level = I915_CACHE_LLC; 4670 else 4671 cache_level = I915_CACHE_NONE; 4672 4673 i915_gem_object_set_cache_coherency(obj, cache_level); 4674 4675 trace_i915_gem_object_create(obj); 4676 4677 return obj; 4678 4679 fail: 4680 i915_gem_object_free(obj); 4681 return ERR_PTR(ret); 4682 } 4683 4684 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4685 { 4686 /* If we are the last user of the backing storage (be it shmemfs 4687 * pages or stolen etc), we know that the pages are going to be 4688 * immediately released. In this case, we can then skip copying 4689 * back the contents from the GPU. 4690 */ 4691 4692 if (obj->mm.madv != I915_MADV_WILLNEED) 4693 return false; 4694 4695 if (obj->base.filp == NULL) 4696 return true; 4697 4698 /* At first glance, this looks racy, but then again so would be 4699 * userspace racing mmap against close. However, the first external 4700 * reference to the filp can only be obtained through the 4701 * i915_gem_mmap_ioctl() which safeguards us against the user 4702 * acquiring such a reference whilst we are in the middle of 4703 * freeing the object. 4704 */ 4705 return atomic_long_read(&obj->base.filp->f_count) == 1; 4706 } 4707 4708 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4709 struct llist_node *freed) 4710 { 4711 struct drm_i915_gem_object *obj, *on; 4712 4713 intel_runtime_pm_get(i915); 4714 llist_for_each_entry_safe(obj, on, freed, freed) { 4715 struct i915_vma *vma, *vn; 4716 4717 trace_i915_gem_object_destroy(obj); 4718 4719 mutex_lock(&i915->drm.struct_mutex); 4720 4721 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4722 list_for_each_entry_safe(vma, vn, 4723 &obj->vma_list, obj_link) { 4724 GEM_BUG_ON(i915_vma_is_active(vma)); 4725 vma->flags &= ~I915_VMA_PIN_MASK; 4726 i915_vma_close(vma); 4727 } 4728 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4729 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4730 4731 /* This serializes freeing with the shrinker. Since the free 4732 * is delayed, first by RCU then by the workqueue, we want the 4733 * shrinker to be able to free pages of unreferenced objects, 4734 * or else we may oom whilst there are plenty of deferred 4735 * freed objects. 4736 */ 4737 if (i915_gem_object_has_pages(obj)) { 4738 spin_lock(&i915->mm.obj_lock); 4739 list_del_init(&obj->mm.link); 4740 spin_unlock(&i915->mm.obj_lock); 4741 } 4742 4743 mutex_unlock(&i915->drm.struct_mutex); 4744 4745 GEM_BUG_ON(obj->bind_count); 4746 GEM_BUG_ON(obj->userfault_count); 4747 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4748 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4749 4750 if (obj->ops->release) 4751 obj->ops->release(obj); 4752 4753 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4754 atomic_set(&obj->mm.pages_pin_count, 0); 4755 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4756 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4757 4758 if (obj->base.import_attach) 4759 drm_prime_gem_destroy(&obj->base, NULL); 4760 4761 reservation_object_fini(&obj->__builtin_resv); 4762 drm_gem_object_release(&obj->base); 4763 i915_gem_info_remove_obj(i915, obj->base.size); 4764 4765 kfree(obj->bit_17); 4766 i915_gem_object_free(obj); 4767 4768 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4769 atomic_dec(&i915->mm.free_count); 4770 4771 if (on) 4772 cond_resched(); 4773 } 4774 intel_runtime_pm_put(i915); 4775 } 4776 4777 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4778 { 4779 struct llist_node *freed; 4780 4781 /* Free the oldest, most stale object to keep the free_list short */ 4782 freed = NULL; 4783 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4784 /* Only one consumer of llist_del_first() allowed */ 4785 spin_lock(&i915->mm.free_lock); 4786 freed = llist_del_first(&i915->mm.free_list); 4787 spin_unlock(&i915->mm.free_lock); 4788 } 4789 if (unlikely(freed)) { 4790 freed->next = NULL; 4791 __i915_gem_free_objects(i915, freed); 4792 } 4793 } 4794 4795 static void __i915_gem_free_work(struct work_struct *work) 4796 { 4797 struct drm_i915_private *i915 = 4798 container_of(work, struct drm_i915_private, mm.free_work); 4799 struct llist_node *freed; 4800 4801 /* 4802 * All file-owned VMA should have been released by this point through 4803 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4804 * However, the object may also be bound into the global GTT (e.g. 4805 * older GPUs without per-process support, or for direct access through 4806 * the GTT either for the user or for scanout). Those VMA still need to 4807 * unbound now. 4808 */ 4809 4810 spin_lock(&i915->mm.free_lock); 4811 while ((freed = llist_del_all(&i915->mm.free_list))) { 4812 spin_unlock(&i915->mm.free_lock); 4813 4814 __i915_gem_free_objects(i915, freed); 4815 if (need_resched()) 4816 return; 4817 4818 spin_lock(&i915->mm.free_lock); 4819 } 4820 spin_unlock(&i915->mm.free_lock); 4821 } 4822 4823 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4824 { 4825 struct drm_i915_gem_object *obj = 4826 container_of(head, typeof(*obj), rcu); 4827 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4828 4829 /* 4830 * Since we require blocking on struct_mutex to unbind the freed 4831 * object from the GPU before releasing resources back to the 4832 * system, we can not do that directly from the RCU callback (which may 4833 * be a softirq context), but must instead then defer that work onto a 4834 * kthread. We use the RCU callback rather than move the freed object 4835 * directly onto the work queue so that we can mix between using the 4836 * worker and performing frees directly from subsequent allocations for 4837 * crude but effective memory throttling. 4838 */ 4839 if (llist_add(&obj->freed, &i915->mm.free_list)) 4840 queue_work(i915->wq, &i915->mm.free_work); 4841 } 4842 4843 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4844 { 4845 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4846 4847 if (obj->mm.quirked) 4848 __i915_gem_object_unpin_pages(obj); 4849 4850 if (discard_backing_storage(obj)) 4851 obj->mm.madv = I915_MADV_DONTNEED; 4852 4853 /* 4854 * Before we free the object, make sure any pure RCU-only 4855 * read-side critical sections are complete, e.g. 4856 * i915_gem_busy_ioctl(). For the corresponding synchronized 4857 * lookup see i915_gem_object_lookup_rcu(). 4858 */ 4859 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4860 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4861 } 4862 4863 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4864 { 4865 lockdep_assert_held(&obj->base.dev->struct_mutex); 4866 4867 if (!i915_gem_object_has_active_reference(obj) && 4868 i915_gem_object_is_active(obj)) 4869 i915_gem_object_set_active_reference(obj); 4870 else 4871 i915_gem_object_put(obj); 4872 } 4873 4874 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 4875 { 4876 struct i915_gem_context *kernel_context = i915->kernel_context; 4877 struct intel_engine_cs *engine; 4878 enum intel_engine_id id; 4879 4880 for_each_engine(engine, i915, id) { 4881 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); 4882 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4883 } 4884 } 4885 4886 void i915_gem_sanitize(struct drm_i915_private *i915) 4887 { 4888 if (i915_terminally_wedged(&i915->gpu_error)) { 4889 mutex_lock(&i915->drm.struct_mutex); 4890 i915_gem_unset_wedged(i915); 4891 mutex_unlock(&i915->drm.struct_mutex); 4892 } 4893 4894 /* 4895 * If we inherit context state from the BIOS or earlier occupants 4896 * of the GPU, the GPU may be in an inconsistent state when we 4897 * try to take over. The only way to remove the earlier state 4898 * is by resetting. However, resetting on earlier gen is tricky as 4899 * it may impact the display and we are uncertain about the stability 4900 * of the reset, so this could be applied to even earlier gen. 4901 */ 4902 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 4903 WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 4904 } 4905 4906 int i915_gem_suspend(struct drm_i915_private *dev_priv) 4907 { 4908 struct drm_device *dev = &dev_priv->drm; 4909 int ret; 4910 4911 intel_runtime_pm_get(dev_priv); 4912 intel_suspend_gt_powersave(dev_priv); 4913 4914 mutex_lock(&dev->struct_mutex); 4915 4916 /* We have to flush all the executing contexts to main memory so 4917 * that they can saved in the hibernation image. To ensure the last 4918 * context image is coherent, we have to switch away from it. That 4919 * leaves the dev_priv->kernel_context still active when 4920 * we actually suspend, and its image in memory may not match the GPU 4921 * state. Fortunately, the kernel_context is disposable and we do 4922 * not rely on its state. 4923 */ 4924 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 4925 ret = i915_gem_switch_to_kernel_context(dev_priv); 4926 if (ret) 4927 goto err_unlock; 4928 4929 ret = i915_gem_wait_for_idle(dev_priv, 4930 I915_WAIT_INTERRUPTIBLE | 4931 I915_WAIT_LOCKED); 4932 if (ret && ret != -EIO) 4933 goto err_unlock; 4934 4935 assert_kernel_context_is_current(dev_priv); 4936 } 4937 i915_gem_contexts_lost(dev_priv); 4938 mutex_unlock(&dev->struct_mutex); 4939 4940 intel_uc_suspend(dev_priv); 4941 4942 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4943 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 4944 4945 /* As the idle_work is rearming if it detects a race, play safe and 4946 * repeat the flush until it is definitely idle. 4947 */ 4948 drain_delayed_work(&dev_priv->gt.idle_work); 4949 4950 /* Assert that we sucessfully flushed all the work and 4951 * reset the GPU back to its idle, low power state. 4952 */ 4953 WARN_ON(dev_priv->gt.awake); 4954 if (WARN_ON(!intel_engines_are_idle(dev_priv))) 4955 i915_gem_set_wedged(dev_priv); /* no hope, discard everything */ 4956 4957 /* 4958 * Neither the BIOS, ourselves or any other kernel 4959 * expects the system to be in execlists mode on startup, 4960 * so we need to reset the GPU back to legacy mode. And the only 4961 * known way to disable logical contexts is through a GPU reset. 4962 * 4963 * So in order to leave the system in a known default configuration, 4964 * always reset the GPU upon unload and suspend. Afterwards we then 4965 * clean up the GEM state tracking, flushing off the requests and 4966 * leaving the system in a known idle state. 4967 * 4968 * Note that is of the upmost importance that the GPU is idle and 4969 * all stray writes are flushed *before* we dismantle the backing 4970 * storage for the pinned objects. 4971 * 4972 * However, since we are uncertain that resetting the GPU on older 4973 * machines is a good idea, we don't - just in case it leaves the 4974 * machine in an unusable condition. 4975 */ 4976 i915_gem_sanitize(dev_priv); 4977 4978 intel_runtime_pm_put(dev_priv); 4979 return 0; 4980 4981 err_unlock: 4982 mutex_unlock(&dev->struct_mutex); 4983 intel_runtime_pm_put(dev_priv); 4984 return ret; 4985 } 4986 4987 void i915_gem_resume(struct drm_i915_private *i915) 4988 { 4989 WARN_ON(i915->gt.awake); 4990 4991 mutex_lock(&i915->drm.struct_mutex); 4992 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 4993 4994 i915_gem_restore_gtt_mappings(i915); 4995 i915_gem_restore_fences(i915); 4996 4997 /* 4998 * As we didn't flush the kernel context before suspend, we cannot 4999 * guarantee that the context image is complete. So let's just reset 5000 * it and start again. 5001 */ 5002 i915->gt.resume(i915); 5003 5004 if (i915_gem_init_hw(i915)) 5005 goto err_wedged; 5006 5007 intel_uc_resume(i915); 5008 5009 /* Always reload a context for powersaving. */ 5010 if (i915_gem_switch_to_kernel_context(i915)) 5011 goto err_wedged; 5012 5013 out_unlock: 5014 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5015 mutex_unlock(&i915->drm.struct_mutex); 5016 return; 5017 5018 err_wedged: 5019 if (!i915_terminally_wedged(&i915->gpu_error)) { 5020 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5021 i915_gem_set_wedged(i915); 5022 } 5023 goto out_unlock; 5024 } 5025 5026 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5027 { 5028 if (INTEL_GEN(dev_priv) < 5 || 5029 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5030 return; 5031 5032 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5033 DISP_TILE_SURFACE_SWIZZLING); 5034 5035 if (IS_GEN5(dev_priv)) 5036 return; 5037 5038 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5039 if (IS_GEN6(dev_priv)) 5040 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5041 else if (IS_GEN7(dev_priv)) 5042 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5043 else if (IS_GEN8(dev_priv)) 5044 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5045 else 5046 BUG(); 5047 } 5048 5049 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5050 { 5051 I915_WRITE(RING_CTL(base), 0); 5052 I915_WRITE(RING_HEAD(base), 0); 5053 I915_WRITE(RING_TAIL(base), 0); 5054 I915_WRITE(RING_START(base), 0); 5055 } 5056 5057 static void init_unused_rings(struct drm_i915_private *dev_priv) 5058 { 5059 if (IS_I830(dev_priv)) { 5060 init_unused_ring(dev_priv, PRB1_BASE); 5061 init_unused_ring(dev_priv, SRB0_BASE); 5062 init_unused_ring(dev_priv, SRB1_BASE); 5063 init_unused_ring(dev_priv, SRB2_BASE); 5064 init_unused_ring(dev_priv, SRB3_BASE); 5065 } else if (IS_GEN2(dev_priv)) { 5066 init_unused_ring(dev_priv, SRB0_BASE); 5067 init_unused_ring(dev_priv, SRB1_BASE); 5068 } else if (IS_GEN3(dev_priv)) { 5069 init_unused_ring(dev_priv, PRB1_BASE); 5070 init_unused_ring(dev_priv, PRB2_BASE); 5071 } 5072 } 5073 5074 static int __i915_gem_restart_engines(void *data) 5075 { 5076 struct drm_i915_private *i915 = data; 5077 struct intel_engine_cs *engine; 5078 enum intel_engine_id id; 5079 int err; 5080 5081 for_each_engine(engine, i915, id) { 5082 err = engine->init_hw(engine); 5083 if (err) { 5084 DRM_ERROR("Failed to restart %s (%d)\n", 5085 engine->name, err); 5086 return err; 5087 } 5088 } 5089 5090 return 0; 5091 } 5092 5093 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5094 { 5095 int ret; 5096 5097 dev_priv->gt.last_init_time = ktime_get(); 5098 5099 /* Double layer security blanket, see i915_gem_init() */ 5100 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5101 5102 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5103 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5104 5105 if (IS_HASWELL(dev_priv)) 5106 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5107 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5108 5109 if (HAS_PCH_NOP(dev_priv)) { 5110 if (IS_IVYBRIDGE(dev_priv)) { 5111 u32 temp = I915_READ(GEN7_MSG_CTL); 5112 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5113 I915_WRITE(GEN7_MSG_CTL, temp); 5114 } else if (INTEL_GEN(dev_priv) >= 7) { 5115 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5116 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5117 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5118 } 5119 } 5120 5121 i915_gem_init_swizzling(dev_priv); 5122 5123 /* 5124 * At least 830 can leave some of the unused rings 5125 * "active" (ie. head != tail) after resume which 5126 * will prevent c3 entry. Makes sure all unused rings 5127 * are totally idle. 5128 */ 5129 init_unused_rings(dev_priv); 5130 5131 BUG_ON(!dev_priv->kernel_context); 5132 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5133 ret = -EIO; 5134 goto out; 5135 } 5136 5137 ret = i915_ppgtt_init_hw(dev_priv); 5138 if (ret) { 5139 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5140 goto out; 5141 } 5142 5143 /* We can't enable contexts until all firmware is loaded */ 5144 ret = intel_uc_init_hw(dev_priv); 5145 if (ret) { 5146 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5147 goto out; 5148 } 5149 5150 intel_mocs_init_l3cc_table(dev_priv); 5151 5152 /* Only when the HW is re-initialised, can we replay the requests */ 5153 ret = __i915_gem_restart_engines(dev_priv); 5154 out: 5155 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5156 return ret; 5157 } 5158 5159 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5160 { 5161 struct i915_gem_context *ctx; 5162 struct intel_engine_cs *engine; 5163 enum intel_engine_id id; 5164 int err; 5165 5166 /* 5167 * As we reset the gpu during very early sanitisation, the current 5168 * register state on the GPU should reflect its defaults values. 5169 * We load a context onto the hw (with restore-inhibit), then switch 5170 * over to a second context to save that default register state. We 5171 * can then prime every new context with that state so they all start 5172 * from the same default HW values. 5173 */ 5174 5175 ctx = i915_gem_context_create_kernel(i915, 0); 5176 if (IS_ERR(ctx)) 5177 return PTR_ERR(ctx); 5178 5179 for_each_engine(engine, i915, id) { 5180 struct i915_request *rq; 5181 5182 rq = i915_request_alloc(engine, ctx); 5183 if (IS_ERR(rq)) { 5184 err = PTR_ERR(rq); 5185 goto out_ctx; 5186 } 5187 5188 err = 0; 5189 if (engine->init_context) 5190 err = engine->init_context(rq); 5191 5192 __i915_request_add(rq, true); 5193 if (err) 5194 goto err_active; 5195 } 5196 5197 err = i915_gem_switch_to_kernel_context(i915); 5198 if (err) 5199 goto err_active; 5200 5201 err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); 5202 if (err) 5203 goto err_active; 5204 5205 assert_kernel_context_is_current(i915); 5206 5207 for_each_engine(engine, i915, id) { 5208 struct i915_vma *state; 5209 5210 state = ctx->engine[id].state; 5211 if (!state) 5212 continue; 5213 5214 /* 5215 * As we will hold a reference to the logical state, it will 5216 * not be torn down with the context, and importantly the 5217 * object will hold onto its vma (making it possible for a 5218 * stray GTT write to corrupt our defaults). Unmap the vma 5219 * from the GTT to prevent such accidents and reclaim the 5220 * space. 5221 */ 5222 err = i915_vma_unbind(state); 5223 if (err) 5224 goto err_active; 5225 5226 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5227 if (err) 5228 goto err_active; 5229 5230 engine->default_state = i915_gem_object_get(state->obj); 5231 } 5232 5233 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5234 unsigned int found = intel_engines_has_context_isolation(i915); 5235 5236 /* 5237 * Make sure that classes with multiple engine instances all 5238 * share the same basic configuration. 5239 */ 5240 for_each_engine(engine, i915, id) { 5241 unsigned int bit = BIT(engine->uabi_class); 5242 unsigned int expected = engine->default_state ? bit : 0; 5243 5244 if ((found & bit) != expected) { 5245 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5246 engine->uabi_class, engine->name); 5247 } 5248 } 5249 } 5250 5251 out_ctx: 5252 i915_gem_context_set_closed(ctx); 5253 i915_gem_context_put(ctx); 5254 return err; 5255 5256 err_active: 5257 /* 5258 * If we have to abandon now, we expect the engines to be idle 5259 * and ready to be torn-down. First try to flush any remaining 5260 * request, ensure we are pointing at the kernel context and 5261 * then remove it. 5262 */ 5263 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5264 goto out_ctx; 5265 5266 if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) 5267 goto out_ctx; 5268 5269 i915_gem_contexts_lost(i915); 5270 goto out_ctx; 5271 } 5272 5273 int i915_gem_init(struct drm_i915_private *dev_priv) 5274 { 5275 int ret; 5276 5277 /* 5278 * We need to fallback to 4K pages since gvt gtt handling doesn't 5279 * support huge page entries - we will need to check either hypervisor 5280 * mm can support huge guest page or just do emulation in gvt. 5281 */ 5282 if (intel_vgpu_active(dev_priv)) 5283 mkwrite_device_info(dev_priv)->page_sizes = 5284 I915_GTT_PAGE_SIZE_4K; 5285 5286 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5287 5288 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5289 dev_priv->gt.resume = intel_lr_context_resume; 5290 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5291 } else { 5292 dev_priv->gt.resume = intel_legacy_submission_resume; 5293 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5294 } 5295 5296 ret = i915_gem_init_userptr(dev_priv); 5297 if (ret) 5298 return ret; 5299 5300 ret = intel_uc_init_misc(dev_priv); 5301 if (ret) 5302 return ret; 5303 5304 /* This is just a security blanket to placate dragons. 5305 * On some systems, we very sporadically observe that the first TLBs 5306 * used by the CS may be stale, despite us poking the TLB reset. If 5307 * we hold the forcewake during initialisation these problems 5308 * just magically go away. 5309 */ 5310 mutex_lock(&dev_priv->drm.struct_mutex); 5311 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5312 5313 ret = i915_gem_init_ggtt(dev_priv); 5314 if (ret) { 5315 GEM_BUG_ON(ret == -EIO); 5316 goto err_unlock; 5317 } 5318 5319 ret = i915_gem_contexts_init(dev_priv); 5320 if (ret) { 5321 GEM_BUG_ON(ret == -EIO); 5322 goto err_ggtt; 5323 } 5324 5325 ret = intel_engines_init(dev_priv); 5326 if (ret) { 5327 GEM_BUG_ON(ret == -EIO); 5328 goto err_context; 5329 } 5330 5331 intel_init_gt_powersave(dev_priv); 5332 5333 ret = intel_uc_init(dev_priv); 5334 if (ret) 5335 goto err_pm; 5336 5337 ret = i915_gem_init_hw(dev_priv); 5338 if (ret) 5339 goto err_uc_init; 5340 5341 /* 5342 * Despite its name intel_init_clock_gating applies both display 5343 * clock gating workarounds; GT mmio workarounds and the occasional 5344 * GT power context workaround. Worse, sometimes it includes a context 5345 * register workaround which we need to apply before we record the 5346 * default HW state for all contexts. 5347 * 5348 * FIXME: break up the workarounds and apply them at the right time! 5349 */ 5350 intel_init_clock_gating(dev_priv); 5351 5352 ret = __intel_engines_record_defaults(dev_priv); 5353 if (ret) 5354 goto err_init_hw; 5355 5356 if (i915_inject_load_failure()) { 5357 ret = -ENODEV; 5358 goto err_init_hw; 5359 } 5360 5361 if (i915_inject_load_failure()) { 5362 ret = -EIO; 5363 goto err_init_hw; 5364 } 5365 5366 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5367 mutex_unlock(&dev_priv->drm.struct_mutex); 5368 5369 return 0; 5370 5371 /* 5372 * Unwinding is complicated by that we want to handle -EIO to mean 5373 * disable GPU submission but keep KMS alive. We want to mark the 5374 * HW as irrevisibly wedged, but keep enough state around that the 5375 * driver doesn't explode during runtime. 5376 */ 5377 err_init_hw: 5378 i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); 5379 i915_gem_contexts_lost(dev_priv); 5380 intel_uc_fini_hw(dev_priv); 5381 err_uc_init: 5382 intel_uc_fini(dev_priv); 5383 err_pm: 5384 if (ret != -EIO) { 5385 intel_cleanup_gt_powersave(dev_priv); 5386 i915_gem_cleanup_engines(dev_priv); 5387 } 5388 err_context: 5389 if (ret != -EIO) 5390 i915_gem_contexts_fini(dev_priv); 5391 err_ggtt: 5392 err_unlock: 5393 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5394 mutex_unlock(&dev_priv->drm.struct_mutex); 5395 5396 intel_uc_fini_misc(dev_priv); 5397 5398 if (ret != -EIO) 5399 i915_gem_cleanup_userptr(dev_priv); 5400 5401 if (ret == -EIO) { 5402 /* 5403 * Allow engine initialisation to fail by marking the GPU as 5404 * wedged. But we only want to do this where the GPU is angry, 5405 * for all other failure, such as an allocation failure, bail. 5406 */ 5407 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5408 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5409 i915_gem_set_wedged(dev_priv); 5410 } 5411 ret = 0; 5412 } 5413 5414 i915_gem_drain_freed_objects(dev_priv); 5415 return ret; 5416 } 5417 5418 void i915_gem_init_mmio(struct drm_i915_private *i915) 5419 { 5420 i915_gem_sanitize(i915); 5421 } 5422 5423 void 5424 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5425 { 5426 struct intel_engine_cs *engine; 5427 enum intel_engine_id id; 5428 5429 for_each_engine(engine, dev_priv, id) 5430 dev_priv->gt.cleanup_engine(engine); 5431 } 5432 5433 void 5434 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5435 { 5436 int i; 5437 5438 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5439 !IS_CHERRYVIEW(dev_priv)) 5440 dev_priv->num_fence_regs = 32; 5441 else if (INTEL_GEN(dev_priv) >= 4 || 5442 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5443 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5444 dev_priv->num_fence_regs = 16; 5445 else 5446 dev_priv->num_fence_regs = 8; 5447 5448 if (intel_vgpu_active(dev_priv)) 5449 dev_priv->num_fence_regs = 5450 I915_READ(vgtif_reg(avail_rs.fence_num)); 5451 5452 /* Initialize fence registers to zero */ 5453 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5454 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5455 5456 fence->i915 = dev_priv; 5457 fence->id = i; 5458 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5459 } 5460 i915_gem_restore_fences(dev_priv); 5461 5462 i915_gem_detect_bit_6_swizzle(dev_priv); 5463 } 5464 5465 static void i915_gem_init__mm(struct drm_i915_private *i915) 5466 { 5467 spin_lock_init(&i915->mm.object_stat_lock); 5468 spin_lock_init(&i915->mm.obj_lock); 5469 spin_lock_init(&i915->mm.free_lock); 5470 5471 init_llist_head(&i915->mm.free_list); 5472 5473 INIT_LIST_HEAD(&i915->mm.unbound_list); 5474 INIT_LIST_HEAD(&i915->mm.bound_list); 5475 INIT_LIST_HEAD(&i915->mm.fence_list); 5476 INIT_LIST_HEAD(&i915->mm.userfault_list); 5477 5478 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5479 } 5480 5481 int 5482 i915_gem_load_init(struct drm_i915_private *dev_priv) 5483 { 5484 int err = -ENOMEM; 5485 5486 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5487 if (!dev_priv->objects) 5488 goto err_out; 5489 5490 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5491 if (!dev_priv->vmas) 5492 goto err_objects; 5493 5494 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5495 if (!dev_priv->luts) 5496 goto err_vmas; 5497 5498 dev_priv->requests = KMEM_CACHE(i915_request, 5499 SLAB_HWCACHE_ALIGN | 5500 SLAB_RECLAIM_ACCOUNT | 5501 SLAB_TYPESAFE_BY_RCU); 5502 if (!dev_priv->requests) 5503 goto err_luts; 5504 5505 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5506 SLAB_HWCACHE_ALIGN | 5507 SLAB_RECLAIM_ACCOUNT); 5508 if (!dev_priv->dependencies) 5509 goto err_requests; 5510 5511 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5512 if (!dev_priv->priorities) 5513 goto err_dependencies; 5514 5515 mutex_lock(&dev_priv->drm.struct_mutex); 5516 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5517 err = i915_gem_timeline_init__global(dev_priv); 5518 mutex_unlock(&dev_priv->drm.struct_mutex); 5519 if (err) 5520 goto err_priorities; 5521 5522 i915_gem_init__mm(dev_priv); 5523 5524 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5525 i915_gem_retire_work_handler); 5526 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5527 i915_gem_idle_work_handler); 5528 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5529 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5530 5531 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5532 5533 spin_lock_init(&dev_priv->fb_tracking.lock); 5534 5535 err = i915_gemfs_init(dev_priv); 5536 if (err) 5537 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5538 5539 return 0; 5540 5541 err_priorities: 5542 kmem_cache_destroy(dev_priv->priorities); 5543 err_dependencies: 5544 kmem_cache_destroy(dev_priv->dependencies); 5545 err_requests: 5546 kmem_cache_destroy(dev_priv->requests); 5547 err_luts: 5548 kmem_cache_destroy(dev_priv->luts); 5549 err_vmas: 5550 kmem_cache_destroy(dev_priv->vmas); 5551 err_objects: 5552 kmem_cache_destroy(dev_priv->objects); 5553 err_out: 5554 return err; 5555 } 5556 5557 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) 5558 { 5559 i915_gem_drain_freed_objects(dev_priv); 5560 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5561 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5562 WARN_ON(dev_priv->mm.object_count); 5563 5564 mutex_lock(&dev_priv->drm.struct_mutex); 5565 i915_gem_timeline_fini(&dev_priv->gt.global_timeline); 5566 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5567 mutex_unlock(&dev_priv->drm.struct_mutex); 5568 5569 kmem_cache_destroy(dev_priv->priorities); 5570 kmem_cache_destroy(dev_priv->dependencies); 5571 kmem_cache_destroy(dev_priv->requests); 5572 kmem_cache_destroy(dev_priv->luts); 5573 kmem_cache_destroy(dev_priv->vmas); 5574 kmem_cache_destroy(dev_priv->objects); 5575 5576 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5577 rcu_barrier(); 5578 5579 i915_gemfs_fini(dev_priv); 5580 } 5581 5582 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5583 { 5584 /* Discard all purgeable objects, let userspace recover those as 5585 * required after resuming. 5586 */ 5587 i915_gem_shrink_all(dev_priv); 5588 5589 return 0; 5590 } 5591 5592 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5593 { 5594 struct drm_i915_gem_object *obj; 5595 struct list_head *phases[] = { 5596 &dev_priv->mm.unbound_list, 5597 &dev_priv->mm.bound_list, 5598 NULL 5599 }, **p; 5600 5601 /* Called just before we write the hibernation image. 5602 * 5603 * We need to update the domain tracking to reflect that the CPU 5604 * will be accessing all the pages to create and restore from the 5605 * hibernation, and so upon restoration those pages will be in the 5606 * CPU domain. 5607 * 5608 * To make sure the hibernation image contains the latest state, 5609 * we update that state just before writing out the image. 5610 * 5611 * To try and reduce the hibernation image, we manually shrink 5612 * the objects as well, see i915_gem_freeze() 5613 */ 5614 5615 i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); 5616 i915_gem_drain_freed_objects(dev_priv); 5617 5618 spin_lock(&dev_priv->mm.obj_lock); 5619 for (p = phases; *p; p++) { 5620 list_for_each_entry(obj, *p, mm.link) 5621 __start_cpu_write(obj); 5622 } 5623 spin_unlock(&dev_priv->mm.obj_lock); 5624 5625 return 0; 5626 } 5627 5628 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5629 { 5630 struct drm_i915_file_private *file_priv = file->driver_priv; 5631 struct i915_request *request; 5632 5633 /* Clean up our request list when the client is going away, so that 5634 * later retire_requests won't dereference our soon-to-be-gone 5635 * file_priv. 5636 */ 5637 spin_lock(&file_priv->mm.lock); 5638 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 5639 request->file_priv = NULL; 5640 spin_unlock(&file_priv->mm.lock); 5641 } 5642 5643 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 5644 { 5645 struct drm_i915_file_private *file_priv; 5646 int ret; 5647 5648 DRM_DEBUG("\n"); 5649 5650 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5651 if (!file_priv) 5652 return -ENOMEM; 5653 5654 file->driver_priv = file_priv; 5655 file_priv->dev_priv = i915; 5656 file_priv->file = file; 5657 5658 spin_lock_init(&file_priv->mm.lock); 5659 INIT_LIST_HEAD(&file_priv->mm.request_list); 5660 5661 file_priv->bsd_engine = -1; 5662 5663 ret = i915_gem_context_open(i915, file); 5664 if (ret) 5665 kfree(file_priv); 5666 5667 return ret; 5668 } 5669 5670 /** 5671 * i915_gem_track_fb - update frontbuffer tracking 5672 * @old: current GEM buffer for the frontbuffer slots 5673 * @new: new GEM buffer for the frontbuffer slots 5674 * @frontbuffer_bits: bitmask of frontbuffer slots 5675 * 5676 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5677 * from @old and setting them in @new. Both @old and @new can be NULL. 5678 */ 5679 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5680 struct drm_i915_gem_object *new, 5681 unsigned frontbuffer_bits) 5682 { 5683 /* Control of individual bits within the mask are guarded by 5684 * the owning plane->mutex, i.e. we can never see concurrent 5685 * manipulation of individual bits. But since the bitfield as a whole 5686 * is updated using RMW, we need to use atomics in order to update 5687 * the bits. 5688 */ 5689 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5690 sizeof(atomic_t) * BITS_PER_BYTE); 5691 5692 if (old) { 5693 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5694 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5695 } 5696 5697 if (new) { 5698 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5699 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5700 } 5701 } 5702 5703 /* Allocate a new GEM object and fill it with the supplied data */ 5704 struct drm_i915_gem_object * 5705 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5706 const void *data, size_t size) 5707 { 5708 struct drm_i915_gem_object *obj; 5709 struct file *file; 5710 size_t offset; 5711 int err; 5712 5713 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5714 if (IS_ERR(obj)) 5715 return obj; 5716 5717 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); 5718 5719 file = obj->base.filp; 5720 offset = 0; 5721 do { 5722 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5723 struct page *page; 5724 void *pgdata, *vaddr; 5725 5726 err = pagecache_write_begin(file, file->f_mapping, 5727 offset, len, 0, 5728 &page, &pgdata); 5729 if (err < 0) 5730 goto fail; 5731 5732 vaddr = kmap(page); 5733 memcpy(vaddr, data, len); 5734 kunmap(page); 5735 5736 err = pagecache_write_end(file, file->f_mapping, 5737 offset, len, len, 5738 page, pgdata); 5739 if (err < 0) 5740 goto fail; 5741 5742 size -= len; 5743 data += len; 5744 offset += len; 5745 } while (size); 5746 5747 return obj; 5748 5749 fail: 5750 i915_gem_object_put(obj); 5751 return ERR_PTR(err); 5752 } 5753 5754 struct scatterlist * 5755 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5756 unsigned int n, 5757 unsigned int *offset) 5758 { 5759 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 5760 struct scatterlist *sg; 5761 unsigned int idx, count; 5762 5763 might_sleep(); 5764 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 5765 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 5766 5767 /* As we iterate forward through the sg, we record each entry in a 5768 * radixtree for quick repeated (backwards) lookups. If we have seen 5769 * this index previously, we will have an entry for it. 5770 * 5771 * Initial lookup is O(N), but this is amortized to O(1) for 5772 * sequential page access (where each new request is consecutive 5773 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 5774 * i.e. O(1) with a large constant! 5775 */ 5776 if (n < READ_ONCE(iter->sg_idx)) 5777 goto lookup; 5778 5779 mutex_lock(&iter->lock); 5780 5781 /* We prefer to reuse the last sg so that repeated lookup of this 5782 * (or the subsequent) sg are fast - comparing against the last 5783 * sg is faster than going through the radixtree. 5784 */ 5785 5786 sg = iter->sg_pos; 5787 idx = iter->sg_idx; 5788 count = __sg_page_count(sg); 5789 5790 while (idx + count <= n) { 5791 unsigned long exception, i; 5792 int ret; 5793 5794 /* If we cannot allocate and insert this entry, or the 5795 * individual pages from this range, cancel updating the 5796 * sg_idx so that on this lookup we are forced to linearly 5797 * scan onwards, but on future lookups we will try the 5798 * insertion again (in which case we need to be careful of 5799 * the error return reporting that we have already inserted 5800 * this index). 5801 */ 5802 ret = radix_tree_insert(&iter->radix, idx, sg); 5803 if (ret && ret != -EEXIST) 5804 goto scan; 5805 5806 exception = 5807 RADIX_TREE_EXCEPTIONAL_ENTRY | 5808 idx << RADIX_TREE_EXCEPTIONAL_SHIFT; 5809 for (i = 1; i < count; i++) { 5810 ret = radix_tree_insert(&iter->radix, idx + i, 5811 (void *)exception); 5812 if (ret && ret != -EEXIST) 5813 goto scan; 5814 } 5815 5816 idx += count; 5817 sg = ____sg_next(sg); 5818 count = __sg_page_count(sg); 5819 } 5820 5821 scan: 5822 iter->sg_pos = sg; 5823 iter->sg_idx = idx; 5824 5825 mutex_unlock(&iter->lock); 5826 5827 if (unlikely(n < idx)) /* insertion completed by another thread */ 5828 goto lookup; 5829 5830 /* In case we failed to insert the entry into the radixtree, we need 5831 * to look beyond the current sg. 5832 */ 5833 while (idx + count <= n) { 5834 idx += count; 5835 sg = ____sg_next(sg); 5836 count = __sg_page_count(sg); 5837 } 5838 5839 *offset = n - idx; 5840 return sg; 5841 5842 lookup: 5843 rcu_read_lock(); 5844 5845 sg = radix_tree_lookup(&iter->radix, n); 5846 GEM_BUG_ON(!sg); 5847 5848 /* If this index is in the middle of multi-page sg entry, 5849 * the radixtree will contain an exceptional entry that points 5850 * to the start of that range. We will return the pointer to 5851 * the base page and the offset of this page within the 5852 * sg entry's range. 5853 */ 5854 *offset = 0; 5855 if (unlikely(radix_tree_exception(sg))) { 5856 unsigned long base = 5857 (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; 5858 5859 sg = radix_tree_lookup(&iter->radix, base); 5860 GEM_BUG_ON(!sg); 5861 5862 *offset = n - base; 5863 } 5864 5865 rcu_read_unlock(); 5866 5867 return sg; 5868 } 5869 5870 struct page * 5871 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5872 { 5873 struct scatterlist *sg; 5874 unsigned int offset; 5875 5876 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5877 5878 sg = i915_gem_object_get_sg(obj, n, &offset); 5879 return nth_page(sg_page(sg), offset); 5880 } 5881 5882 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5883 struct page * 5884 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5885 unsigned int n) 5886 { 5887 struct page *page; 5888 5889 page = i915_gem_object_get_page(obj, n); 5890 if (!obj->mm.dirty) 5891 set_page_dirty(page); 5892 5893 return page; 5894 } 5895 5896 dma_addr_t 5897 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5898 unsigned long n) 5899 { 5900 struct scatterlist *sg; 5901 unsigned int offset; 5902 5903 sg = i915_gem_object_get_sg(obj, n, &offset); 5904 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5905 } 5906 5907 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) 5908 { 5909 struct sg_table *pages; 5910 int err; 5911 5912 if (align > obj->base.size) 5913 return -EINVAL; 5914 5915 if (obj->ops == &i915_gem_phys_ops) 5916 return 0; 5917 5918 if (obj->ops != &i915_gem_object_ops) 5919 return -EINVAL; 5920 5921 err = i915_gem_object_unbind(obj); 5922 if (err) 5923 return err; 5924 5925 mutex_lock(&obj->mm.lock); 5926 5927 if (obj->mm.madv != I915_MADV_WILLNEED) { 5928 err = -EFAULT; 5929 goto err_unlock; 5930 } 5931 5932 if (obj->mm.quirked) { 5933 err = -EFAULT; 5934 goto err_unlock; 5935 } 5936 5937 if (obj->mm.mapping) { 5938 err = -EBUSY; 5939 goto err_unlock; 5940 } 5941 5942 pages = fetch_and_zero(&obj->mm.pages); 5943 if (pages) { 5944 struct drm_i915_private *i915 = to_i915(obj->base.dev); 5945 5946 __i915_gem_object_reset_page_iter(obj); 5947 5948 spin_lock(&i915->mm.obj_lock); 5949 list_del(&obj->mm.link); 5950 spin_unlock(&i915->mm.obj_lock); 5951 } 5952 5953 obj->ops = &i915_gem_phys_ops; 5954 5955 err = ____i915_gem_object_get_pages(obj); 5956 if (err) 5957 goto err_xfer; 5958 5959 /* Perma-pin (until release) the physical set of pages */ 5960 __i915_gem_object_pin_pages(obj); 5961 5962 if (!IS_ERR_OR_NULL(pages)) 5963 i915_gem_object_ops.put_pages(obj, pages); 5964 mutex_unlock(&obj->mm.lock); 5965 return 0; 5966 5967 err_xfer: 5968 obj->ops = &i915_gem_object_ops; 5969 obj->mm.pages = pages; 5970 err_unlock: 5971 mutex_unlock(&obj->mm.lock); 5972 return err; 5973 } 5974 5975 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5976 #include "selftests/scatterlist.c" 5977 #include "selftests/mock_gem_device.c" 5978 #include "selftests/huge_gem_object.c" 5979 #include "selftests/huge_pages.c" 5980 #include "selftests/i915_gem_object.c" 5981 #include "selftests/i915_gem_coherency.c" 5982 #endif 5983