/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace.
If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 static u32 __i915_gem_park(struct drm_i915_private *i915) 141 { 142 lockdep_assert_held(&i915->drm.struct_mutex); 143 GEM_BUG_ON(i915->gt.active_requests); 144 GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); 145 146 if (!i915->gt.awake) 147 return I915_EPOCH_INVALID; 148 149 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); 150 151 /* 152 * Be paranoid and flush a concurrent interrupt to make sure 153 * we don't reactivate any irq tasklets after parking. 154 * 155 * FIXME: Note that even though we have waited for execlists to be idle, 156 * there may still be an in-flight interrupt even though the CSB 157 * is now empty. synchronize_irq() makes sure that a residual interrupt 158 * is completed before we continue, but it doesn't prevent the HW from 159 * raising a spurious interrupt later. To complete the shield we should 160 * coordinate disabling the CS irq with flushing the interrupts. 161 */ 162 synchronize_irq(i915->drm.irq); 163 164 intel_engines_park(i915); 165 i915_timelines_park(i915); 166 167 i915_pmu_gt_parked(i915); 168 i915_vma_parked(i915); 169 170 i915->gt.awake = false; 171 172 if (INTEL_GEN(i915) >= 6) 173 gen6_rps_idle(i915); 174 175 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); 176 177 intel_runtime_pm_put(i915); 178 179 return i915->gt.epoch; 180 } 181 182 void i915_gem_park(struct drm_i915_private *i915) 183 { 184 lockdep_assert_held(&i915->drm.struct_mutex); 185 GEM_BUG_ON(i915->gt.active_requests); 186 187 if (!i915->gt.awake) 188 return; 189 190 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ 191 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); 192 } 193 194 void i915_gem_unpark(struct drm_i915_private *i915) 195 { 196 lockdep_assert_held(&i915->drm.struct_mutex); 197 GEM_BUG_ON(!i915->gt.active_requests); 198 199 if (i915->gt.awake) 200 return; 201 202 intel_runtime_pm_get_noresume(i915); 203 204 /* 205 * It seems that the DMC likes to transition between the DC states a lot 206 * when there are no connected displays (no active power domains) during 207 * command submission. 208 * 209 * This activity has negative impact on the performance of the chip with 210 * huge latencies observed in the interrupt handler and elsewhere. 211 * 212 * Work around it by grabbing a GT IRQ power domain whilst there is any 213 * GT activity, preventing any DC state transitions. 
214 */ 215 intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); 216 217 i915->gt.awake = true; 218 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ 219 i915->gt.epoch = 1; 220 221 intel_enable_gt_powersave(i915); 222 i915_update_gfx_val(i915); 223 if (INTEL_GEN(i915) >= 6) 224 gen6_rps_busy(i915); 225 i915_pmu_gt_unparked(i915); 226 227 intel_engines_unpark(i915); 228 229 i915_queue_hangcheck(i915); 230 231 queue_delayed_work(i915->wq, 232 &i915->gt.retire_work, 233 round_jiffies_up_relative(HZ)); 234 } 235 236 int 237 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 238 struct drm_file *file) 239 { 240 struct drm_i915_private *dev_priv = to_i915(dev); 241 struct i915_ggtt *ggtt = &dev_priv->ggtt; 242 struct drm_i915_gem_get_aperture *args = data; 243 struct i915_vma *vma; 244 u64 pinned; 245 246 pinned = ggtt->base.reserved; 247 mutex_lock(&dev->struct_mutex); 248 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 249 if (i915_vma_is_pinned(vma)) 250 pinned += vma->node.size; 251 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 252 if (i915_vma_is_pinned(vma)) 253 pinned += vma->node.size; 254 mutex_unlock(&dev->struct_mutex); 255 256 args->aper_size = ggtt->base.total; 257 args->aper_available_size = args->aper_size - pinned; 258 259 return 0; 260 } 261 262 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 263 { 264 struct address_space *mapping = obj->base.filp->f_mapping; 265 drm_dma_handle_t *phys; 266 struct sg_table *st; 267 struct scatterlist *sg; 268 char *vaddr; 269 int i; 270 int err; 271 272 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 273 return -EINVAL; 274 275 /* Always aligning to the object size, allows a single allocation 276 * to handle all possible callers, and given typical object sizes, 277 * the alignment of the buddy allocation will naturally match. 
278 */ 279 phys = drm_pci_alloc(obj->base.dev, 280 roundup_pow_of_two(obj->base.size), 281 roundup_pow_of_two(obj->base.size)); 282 if (!phys) 283 return -ENOMEM; 284 285 vaddr = phys->vaddr; 286 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 287 struct page *page; 288 char *src; 289 290 page = shmem_read_mapping_page(mapping, i); 291 if (IS_ERR(page)) { 292 err = PTR_ERR(page); 293 goto err_phys; 294 } 295 296 src = kmap_atomic(page); 297 memcpy(vaddr, src, PAGE_SIZE); 298 drm_clflush_virt_range(vaddr, PAGE_SIZE); 299 kunmap_atomic(src); 300 301 put_page(page); 302 vaddr += PAGE_SIZE; 303 } 304 305 i915_gem_chipset_flush(to_i915(obj->base.dev)); 306 307 st = kmalloc(sizeof(*st), GFP_KERNEL); 308 if (!st) { 309 err = -ENOMEM; 310 goto err_phys; 311 } 312 313 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 314 kfree(st); 315 err = -ENOMEM; 316 goto err_phys; 317 } 318 319 sg = st->sgl; 320 sg->offset = 0; 321 sg->length = obj->base.size; 322 323 sg_dma_address(sg) = phys->busaddr; 324 sg_dma_len(sg) = obj->base.size; 325 326 obj->phys_handle = phys; 327 328 __i915_gem_object_set_pages(obj, st, sg->length); 329 330 return 0; 331 332 err_phys: 333 drm_pci_free(obj->base.dev, phys); 334 335 return err; 336 } 337 338 static void __start_cpu_write(struct drm_i915_gem_object *obj) 339 { 340 obj->read_domains = I915_GEM_DOMAIN_CPU; 341 obj->write_domain = I915_GEM_DOMAIN_CPU; 342 if (cpu_write_needs_clflush(obj)) 343 obj->cache_dirty = true; 344 } 345 346 static void 347 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 348 struct sg_table *pages, 349 bool needs_clflush) 350 { 351 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 352 353 if (obj->mm.madv == I915_MADV_DONTNEED) 354 obj->mm.dirty = false; 355 356 if (needs_clflush && 357 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && 358 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) 359 drm_clflush_sg(pages); 360 361 __start_cpu_write(obj); 362 } 363 364 static void 365 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 366 struct sg_table *pages) 367 { 368 __i915_gem_object_release_shmem(obj, pages, false); 369 370 if (obj->mm.dirty) { 371 struct address_space *mapping = obj->base.filp->f_mapping; 372 char *vaddr = obj->phys_handle->vaddr; 373 int i; 374 375 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 376 struct page *page; 377 char *dst; 378 379 page = shmem_read_mapping_page(mapping, i); 380 if (IS_ERR(page)) 381 continue; 382 383 dst = kmap_atomic(page); 384 drm_clflush_virt_range(vaddr, PAGE_SIZE); 385 memcpy(dst, vaddr, PAGE_SIZE); 386 kunmap_atomic(dst); 387 388 set_page_dirty(page); 389 if (obj->mm.madv == I915_MADV_WILLNEED) 390 mark_page_accessed(page); 391 put_page(page); 392 vaddr += PAGE_SIZE; 393 } 394 obj->mm.dirty = false; 395 } 396 397 sg_free_table(pages); 398 kfree(pages); 399 400 drm_pci_free(obj->base.dev, obj->phys_handle); 401 } 402 403 static void 404 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 405 { 406 i915_gem_object_unpin_pages(obj); 407 } 408 409 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 410 .get_pages = i915_gem_object_get_pages_phys, 411 .put_pages = i915_gem_object_put_pages_phys, 412 .release = i915_gem_object_release_phys, 413 }; 414 415 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 416 417 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 418 { 419 struct i915_vma *vma; 420 LIST_HEAD(still_in_list); 421 int ret; 422 423 lockdep_assert_held(&obj->base.dev->struct_mutex); 424 425 /* Closed vma are 
removed from the obj->vma_list - but they may 426 * still have an active binding on the object. To remove those we 427 * must wait for all rendering to complete to the object (as unbinding 428 * must anyway), and retire the requests. 429 */ 430 ret = i915_gem_object_set_to_cpu_domain(obj, false); 431 if (ret) 432 return ret; 433 434 while ((vma = list_first_entry_or_null(&obj->vma_list, 435 struct i915_vma, 436 obj_link))) { 437 list_move_tail(&vma->obj_link, &still_in_list); 438 ret = i915_vma_unbind(vma); 439 if (ret) 440 break; 441 } 442 list_splice(&still_in_list, &obj->vma_list); 443 444 return ret; 445 } 446 447 static long 448 i915_gem_object_wait_fence(struct dma_fence *fence, 449 unsigned int flags, 450 long timeout, 451 struct intel_rps_client *rps_client) 452 { 453 struct i915_request *rq; 454 455 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 456 457 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 458 return timeout; 459 460 if (!dma_fence_is_i915(fence)) 461 return dma_fence_wait_timeout(fence, 462 flags & I915_WAIT_INTERRUPTIBLE, 463 timeout); 464 465 rq = to_request(fence); 466 if (i915_request_completed(rq)) 467 goto out; 468 469 /* 470 * This client is about to stall waiting for the GPU. In many cases 471 * this is undesirable and limits the throughput of the system, as 472 * many clients cannot continue processing user input/output whilst 473 * blocked. RPS autotuning may take tens of milliseconds to respond 474 * to the GPU load and thus incurs additional latency for the client. 475 * We can circumvent that by promoting the GPU frequency to maximum 476 * before we wait. This makes the GPU throttle up much more quickly 477 * (good for benchmarks and user experience, e.g. window animations), 478 * but at a cost of spending more power processing the workload 479 * (bad for battery). Not all clients even want their results 480 * immediately and for them we should just let the GPU select its own 481 * frequency to maximise efficiency. To prevent a single client from 482 * forcing the clocks too high for the whole system, we only allow 483 * each client to waitboost once in a busy period. 484 */ 485 if (rps_client && !i915_request_started(rq)) { 486 if (INTEL_GEN(rq->i915) >= 6) 487 gen6_rps_boost(rq, rps_client); 488 } 489 490 timeout = i915_request_wait(rq, flags, timeout); 491 492 out: 493 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) 494 i915_request_retire_upto(rq); 495 496 return timeout; 497 } 498 499 static long 500 i915_gem_object_wait_reservation(struct reservation_object *resv, 501 unsigned int flags, 502 long timeout, 503 struct intel_rps_client *rps_client) 504 { 505 unsigned int seq = __read_seqcount_begin(&resv->seq); 506 struct dma_fence *excl; 507 bool prune_fences = false; 508 509 if (flags & I915_WAIT_ALL) { 510 struct dma_fence **shared; 511 unsigned int count, i; 512 int ret; 513 514 ret = reservation_object_get_fences_rcu(resv, 515 &excl, &count, &shared); 516 if (ret) 517 return ret; 518 519 for (i = 0; i < count; i++) { 520 timeout = i915_gem_object_wait_fence(shared[i], 521 flags, timeout, 522 rps_client); 523 if (timeout < 0) 524 break; 525 526 dma_fence_put(shared[i]); 527 } 528 529 for (; i < count; i++) 530 dma_fence_put(shared[i]); 531 kfree(shared); 532 533 /* 534 * If both shared fences and an exclusive fence exist, 535 * then by construction the shared fences must be later 536 * than the exclusive fence. If we successfully wait for 537 * all the shared fences, we know that the exclusive fence 538 * must all be signaled. 
If all the shared fences are 539 * signaled, we can prune the array and recover the 540 * floating references on the fences/requests. 541 */ 542 prune_fences = count && timeout >= 0; 543 } else { 544 excl = reservation_object_get_excl_rcu(resv); 545 } 546 547 if (excl && timeout >= 0) 548 timeout = i915_gem_object_wait_fence(excl, flags, timeout, 549 rps_client); 550 551 dma_fence_put(excl); 552 553 /* 554 * Opportunistically prune the fences iff we know they have *all* been 555 * signaled and that the reservation object has not been changed (i.e. 556 * no new fences have been added). 557 */ 558 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 559 if (reservation_object_trylock(resv)) { 560 if (!__read_seqcount_retry(&resv->seq, seq)) 561 reservation_object_add_excl_fence(resv, NULL); 562 reservation_object_unlock(resv); 563 } 564 } 565 566 return timeout; 567 } 568 569 static void __fence_set_priority(struct dma_fence *fence, 570 const struct i915_sched_attr *attr) 571 { 572 struct i915_request *rq; 573 struct intel_engine_cs *engine; 574 575 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) 576 return; 577 578 rq = to_request(fence); 579 engine = rq->engine; 580 581 local_bh_disable(); 582 rcu_read_lock(); /* RCU serialisation for set-wedged protection */ 583 if (engine->schedule) 584 engine->schedule(rq, attr); 585 rcu_read_unlock(); 586 local_bh_enable(); /* kick the tasklets if queues were reprioritised */ 587 } 588 589 static void fence_set_priority(struct dma_fence *fence, 590 const struct i915_sched_attr *attr) 591 { 592 /* Recurse once into a fence-array */ 593 if (dma_fence_is_array(fence)) { 594 struct dma_fence_array *array = to_dma_fence_array(fence); 595 int i; 596 597 for (i = 0; i < array->num_fences; i++) 598 __fence_set_priority(array->fences[i], attr); 599 } else { 600 __fence_set_priority(fence, attr); 601 } 602 } 603 604 int 605 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 606 unsigned int flags, 607 const struct i915_sched_attr *attr) 608 { 609 struct dma_fence *excl; 610 611 if (flags & I915_WAIT_ALL) { 612 struct dma_fence **shared; 613 unsigned int count, i; 614 int ret; 615 616 ret = reservation_object_get_fences_rcu(obj->resv, 617 &excl, &count, &shared); 618 if (ret) 619 return ret; 620 621 for (i = 0; i < count; i++) { 622 fence_set_priority(shared[i], attr); 623 dma_fence_put(shared[i]); 624 } 625 626 kfree(shared); 627 } else { 628 excl = reservation_object_get_excl_rcu(obj->resv); 629 } 630 631 if (excl) { 632 fence_set_priority(excl, attr); 633 dma_fence_put(excl); 634 } 635 return 0; 636 } 637 638 /** 639 * Waits for rendering to the object to be completed 640 * @obj: i915 gem object 641 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 642 * @timeout: how long to wait 643 * @rps_client: client (user process) to charge for any waitboosting 644 */ 645 int 646 i915_gem_object_wait(struct drm_i915_gem_object *obj, 647 unsigned int flags, 648 long timeout, 649 struct intel_rps_client *rps_client) 650 { 651 might_sleep(); 652 #if IS_ENABLED(CONFIG_LOCKDEP) 653 GEM_BUG_ON(debug_locks && 654 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 655 !!(flags & I915_WAIT_LOCKED)); 656 #endif 657 GEM_BUG_ON(timeout < 0); 658 659 timeout = i915_gem_object_wait_reservation(obj->resv, 660 flags, timeout, 661 rps_client); 662 return timeout < 0 ? 
timeout : 0; 663 } 664 665 static struct intel_rps_client *to_rps_client(struct drm_file *file) 666 { 667 struct drm_i915_file_private *fpriv = file->driver_priv; 668 669 return &fpriv->rps_client; 670 } 671 672 static int 673 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 674 struct drm_i915_gem_pwrite *args, 675 struct drm_file *file) 676 { 677 void *vaddr = obj->phys_handle->vaddr + args->offset; 678 char __user *user_data = u64_to_user_ptr(args->data_ptr); 679 680 /* We manually control the domain here and pretend that it 681 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 682 */ 683 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 684 if (copy_from_user(vaddr, user_data, args->size)) 685 return -EFAULT; 686 687 drm_clflush_virt_range(vaddr, args->size); 688 i915_gem_chipset_flush(to_i915(obj->base.dev)); 689 690 intel_fb_obj_flush(obj, ORIGIN_CPU); 691 return 0; 692 } 693 694 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 695 { 696 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 697 } 698 699 void i915_gem_object_free(struct drm_i915_gem_object *obj) 700 { 701 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 702 kmem_cache_free(dev_priv->objects, obj); 703 } 704 705 static int 706 i915_gem_create(struct drm_file *file, 707 struct drm_i915_private *dev_priv, 708 uint64_t size, 709 uint32_t *handle_p) 710 { 711 struct drm_i915_gem_object *obj; 712 int ret; 713 u32 handle; 714 715 size = roundup(size, PAGE_SIZE); 716 if (size == 0) 717 return -EINVAL; 718 719 /* Allocate the new object */ 720 obj = i915_gem_object_create(dev_priv, size); 721 if (IS_ERR(obj)) 722 return PTR_ERR(obj); 723 724 ret = drm_gem_handle_create(file, &obj->base, &handle); 725 /* drop reference from allocate - handle holds it now */ 726 i915_gem_object_put(obj); 727 if (ret) 728 return ret; 729 730 *handle_p = handle; 731 return 0; 732 } 733 734 int 735 i915_gem_dumb_create(struct drm_file *file, 736 struct drm_device *dev, 737 struct drm_mode_create_dumb *args) 738 { 739 /* have to work out size/pitch and return them */ 740 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 741 args->size = args->pitch * args->height; 742 return i915_gem_create(file, to_i915(dev), 743 args->size, &args->handle); 744 } 745 746 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) 747 { 748 return !(obj->cache_level == I915_CACHE_NONE || 749 obj->cache_level == I915_CACHE_WT); 750 } 751 752 /** 753 * Creates a new mm object and returns a handle to it. 754 * @dev: drm device pointer 755 * @data: ioctl data blob 756 * @file: drm file pointer 757 */ 758 int 759 i915_gem_create_ioctl(struct drm_device *dev, void *data, 760 struct drm_file *file) 761 { 762 struct drm_i915_private *dev_priv = to_i915(dev); 763 struct drm_i915_gem_create *args = data; 764 765 i915_gem_flush_free_objects(dev_priv); 766 767 return i915_gem_create(file, dev_priv, 768 args->size, &args->handle); 769 } 770 771 static inline enum fb_op_origin 772 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) 773 { 774 return (domain == I915_GEM_DOMAIN_GTT ? 775 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 776 } 777 778 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) 779 { 780 /* 781 * No actual flushing is required for the GTT write domain for reads 782 * from the GTT domain. Writes to it "immediately" go to main memory 783 * as far as we know, so there's no chipset flush. It also doesn't 784 * land in the GPU render cache. 
785 * 786 * However, we do have to enforce the order so that all writes through 787 * the GTT land before any writes to the device, such as updates to 788 * the GATT itself. 789 * 790 * We also have to wait a bit for the writes to land from the GTT. 791 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 792 * timing. This issue has only been observed when switching quickly 793 * between GTT writes and CPU reads from inside the kernel on recent hw, 794 * and it appears to only affect discrete GTT blocks (i.e. on LLC 795 * system agents we cannot reproduce this behaviour, until Cannonlake 796 * that was!). 797 */ 798 799 wmb(); 800 801 intel_runtime_pm_get(dev_priv); 802 spin_lock_irq(&dev_priv->uncore.lock); 803 804 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); 805 806 spin_unlock_irq(&dev_priv->uncore.lock); 807 intel_runtime_pm_put(dev_priv); 808 } 809 810 static void 811 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) 812 { 813 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 814 struct i915_vma *vma; 815 816 if (!(obj->write_domain & flush_domains)) 817 return; 818 819 switch (obj->write_domain) { 820 case I915_GEM_DOMAIN_GTT: 821 i915_gem_flush_ggtt_writes(dev_priv); 822 823 intel_fb_obj_flush(obj, 824 fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); 825 826 for_each_ggtt_vma(vma, obj) { 827 if (vma->iomap) 828 continue; 829 830 i915_vma_unset_ggtt_write(vma); 831 } 832 break; 833 834 case I915_GEM_DOMAIN_CPU: 835 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 836 break; 837 838 case I915_GEM_DOMAIN_RENDER: 839 if (gpu_write_needs_clflush(obj)) 840 obj->cache_dirty = true; 841 break; 842 } 843 844 obj->write_domain = 0; 845 } 846 847 static inline int 848 __copy_to_user_swizzled(char __user *cpu_vaddr, 849 const char *gpu_vaddr, int gpu_offset, 850 int length) 851 { 852 int ret, cpu_offset = 0; 853 854 while (length > 0) { 855 int cacheline_end = ALIGN(gpu_offset + 1, 64); 856 int this_length = min(cacheline_end - gpu_offset, length); 857 int swizzled_gpu_offset = gpu_offset ^ 64; 858 859 ret = __copy_to_user(cpu_vaddr + cpu_offset, 860 gpu_vaddr + swizzled_gpu_offset, 861 this_length); 862 if (ret) 863 return ret + length; 864 865 cpu_offset += this_length; 866 gpu_offset += this_length; 867 length -= this_length; 868 } 869 870 return 0; 871 } 872 873 static inline int 874 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 875 const char __user *cpu_vaddr, 876 int length) 877 { 878 int ret, cpu_offset = 0; 879 880 while (length > 0) { 881 int cacheline_end = ALIGN(gpu_offset + 1, 64); 882 int this_length = min(cacheline_end - gpu_offset, length); 883 int swizzled_gpu_offset = gpu_offset ^ 64; 884 885 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 886 cpu_vaddr + cpu_offset, 887 this_length); 888 if (ret) 889 return ret + length; 890 891 cpu_offset += this_length; 892 gpu_offset += this_length; 893 length -= this_length; 894 } 895 896 return 0; 897 } 898 899 /* 900 * Pins the specified object's pages and synchronizes the object with 901 * GPU accesses. Sets needs_clflush to non-zero if the caller should 902 * flush the object from the CPU cache. 
903 */ 904 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 905 unsigned int *needs_clflush) 906 { 907 int ret; 908 909 lockdep_assert_held(&obj->base.dev->struct_mutex); 910 911 *needs_clflush = 0; 912 if (!i915_gem_object_has_struct_page(obj)) 913 return -ENODEV; 914 915 ret = i915_gem_object_wait(obj, 916 I915_WAIT_INTERRUPTIBLE | 917 I915_WAIT_LOCKED, 918 MAX_SCHEDULE_TIMEOUT, 919 NULL); 920 if (ret) 921 return ret; 922 923 ret = i915_gem_object_pin_pages(obj); 924 if (ret) 925 return ret; 926 927 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || 928 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 929 ret = i915_gem_object_set_to_cpu_domain(obj, false); 930 if (ret) 931 goto err_unpin; 932 else 933 goto out; 934 } 935 936 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 937 938 /* If we're not in the cpu read domain, set ourself into the gtt 939 * read domain and manually flush cachelines (if required). This 940 * optimizes for the case when the gpu will dirty the data 941 * anyway again before the next pread happens. 942 */ 943 if (!obj->cache_dirty && 944 !(obj->read_domains & I915_GEM_DOMAIN_CPU)) 945 *needs_clflush = CLFLUSH_BEFORE; 946 947 out: 948 /* return with the pages pinned */ 949 return 0; 950 951 err_unpin: 952 i915_gem_object_unpin_pages(obj); 953 return ret; 954 } 955 956 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 957 unsigned int *needs_clflush) 958 { 959 int ret; 960 961 lockdep_assert_held(&obj->base.dev->struct_mutex); 962 963 *needs_clflush = 0; 964 if (!i915_gem_object_has_struct_page(obj)) 965 return -ENODEV; 966 967 ret = i915_gem_object_wait(obj, 968 I915_WAIT_INTERRUPTIBLE | 969 I915_WAIT_LOCKED | 970 I915_WAIT_ALL, 971 MAX_SCHEDULE_TIMEOUT, 972 NULL); 973 if (ret) 974 return ret; 975 976 ret = i915_gem_object_pin_pages(obj); 977 if (ret) 978 return ret; 979 980 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || 981 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 982 ret = i915_gem_object_set_to_cpu_domain(obj, true); 983 if (ret) 984 goto err_unpin; 985 else 986 goto out; 987 } 988 989 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 990 991 /* If we're not in the cpu write domain, set ourself into the 992 * gtt write domain and manually flush cachelines (as required). 993 * This optimizes for the case when the gpu will use the data 994 * right away and we therefore have to clflush anyway. 995 */ 996 if (!obj->cache_dirty) { 997 *needs_clflush |= CLFLUSH_AFTER; 998 999 /* 1000 * Same trick applies to invalidate partially written 1001 * cachelines read before writing. 1002 */ 1003 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU)) 1004 *needs_clflush |= CLFLUSH_BEFORE; 1005 } 1006 1007 out: 1008 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1009 obj->mm.dirty = true; 1010 /* return with the pages pinned */ 1011 return 0; 1012 1013 err_unpin: 1014 i915_gem_object_unpin_pages(obj); 1015 return ret; 1016 } 1017 1018 static void 1019 shmem_clflush_swizzled_range(char *addr, unsigned long length, 1020 bool swizzled) 1021 { 1022 if (unlikely(swizzled)) { 1023 unsigned long start = (unsigned long) addr; 1024 unsigned long end = (unsigned long) addr + length; 1025 1026 /* For swizzling simply ensure that we always flush both 1027 * channels. Lame, but simple and it works. Swizzled 1028 * pwrite/pread is far from a hotpath - current userspace 1029 * doesn't use it at all. 
*/ 1030 start = round_down(start, 128); 1031 end = round_up(end, 128); 1032 1033 drm_clflush_virt_range((void *)start, end - start); 1034 } else { 1035 drm_clflush_virt_range(addr, length); 1036 } 1037 1038 } 1039 1040 /* Only difference to the fast-path function is that this can handle bit17 1041 * and uses non-atomic copy and kmap functions. */ 1042 static int 1043 shmem_pread_slow(struct page *page, int offset, int length, 1044 char __user *user_data, 1045 bool page_do_bit17_swizzling, bool needs_clflush) 1046 { 1047 char *vaddr; 1048 int ret; 1049 1050 vaddr = kmap(page); 1051 if (needs_clflush) 1052 shmem_clflush_swizzled_range(vaddr + offset, length, 1053 page_do_bit17_swizzling); 1054 1055 if (page_do_bit17_swizzling) 1056 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 1057 else 1058 ret = __copy_to_user(user_data, vaddr + offset, length); 1059 kunmap(page); 1060 1061 return ret ? - EFAULT : 0; 1062 } 1063 1064 static int 1065 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 1066 bool page_do_bit17_swizzling, bool needs_clflush) 1067 { 1068 int ret; 1069 1070 ret = -ENODEV; 1071 if (!page_do_bit17_swizzling) { 1072 char *vaddr = kmap_atomic(page); 1073 1074 if (needs_clflush) 1075 drm_clflush_virt_range(vaddr + offset, length); 1076 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1077 kunmap_atomic(vaddr); 1078 } 1079 if (ret == 0) 1080 return 0; 1081 1082 return shmem_pread_slow(page, offset, length, user_data, 1083 page_do_bit17_swizzling, needs_clflush); 1084 } 1085 1086 static int 1087 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 1088 struct drm_i915_gem_pread *args) 1089 { 1090 char __user *user_data; 1091 u64 remain; 1092 unsigned int obj_do_bit17_swizzling; 1093 unsigned int needs_clflush; 1094 unsigned int idx, offset; 1095 int ret; 1096 1097 obj_do_bit17_swizzling = 0; 1098 if (i915_gem_object_needs_bit17_swizzle(obj)) 1099 obj_do_bit17_swizzling = BIT(17); 1100 1101 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 1102 if (ret) 1103 return ret; 1104 1105 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 1106 mutex_unlock(&obj->base.dev->struct_mutex); 1107 if (ret) 1108 return ret; 1109 1110 remain = args->size; 1111 user_data = u64_to_user_ptr(args->data_ptr); 1112 offset = offset_in_page(args->offset); 1113 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1114 struct page *page = i915_gem_object_get_page(obj, idx); 1115 int length; 1116 1117 length = remain; 1118 if (offset + length > PAGE_SIZE) 1119 length = PAGE_SIZE - offset; 1120 1121 ret = shmem_pread(page, offset, length, user_data, 1122 page_to_phys(page) & obj_do_bit17_swizzling, 1123 needs_clflush); 1124 if (ret) 1125 break; 1126 1127 remain -= length; 1128 user_data += length; 1129 offset = 0; 1130 } 1131 1132 i915_gem_obj_finish_shmem_access(obj); 1133 return ret; 1134 } 1135 1136 static inline bool 1137 gtt_user_read(struct io_mapping *mapping, 1138 loff_t base, int offset, 1139 char __user *user_data, int length) 1140 { 1141 void __iomem *vaddr; 1142 unsigned long unwritten; 1143 1144 /* We can use the cpu mem copy function because this is X86. 
*/ 1145 vaddr = io_mapping_map_atomic_wc(mapping, base); 1146 unwritten = __copy_to_user_inatomic(user_data, 1147 (void __force *)vaddr + offset, 1148 length); 1149 io_mapping_unmap_atomic(vaddr); 1150 if (unwritten) { 1151 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1152 unwritten = copy_to_user(user_data, 1153 (void __force *)vaddr + offset, 1154 length); 1155 io_mapping_unmap(vaddr); 1156 } 1157 return unwritten; 1158 } 1159 1160 static int 1161 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1162 const struct drm_i915_gem_pread *args) 1163 { 1164 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1165 struct i915_ggtt *ggtt = &i915->ggtt; 1166 struct drm_mm_node node; 1167 struct i915_vma *vma; 1168 void __user *user_data; 1169 u64 remain, offset; 1170 int ret; 1171 1172 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1173 if (ret) 1174 return ret; 1175 1176 intel_runtime_pm_get(i915); 1177 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1178 PIN_MAPPABLE | 1179 PIN_NONFAULT | 1180 PIN_NONBLOCK); 1181 if (!IS_ERR(vma)) { 1182 node.start = i915_ggtt_offset(vma); 1183 node.allocated = false; 1184 ret = i915_vma_put_fence(vma); 1185 if (ret) { 1186 i915_vma_unpin(vma); 1187 vma = ERR_PTR(ret); 1188 } 1189 } 1190 if (IS_ERR(vma)) { 1191 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1192 if (ret) 1193 goto out_unlock; 1194 GEM_BUG_ON(!node.allocated); 1195 } 1196 1197 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1198 if (ret) 1199 goto out_unpin; 1200 1201 mutex_unlock(&i915->drm.struct_mutex); 1202 1203 user_data = u64_to_user_ptr(args->data_ptr); 1204 remain = args->size; 1205 offset = args->offset; 1206 1207 while (remain > 0) { 1208 /* Operation in this page 1209 * 1210 * page_base = page offset within aperture 1211 * page_offset = offset within page 1212 * page_length = bytes to copy for this page 1213 */ 1214 u32 page_base = node.start; 1215 unsigned page_offset = offset_in_page(offset); 1216 unsigned page_length = PAGE_SIZE - page_offset; 1217 page_length = remain < page_length ? remain : page_length; 1218 if (node.allocated) { 1219 wmb(); 1220 ggtt->base.insert_page(&ggtt->base, 1221 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1222 node.start, I915_CACHE_NONE, 0); 1223 wmb(); 1224 } else { 1225 page_base += offset & PAGE_MASK; 1226 } 1227 1228 if (gtt_user_read(&ggtt->iomap, page_base, page_offset, 1229 user_data, page_length)) { 1230 ret = -EFAULT; 1231 break; 1232 } 1233 1234 remain -= page_length; 1235 user_data += page_length; 1236 offset += page_length; 1237 } 1238 1239 mutex_lock(&i915->drm.struct_mutex); 1240 out_unpin: 1241 if (node.allocated) { 1242 wmb(); 1243 ggtt->base.clear_range(&ggtt->base, 1244 node.start, node.size); 1245 remove_mappable_node(&node); 1246 } else { 1247 i915_vma_unpin(vma); 1248 } 1249 out_unlock: 1250 intel_runtime_pm_put(i915); 1251 mutex_unlock(&i915->drm.struct_mutex); 1252 1253 return ret; 1254 } 1255 1256 /** 1257 * Reads data from the object referenced by handle. 1258 * @dev: drm device pointer 1259 * @data: ioctl data blob 1260 * @file: drm file pointer 1261 * 1262 * On error, the contents of *data are undefined. 
1263 */ 1264 int 1265 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1266 struct drm_file *file) 1267 { 1268 struct drm_i915_gem_pread *args = data; 1269 struct drm_i915_gem_object *obj; 1270 int ret; 1271 1272 if (args->size == 0) 1273 return 0; 1274 1275 if (!access_ok(VERIFY_WRITE, 1276 u64_to_user_ptr(args->data_ptr), 1277 args->size)) 1278 return -EFAULT; 1279 1280 obj = i915_gem_object_lookup(file, args->handle); 1281 if (!obj) 1282 return -ENOENT; 1283 1284 /* Bounds check source. */ 1285 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1286 ret = -EINVAL; 1287 goto out; 1288 } 1289 1290 trace_i915_gem_object_pread(obj, args->offset, args->size); 1291 1292 ret = i915_gem_object_wait(obj, 1293 I915_WAIT_INTERRUPTIBLE, 1294 MAX_SCHEDULE_TIMEOUT, 1295 to_rps_client(file)); 1296 if (ret) 1297 goto out; 1298 1299 ret = i915_gem_object_pin_pages(obj); 1300 if (ret) 1301 goto out; 1302 1303 ret = i915_gem_shmem_pread(obj, args); 1304 if (ret == -EFAULT || ret == -ENODEV) 1305 ret = i915_gem_gtt_pread(obj, args); 1306 1307 i915_gem_object_unpin_pages(obj); 1308 out: 1309 i915_gem_object_put(obj); 1310 return ret; 1311 } 1312 1313 /* This is the fast write path which cannot handle 1314 * page faults in the source data 1315 */ 1316 1317 static inline bool 1318 ggtt_write(struct io_mapping *mapping, 1319 loff_t base, int offset, 1320 char __user *user_data, int length) 1321 { 1322 void __iomem *vaddr; 1323 unsigned long unwritten; 1324 1325 /* We can use the cpu mem copy function because this is X86. */ 1326 vaddr = io_mapping_map_atomic_wc(mapping, base); 1327 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, 1328 user_data, length); 1329 io_mapping_unmap_atomic(vaddr); 1330 if (unwritten) { 1331 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); 1332 unwritten = copy_from_user((void __force *)vaddr + offset, 1333 user_data, length); 1334 io_mapping_unmap(vaddr); 1335 } 1336 1337 return unwritten; 1338 } 1339 1340 /** 1341 * This is the fast pwrite path, where we copy the data directly from the 1342 * user into the GTT, uncached. 1343 * @obj: i915 GEM object 1344 * @args: pwrite arguments structure 1345 */ 1346 static int 1347 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1348 const struct drm_i915_gem_pwrite *args) 1349 { 1350 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1351 struct i915_ggtt *ggtt = &i915->ggtt; 1352 struct drm_mm_node node; 1353 struct i915_vma *vma; 1354 u64 remain, offset; 1355 void __user *user_data; 1356 int ret; 1357 1358 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1359 if (ret) 1360 return ret; 1361 1362 if (i915_gem_object_has_struct_page(obj)) { 1363 /* 1364 * Avoid waking the device up if we can fallback, as 1365 * waking/resuming is very slow (worst-case 10-100 ms 1366 * depending on PCI sleeps and our own resume time). 1367 * This easily dwarfs any performance advantage from 1368 * using the cache bypass of indirect GGTT access. 
1369 */ 1370 if (!intel_runtime_pm_get_if_in_use(i915)) { 1371 ret = -EFAULT; 1372 goto out_unlock; 1373 } 1374 } else { 1375 /* No backing pages, no fallback, we must force GGTT access */ 1376 intel_runtime_pm_get(i915); 1377 } 1378 1379 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1380 PIN_MAPPABLE | 1381 PIN_NONFAULT | 1382 PIN_NONBLOCK); 1383 if (!IS_ERR(vma)) { 1384 node.start = i915_ggtt_offset(vma); 1385 node.allocated = false; 1386 ret = i915_vma_put_fence(vma); 1387 if (ret) { 1388 i915_vma_unpin(vma); 1389 vma = ERR_PTR(ret); 1390 } 1391 } 1392 if (IS_ERR(vma)) { 1393 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1394 if (ret) 1395 goto out_rpm; 1396 GEM_BUG_ON(!node.allocated); 1397 } 1398 1399 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1400 if (ret) 1401 goto out_unpin; 1402 1403 mutex_unlock(&i915->drm.struct_mutex); 1404 1405 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1406 1407 user_data = u64_to_user_ptr(args->data_ptr); 1408 offset = args->offset; 1409 remain = args->size; 1410 while (remain) { 1411 /* Operation in this page 1412 * 1413 * page_base = page offset within aperture 1414 * page_offset = offset within page 1415 * page_length = bytes to copy for this page 1416 */ 1417 u32 page_base = node.start; 1418 unsigned int page_offset = offset_in_page(offset); 1419 unsigned int page_length = PAGE_SIZE - page_offset; 1420 page_length = remain < page_length ? remain : page_length; 1421 if (node.allocated) { 1422 wmb(); /* flush the write before we modify the GGTT */ 1423 ggtt->base.insert_page(&ggtt->base, 1424 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1425 node.start, I915_CACHE_NONE, 0); 1426 wmb(); /* flush modifications to the GGTT (insert_page) */ 1427 } else { 1428 page_base += offset & PAGE_MASK; 1429 } 1430 /* If we get a fault while copying data, then (presumably) our 1431 * source page isn't available. Return the error and we'll 1432 * retry in the slow path. 1433 * If the object is non-shmem backed, we retry again with the 1434 * path that handles page fault. 1435 */ 1436 if (ggtt_write(&ggtt->iomap, page_base, page_offset, 1437 user_data, page_length)) { 1438 ret = -EFAULT; 1439 break; 1440 } 1441 1442 remain -= page_length; 1443 user_data += page_length; 1444 offset += page_length; 1445 } 1446 intel_fb_obj_flush(obj, ORIGIN_CPU); 1447 1448 mutex_lock(&i915->drm.struct_mutex); 1449 out_unpin: 1450 if (node.allocated) { 1451 wmb(); 1452 ggtt->base.clear_range(&ggtt->base, 1453 node.start, node.size); 1454 remove_mappable_node(&node); 1455 } else { 1456 i915_vma_unpin(vma); 1457 } 1458 out_rpm: 1459 intel_runtime_pm_put(i915); 1460 out_unlock: 1461 mutex_unlock(&i915->drm.struct_mutex); 1462 return ret; 1463 } 1464 1465 static int 1466 shmem_pwrite_slow(struct page *page, int offset, int length, 1467 char __user *user_data, 1468 bool page_do_bit17_swizzling, 1469 bool needs_clflush_before, 1470 bool needs_clflush_after) 1471 { 1472 char *vaddr; 1473 int ret; 1474 1475 vaddr = kmap(page); 1476 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1477 shmem_clflush_swizzled_range(vaddr + offset, length, 1478 page_do_bit17_swizzling); 1479 if (page_do_bit17_swizzling) 1480 ret = __copy_from_user_swizzled(vaddr, offset, user_data, 1481 length); 1482 else 1483 ret = __copy_from_user(vaddr + offset, user_data, length); 1484 if (needs_clflush_after) 1485 shmem_clflush_swizzled_range(vaddr + offset, length, 1486 page_do_bit17_swizzling); 1487 kunmap(page); 1488 1489 return ret ? 
		     -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire page.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		int length;

		length = remain;
		if (offset + length > PAGE_SIZE)
			length = PAGE_SIZE - offset;

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
1595 */ 1596 int 1597 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1598 struct drm_file *file) 1599 { 1600 struct drm_i915_gem_pwrite *args = data; 1601 struct drm_i915_gem_object *obj; 1602 int ret; 1603 1604 if (args->size == 0) 1605 return 0; 1606 1607 if (!access_ok(VERIFY_READ, 1608 u64_to_user_ptr(args->data_ptr), 1609 args->size)) 1610 return -EFAULT; 1611 1612 obj = i915_gem_object_lookup(file, args->handle); 1613 if (!obj) 1614 return -ENOENT; 1615 1616 /* Bounds check destination. */ 1617 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1618 ret = -EINVAL; 1619 goto err; 1620 } 1621 1622 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1623 1624 ret = -ENODEV; 1625 if (obj->ops->pwrite) 1626 ret = obj->ops->pwrite(obj, args); 1627 if (ret != -ENODEV) 1628 goto err; 1629 1630 ret = i915_gem_object_wait(obj, 1631 I915_WAIT_INTERRUPTIBLE | 1632 I915_WAIT_ALL, 1633 MAX_SCHEDULE_TIMEOUT, 1634 to_rps_client(file)); 1635 if (ret) 1636 goto err; 1637 1638 ret = i915_gem_object_pin_pages(obj); 1639 if (ret) 1640 goto err; 1641 1642 ret = -EFAULT; 1643 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1644 * it would end up going through the fenced access, and we'll get 1645 * different detiling behavior between reading and writing. 1646 * pread/pwrite currently are reading and writing from the CPU 1647 * perspective, requiring manual detiling by the client. 1648 */ 1649 if (!i915_gem_object_has_struct_page(obj) || 1650 cpu_write_needs_clflush(obj)) 1651 /* Note that the gtt paths might fail with non-page-backed user 1652 * pointers (e.g. gtt mappings when moving data between 1653 * textures). Fallback to the shmem path in that case. 1654 */ 1655 ret = i915_gem_gtt_pwrite_fast(obj, args); 1656 1657 if (ret == -EFAULT || ret == -ENOSPC) { 1658 if (obj->phys_handle) 1659 ret = i915_gem_phys_pwrite(obj, args, file); 1660 else 1661 ret = i915_gem_shmem_pwrite(obj, args); 1662 } 1663 1664 i915_gem_object_unpin_pages(obj); 1665 err: 1666 i915_gem_object_put(obj); 1667 return ret; 1668 } 1669 1670 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1671 { 1672 struct drm_i915_private *i915; 1673 struct list_head *list; 1674 struct i915_vma *vma; 1675 1676 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1677 1678 for_each_ggtt_vma(vma, obj) { 1679 if (i915_vma_is_active(vma)) 1680 continue; 1681 1682 if (!drm_mm_node_allocated(&vma->node)) 1683 continue; 1684 1685 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1686 } 1687 1688 i915 = to_i915(obj->base.dev); 1689 spin_lock(&i915->mm.obj_lock); 1690 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1691 list_move_tail(&obj->mm.link, list); 1692 spin_unlock(&i915->mm.obj_lock); 1693 } 1694 1695 /** 1696 * Called when user space prepares to use an object with the CPU, either 1697 * through the mmap ioctl's mapping or a GTT mapping. 1698 * @dev: drm device 1699 * @data: ioctl data blob 1700 * @file: drm file 1701 */ 1702 int 1703 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1704 struct drm_file *file) 1705 { 1706 struct drm_i915_gem_set_domain *args = data; 1707 struct drm_i915_gem_object *obj; 1708 uint32_t read_domains = args->read_domains; 1709 uint32_t write_domain = args->write_domain; 1710 int err; 1711 1712 /* Only handle setting domains to types used by the CPU. 
*/ 1713 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1714 return -EINVAL; 1715 1716 /* Having something in the write domain implies it's in the read 1717 * domain, and only that read domain. Enforce that in the request. 1718 */ 1719 if (write_domain != 0 && read_domains != write_domain) 1720 return -EINVAL; 1721 1722 obj = i915_gem_object_lookup(file, args->handle); 1723 if (!obj) 1724 return -ENOENT; 1725 1726 /* Try to flush the object off the GPU without holding the lock. 1727 * We will repeat the flush holding the lock in the normal manner 1728 * to catch cases where we are gazumped. 1729 */ 1730 err = i915_gem_object_wait(obj, 1731 I915_WAIT_INTERRUPTIBLE | 1732 (write_domain ? I915_WAIT_ALL : 0), 1733 MAX_SCHEDULE_TIMEOUT, 1734 to_rps_client(file)); 1735 if (err) 1736 goto out; 1737 1738 /* 1739 * Proxy objects do not control access to the backing storage, ergo 1740 * they cannot be used as a means to manipulate the cache domain 1741 * tracking for that backing storage. The proxy object is always 1742 * considered to be outside of any cache domain. 1743 */ 1744 if (i915_gem_object_is_proxy(obj)) { 1745 err = -ENXIO; 1746 goto out; 1747 } 1748 1749 /* 1750 * Flush and acquire obj->pages so that we are coherent through 1751 * direct access in memory with previous cached writes through 1752 * shmemfs and that our cache domain tracking remains valid. 1753 * For example, if the obj->filp was moved to swap without us 1754 * being notified and releasing the pages, we would mistakenly 1755 * continue to assume that the obj remained out of the CPU cached 1756 * domain. 1757 */ 1758 err = i915_gem_object_pin_pages(obj); 1759 if (err) 1760 goto out; 1761 1762 err = i915_mutex_lock_interruptible(dev); 1763 if (err) 1764 goto out_unpin; 1765 1766 if (read_domains & I915_GEM_DOMAIN_WC) 1767 err = i915_gem_object_set_to_wc_domain(obj, write_domain); 1768 else if (read_domains & I915_GEM_DOMAIN_GTT) 1769 err = i915_gem_object_set_to_gtt_domain(obj, write_domain); 1770 else 1771 err = i915_gem_object_set_to_cpu_domain(obj, write_domain); 1772 1773 /* And bump the LRU for this access */ 1774 i915_gem_object_bump_inactive_ggtt(obj); 1775 1776 mutex_unlock(&dev->struct_mutex); 1777 1778 if (write_domain != 0) 1779 intel_fb_obj_invalidate(obj, 1780 fb_write_origin(obj, write_domain)); 1781 1782 out_unpin: 1783 i915_gem_object_unpin_pages(obj); 1784 out: 1785 i915_gem_object_put(obj); 1786 return err; 1787 } 1788 1789 /** 1790 * Called when user space has done writes to this buffer 1791 * @dev: drm device 1792 * @data: ioctl data blob 1793 * @file: drm file 1794 */ 1795 int 1796 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1797 struct drm_file *file) 1798 { 1799 struct drm_i915_gem_sw_finish *args = data; 1800 struct drm_i915_gem_object *obj; 1801 1802 obj = i915_gem_object_lookup(file, args->handle); 1803 if (!obj) 1804 return -ENOENT; 1805 1806 /* 1807 * Proxy objects are barred from CPU access, so there is no 1808 * need to ban sw_finish as it is a nop. 1809 */ 1810 1811 /* Pinned buffers may be scanout, so flush the cache */ 1812 i915_gem_object_flush_if_display(obj); 1813 i915_gem_object_put(obj); 1814 1815 return 0; 1816 } 1817 1818 /** 1819 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1820 * it is mapped to. 1821 * @dev: drm device 1822 * @data: ioctl data blob 1823 * @file: drm file 1824 * 1825 * While the mapping holds a reference on the contents of the object, it doesn't 1826 * imply a ref on the object itself. 
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
int i915_gem_fault(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
	struct vm_area_struct *area = vmf->vma;
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
	struct i915_vma *vma;
	pgoff_t page_offset;
	int ret;

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
2013 * Upon acquiring the lock, we will perform our sanity checks and then 2014 * repeat the flush holding the lock in the normal manner to catch cases 2015 * where we are gazumped. 2016 */ 2017 ret = i915_gem_object_wait(obj, 2018 I915_WAIT_INTERRUPTIBLE, 2019 MAX_SCHEDULE_TIMEOUT, 2020 NULL); 2021 if (ret) 2022 goto err; 2023 2024 ret = i915_gem_object_pin_pages(obj); 2025 if (ret) 2026 goto err; 2027 2028 intel_runtime_pm_get(dev_priv); 2029 2030 ret = i915_mutex_lock_interruptible(dev); 2031 if (ret) 2032 goto err_rpm; 2033 2034 /* Access to snoopable pages through the GTT is incoherent. */ 2035 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2036 ret = -EFAULT; 2037 goto err_unlock; 2038 } 2039 2040 2041 /* Now pin it into the GTT as needed */ 2042 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2043 PIN_MAPPABLE | 2044 PIN_NONBLOCK | 2045 PIN_NONFAULT); 2046 if (IS_ERR(vma)) { 2047 /* Use a partial view if it is bigger than available space */ 2048 struct i915_ggtt_view view = 2049 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2050 unsigned int flags; 2051 2052 flags = PIN_MAPPABLE; 2053 if (view.type == I915_GGTT_VIEW_NORMAL) 2054 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2055 2056 /* 2057 * Userspace is now writing through an untracked VMA, abandon 2058 * all hope that the hardware is able to track future writes. 2059 */ 2060 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2061 2062 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2063 if (IS_ERR(vma) && !view.type) { 2064 flags = PIN_MAPPABLE; 2065 view.type = I915_GGTT_VIEW_PARTIAL; 2066 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2067 } 2068 } 2069 if (IS_ERR(vma)) { 2070 ret = PTR_ERR(vma); 2071 goto err_unlock; 2072 } 2073 2074 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2075 if (ret) 2076 goto err_unpin; 2077 2078 ret = i915_vma_pin_fence(vma); 2079 if (ret) 2080 goto err_unpin; 2081 2082 /* Finally, remap it using the new GTT offset */ 2083 ret = remap_io_mapping(area, 2084 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2085 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2086 min_t(u64, vma->size, area->vm_end - area->vm_start), 2087 &ggtt->iomap); 2088 if (ret) 2089 goto err_fence; 2090 2091 /* Mark as being mmapped into userspace for later revocation */ 2092 assert_rpm_wakelock_held(dev_priv); 2093 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2094 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2095 GEM_BUG_ON(!obj->userfault_count); 2096 2097 i915_vma_set_ggtt_write(vma); 2098 2099 err_fence: 2100 i915_vma_unpin_fence(vma); 2101 err_unpin: 2102 __i915_vma_unpin(vma); 2103 err_unlock: 2104 mutex_unlock(&dev->struct_mutex); 2105 err_rpm: 2106 intel_runtime_pm_put(dev_priv); 2107 i915_gem_object_unpin_pages(obj); 2108 err: 2109 switch (ret) { 2110 case -EIO: 2111 /* 2112 * We eat errors when the gpu is terminally wedged to avoid 2113 * userspace unduly crashing (gl has no provisions for mmaps to 2114 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2115 * and so needs to be reported. 2116 */ 2117 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2118 ret = VM_FAULT_SIGBUS; 2119 break; 2120 } 2121 case -EAGAIN: 2122 /* 2123 * EAGAIN means the gpu is hung and we'll wait for the error 2124 * handler to reset everything when re-faulting in 2125 * i915_mutex_lock_interruptible. 
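 *
 * (This error case ends up as VM_FAULT_NOPAGE below: the faulting
 * instruction is simply retried and re-enters this handler once the
 * reset has had a chance to run.)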
2126 */ 2127 case 0: 2128 case -ERESTARTSYS: 2129 case -EINTR: 2130 case -EBUSY: 2131 /* 2132 * EBUSY is ok: this just means that another thread 2133 * already did the job. 2134 */ 2135 ret = VM_FAULT_NOPAGE; 2136 break; 2137 case -ENOMEM: 2138 ret = VM_FAULT_OOM; 2139 break; 2140 case -ENOSPC: 2141 case -EFAULT: 2142 ret = VM_FAULT_SIGBUS; 2143 break; 2144 default: 2145 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2146 ret = VM_FAULT_SIGBUS; 2147 break; 2148 } 2149 return ret; 2150 } 2151 2152 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2153 { 2154 struct i915_vma *vma; 2155 2156 GEM_BUG_ON(!obj->userfault_count); 2157 2158 obj->userfault_count = 0; 2159 list_del(&obj->userfault_link); 2160 drm_vma_node_unmap(&obj->base.vma_node, 2161 obj->base.dev->anon_inode->i_mapping); 2162 2163 for_each_ggtt_vma(vma, obj) 2164 i915_vma_unset_userfault(vma); 2165 } 2166 2167 /** 2168 * i915_gem_release_mmap - remove physical page mappings 2169 * @obj: obj in question 2170 * 2171 * Preserve the reservation of the mmapping with the DRM core code, but 2172 * relinquish ownership of the pages back to the system. 2173 * 2174 * It is vital that we remove the page mapping if we have mapped a tiled 2175 * object through the GTT and then lose the fence register due to 2176 * resource pressure. Similarly if the object has been moved out of the 2177 * aperture, than pages mapped into userspace must be revoked. Removing the 2178 * mapping will then trigger a page fault on the next user access, allowing 2179 * fixup by i915_gem_fault(). 2180 */ 2181 void 2182 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2183 { 2184 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2185 2186 /* Serialisation between user GTT access and our code depends upon 2187 * revoking the CPU's PTE whilst the mutex is held. The next user 2188 * pagefault then has to wait until we release the mutex. 2189 * 2190 * Note that RPM complicates somewhat by adding an additional 2191 * requirement that operations to the GGTT be made holding the RPM 2192 * wakeref. 2193 */ 2194 lockdep_assert_held(&i915->drm.struct_mutex); 2195 intel_runtime_pm_get(i915); 2196 2197 if (!obj->userfault_count) 2198 goto out; 2199 2200 __i915_gem_object_release_mmap(obj); 2201 2202 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2203 * memory transactions from userspace before we return. The TLB 2204 * flushing implied above by changing the PTE above *should* be 2205 * sufficient, an extra barrier here just provides us with a bit 2206 * of paranoid documentation about our requirement to serialise 2207 * memory writes before touching registers / GSM. 2208 */ 2209 wmb(); 2210 2211 out: 2212 intel_runtime_pm_put(i915); 2213 } 2214 2215 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2216 { 2217 struct drm_i915_gem_object *obj, *on; 2218 int i; 2219 2220 /* 2221 * Only called during RPM suspend. All users of the userfault_list 2222 * must be holding an RPM wakeref to ensure that this can not 2223 * run concurrently with themselves (and use the struct_mutex for 2224 * protection between themselves). 2225 */ 2226 2227 list_for_each_entry_safe(obj, on, 2228 &dev_priv->mm.userfault_list, userfault_link) 2229 __i915_gem_object_release_mmap(obj); 2230 2231 /* The fence will be lost when the device powers down. If any were 2232 * in use by hardware (i.e. they are pinned), we should not be powering 2233 * down! All other fences will be reacquired by the user upon waking. 
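 *
 * (Marking reg->dirty below records that the hardware copy of the fence
 * is about to be lost across the power transition, so it is rewritten
 * before it is next relied upon rather than trusted blindly on resume.)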
2234 */ 2235 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2236 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2237 2238 /* Ideally we want to assert that the fence register is not 2239 * live at this point (i.e. that no piece of code will be 2240 * trying to write through fence + GTT, as that both violates 2241 * our tracking of activity and associated locking/barriers, 2242 * but also is illegal given that the hw is powered down). 2243 * 2244 * Previously we used reg->pin_count as a "liveness" indicator. 2245 * That is not sufficient, and we need a more fine-grained 2246 * tool if we want to have a sanity check here. 2247 */ 2248 2249 if (!reg->vma) 2250 continue; 2251 2252 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2253 reg->dirty = true; 2254 } 2255 } 2256 2257 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2258 { 2259 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2260 int err; 2261 2262 err = drm_gem_create_mmap_offset(&obj->base); 2263 if (likely(!err)) 2264 return 0; 2265 2266 /* Attempt to reap some mmap space from dead objects */ 2267 do { 2268 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2269 if (err) 2270 break; 2271 2272 i915_gem_drain_freed_objects(dev_priv); 2273 err = drm_gem_create_mmap_offset(&obj->base); 2274 if (!err) 2275 break; 2276 2277 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2278 2279 return err; 2280 } 2281 2282 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2283 { 2284 drm_gem_free_mmap_offset(&obj->base); 2285 } 2286 2287 int 2288 i915_gem_mmap_gtt(struct drm_file *file, 2289 struct drm_device *dev, 2290 uint32_t handle, 2291 uint64_t *offset) 2292 { 2293 struct drm_i915_gem_object *obj; 2294 int ret; 2295 2296 obj = i915_gem_object_lookup(file, handle); 2297 if (!obj) 2298 return -ENOENT; 2299 2300 ret = i915_gem_object_create_mmap_offset(obj); 2301 if (ret == 0) 2302 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2303 2304 i915_gem_object_put(obj); 2305 return ret; 2306 } 2307 2308 /** 2309 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2310 * @dev: DRM device 2311 * @data: GTT mapping ioctl data 2312 * @file: GEM object info 2313 * 2314 * Simply returns the fake offset to userspace so it can mmap it. 2315 * The mmap call will end up in drm_gem_mmap(), which will set things 2316 * up so we can get faults in the handler above. 2317 * 2318 * The fault handler will take care of binding the object into the GTT 2319 * (since it may have been evicted to make room for something), allocating 2320 * a fence register, and mapping the appropriate aperture address into 2321 * userspace. 2322 */ 2323 int 2324 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2325 struct drm_file *file) 2326 { 2327 struct drm_i915_gem_mmap_gtt *args = data; 2328 2329 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2330 } 2331 2332 /* Immediately discard the backing storage */ 2333 static void 2334 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2335 { 2336 i915_gem_object_free_mmap_offset(obj); 2337 2338 if (obj->base.filp == NULL) 2339 return; 2340 2341 /* Our goal here is to return as much of the memory as 2342 * is possible back to the system as we are called from OOM. 2343 * To do this we must instruct the shmfs to drop all of its 2344 * backing pages, *now*. 
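 *
 * For reference, buffers typically become candidates for this path once
 * userspace has marked them purgeable. A minimal, illustrative sketch of
 * that userspace side (assuming a libdrm drmIoctl() wrapper and a GEM
 * handle "bo_handle" on drm fd "fd"):
 *
 *   struct drm_i915_gem_madvise madv = {
 *           .handle = bo_handle,
 *           .madv = I915_MADV_DONTNEED,
 *   };
 *   drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 * after which the shrinker is free to purge the object and truncate its
 * backing store as done here.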
2345 */ 2346 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2347 obj->mm.madv = __I915_MADV_PURGED; 2348 obj->mm.pages = ERR_PTR(-EFAULT); 2349 } 2350 2351 /* Try to discard unwanted pages */ 2352 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2353 { 2354 struct address_space *mapping; 2355 2356 lockdep_assert_held(&obj->mm.lock); 2357 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2358 2359 switch (obj->mm.madv) { 2360 case I915_MADV_DONTNEED: 2361 i915_gem_object_truncate(obj); 2362 case __I915_MADV_PURGED: 2363 return; 2364 } 2365 2366 if (obj->base.filp == NULL) 2367 return; 2368 2369 mapping = obj->base.filp->f_mapping, 2370 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2371 } 2372 2373 static void 2374 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2375 struct sg_table *pages) 2376 { 2377 struct sgt_iter sgt_iter; 2378 struct page *page; 2379 2380 __i915_gem_object_release_shmem(obj, pages, true); 2381 2382 i915_gem_gtt_finish_pages(obj, pages); 2383 2384 if (i915_gem_object_needs_bit17_swizzle(obj)) 2385 i915_gem_object_save_bit_17_swizzle(obj, pages); 2386 2387 for_each_sgt_page(page, sgt_iter, pages) { 2388 if (obj->mm.dirty) 2389 set_page_dirty(page); 2390 2391 if (obj->mm.madv == I915_MADV_WILLNEED) 2392 mark_page_accessed(page); 2393 2394 put_page(page); 2395 } 2396 obj->mm.dirty = false; 2397 2398 sg_free_table(pages); 2399 kfree(pages); 2400 } 2401 2402 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2403 { 2404 struct radix_tree_iter iter; 2405 void __rcu **slot; 2406 2407 rcu_read_lock(); 2408 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2409 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2410 rcu_read_unlock(); 2411 } 2412 2413 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2414 enum i915_mm_subclass subclass) 2415 { 2416 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2417 struct sg_table *pages; 2418 2419 if (i915_gem_object_has_pinned_pages(obj)) 2420 return; 2421 2422 GEM_BUG_ON(obj->bind_count); 2423 if (!i915_gem_object_has_pages(obj)) 2424 return; 2425 2426 /* May be called by shrinker from within get_pages() (on another bo) */ 2427 mutex_lock_nested(&obj->mm.lock, subclass); 2428 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2429 goto unlock; 2430 2431 /* ->put_pages might need to allocate memory for the bit17 swizzle 2432 * array, hence protect them from being reaped by removing them from gtt 2433 * lists early. 
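 *
 * (On the helper used just below: fetch_and_zero(&obj->mm.pages) is a
 * plain read-then-clear performed under obj->mm.lock, roughly
 * "pages = obj->mm.pages; obj->mm.pages = NULL;", so that anyone else
 * taking the lock afterwards sees the object as already unpopulated
 * while we tear the old pages down.)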
*/ 2434 pages = fetch_and_zero(&obj->mm.pages); 2435 GEM_BUG_ON(!pages); 2436 2437 spin_lock(&i915->mm.obj_lock); 2438 list_del(&obj->mm.link); 2439 spin_unlock(&i915->mm.obj_lock); 2440 2441 if (obj->mm.mapping) { 2442 void *ptr; 2443 2444 ptr = page_mask_bits(obj->mm.mapping); 2445 if (is_vmalloc_addr(ptr)) 2446 vunmap(ptr); 2447 else 2448 kunmap(kmap_to_page(ptr)); 2449 2450 obj->mm.mapping = NULL; 2451 } 2452 2453 __i915_gem_object_reset_page_iter(obj); 2454 2455 if (!IS_ERR(pages)) 2456 obj->ops->put_pages(obj, pages); 2457 2458 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2459 2460 unlock: 2461 mutex_unlock(&obj->mm.lock); 2462 } 2463 2464 static bool i915_sg_trim(struct sg_table *orig_st) 2465 { 2466 struct sg_table new_st; 2467 struct scatterlist *sg, *new_sg; 2468 unsigned int i; 2469 2470 if (orig_st->nents == orig_st->orig_nents) 2471 return false; 2472 2473 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2474 return false; 2475 2476 new_sg = new_st.sgl; 2477 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2478 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2479 /* called before being DMA mapped, no need to copy sg->dma_* */ 2480 new_sg = sg_next(new_sg); 2481 } 2482 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2483 2484 sg_free_table(orig_st); 2485 2486 *orig_st = new_st; 2487 return true; 2488 } 2489 2490 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2491 { 2492 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2493 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2494 unsigned long i; 2495 struct address_space *mapping; 2496 struct sg_table *st; 2497 struct scatterlist *sg; 2498 struct sgt_iter sgt_iter; 2499 struct page *page; 2500 unsigned long last_pfn = 0; /* suppress gcc warning */ 2501 unsigned int max_segment = i915_sg_segment_size(); 2502 unsigned int sg_page_sizes; 2503 gfp_t noreclaim; 2504 int ret; 2505 2506 /* Assert that the object is not currently in any GPU domain. As it 2507 * wasn't in the GTT, there shouldn't be any way it could have been in 2508 * a GPU cache 2509 */ 2510 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2511 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2512 2513 st = kmalloc(sizeof(*st), GFP_KERNEL); 2514 if (st == NULL) 2515 return -ENOMEM; 2516 2517 rebuild_st: 2518 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2519 kfree(st); 2520 return -ENOMEM; 2521 } 2522 2523 /* Get the list of pages out of our struct file. They'll be pinned 2524 * at this point until we release them. 2525 * 2526 * Fail silently without starting the shrinker 2527 */ 2528 mapping = obj->base.filp->f_mapping; 2529 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2530 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2531 2532 sg = st->sgl; 2533 st->nents = 0; 2534 sg_page_sizes = 0; 2535 for (i = 0; i < page_count; i++) { 2536 const unsigned int shrink[] = { 2537 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2538 0, 2539 }, *s = shrink; 2540 gfp_t gfp = noreclaim; 2541 2542 do { 2543 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2544 if (likely(!IS_ERR(page))) 2545 break; 2546 2547 if (!*s) { 2548 ret = PTR_ERR(page); 2549 goto err_sg; 2550 } 2551 2552 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2553 cond_resched(); 2554 2555 /* We've tried hard to allocate the memory by reaping 2556 * our own buffer, now let the real VM do its job and 2557 * go down in flames if truly OOM. 
2558 * 2559 * However, since graphics tend to be disposable, 2560 * defer the oom here by reporting the ENOMEM back 2561 * to userspace. 2562 */ 2563 if (!*s) { 2564 /* reclaim and warn, but no oom */ 2565 gfp = mapping_gfp_mask(mapping); 2566 2567 /* Our bo are always dirty and so we require 2568 * kswapd to reclaim our pages (direct reclaim 2569 * does not effectively begin pageout of our 2570 * buffers on its own). However, direct reclaim 2571 * only waits for kswapd when under allocation 2572 * congestion. So as a result __GFP_RECLAIM is 2573 * unreliable and fails to actually reclaim our 2574 * dirty pages -- unless you try over and over 2575 * again with !__GFP_NORETRY. However, we still 2576 * want to fail this allocation rather than 2577 * trigger the out-of-memory killer and for 2578 * this we want __GFP_RETRY_MAYFAIL. 2579 */ 2580 gfp |= __GFP_RETRY_MAYFAIL; 2581 } 2582 } while (1); 2583 2584 if (!i || 2585 sg->length >= max_segment || 2586 page_to_pfn(page) != last_pfn + 1) { 2587 if (i) { 2588 sg_page_sizes |= sg->length; 2589 sg = sg_next(sg); 2590 } 2591 st->nents++; 2592 sg_set_page(sg, page, PAGE_SIZE, 0); 2593 } else { 2594 sg->length += PAGE_SIZE; 2595 } 2596 last_pfn = page_to_pfn(page); 2597 2598 /* Check that the i965g/gm workaround works. */ 2599 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2600 } 2601 if (sg) { /* loop terminated early; short sg table */ 2602 sg_page_sizes |= sg->length; 2603 sg_mark_end(sg); 2604 } 2605 2606 /* Trim unused sg entries to avoid wasting memory. */ 2607 i915_sg_trim(st); 2608 2609 ret = i915_gem_gtt_prepare_pages(obj, st); 2610 if (ret) { 2611 /* DMA remapping failed? One possible cause is that 2612 * it could not reserve enough large entries, asking 2613 * for PAGE_SIZE chunks instead may be helpful. 2614 */ 2615 if (max_segment > PAGE_SIZE) { 2616 for_each_sgt_page(page, sgt_iter, st) 2617 put_page(page); 2618 sg_free_table(st); 2619 2620 max_segment = PAGE_SIZE; 2621 goto rebuild_st; 2622 } else { 2623 dev_warn(&dev_priv->drm.pdev->dev, 2624 "Failed to DMA remap %lu pages\n", 2625 page_count); 2626 goto err_pages; 2627 } 2628 } 2629 2630 if (i915_gem_object_needs_bit17_swizzle(obj)) 2631 i915_gem_object_do_bit_17_swizzle(obj, st); 2632 2633 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2634 2635 return 0; 2636 2637 err_sg: 2638 sg_mark_end(sg); 2639 err_pages: 2640 for_each_sgt_page(page, sgt_iter, st) 2641 put_page(page); 2642 sg_free_table(st); 2643 kfree(st); 2644 2645 /* shmemfs first checks if there is enough memory to allocate the page 2646 * and reports ENOSPC should there be insufficient, along with the usual 2647 * ENOMEM for a genuine allocation failure. 2648 * 2649 * We use ENOSPC in our driver to mean that we have run out of aperture 2650 * space and so want to translate the error from shmemfs back to our 2651 * usual understanding of ENOMEM. 
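 *
 * (For example, a size-limited tmpfs mount backing the object may fail
 * the allocation with -ENOSPC even though the system still has free
 * memory; reporting -ENOMEM keeps the uapi to one well-known
 * out-of-memory errno.)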
2652 */ 2653 if (ret == -ENOSPC) 2654 ret = -ENOMEM; 2655 2656 return ret; 2657 } 2658 2659 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2660 struct sg_table *pages, 2661 unsigned int sg_page_sizes) 2662 { 2663 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2664 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2665 int i; 2666 2667 lockdep_assert_held(&obj->mm.lock); 2668 2669 obj->mm.get_page.sg_pos = pages->sgl; 2670 obj->mm.get_page.sg_idx = 0; 2671 2672 obj->mm.pages = pages; 2673 2674 if (i915_gem_object_is_tiled(obj) && 2675 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2676 GEM_BUG_ON(obj->mm.quirked); 2677 __i915_gem_object_pin_pages(obj); 2678 obj->mm.quirked = true; 2679 } 2680 2681 GEM_BUG_ON(!sg_page_sizes); 2682 obj->mm.page_sizes.phys = sg_page_sizes; 2683 2684 /* 2685 * Calculate the supported page-sizes which fit into the given 2686 * sg_page_sizes. This will give us the page-sizes which we may be able 2687 * to use opportunistically when later inserting into the GTT. For 2688 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2689 * 64K or 4K pages, although in practice this will depend on a number of 2690 * other factors. 2691 */ 2692 obj->mm.page_sizes.sg = 0; 2693 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2694 if (obj->mm.page_sizes.phys & ~0u << i) 2695 obj->mm.page_sizes.sg |= BIT(i); 2696 } 2697 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2698 2699 spin_lock(&i915->mm.obj_lock); 2700 list_add(&obj->mm.link, &i915->mm.unbound_list); 2701 spin_unlock(&i915->mm.obj_lock); 2702 } 2703 2704 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2705 { 2706 int err; 2707 2708 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2709 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2710 return -EFAULT; 2711 } 2712 2713 err = obj->ops->get_pages(obj); 2714 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2715 2716 return err; 2717 } 2718 2719 /* Ensure that the associated pages are gathered from the backing storage 2720 * and pinned into our object. i915_gem_object_pin_pages() may be called 2721 * multiple times before they are released by a single call to 2722 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2723 * either as a result of memory pressure (reaping pages under the shrinker) 2724 * or as the object is itself released. 
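 *
 * A minimal sketch of the expected calling pattern (pin for as long as
 * the pages are needed, then drop the pin):
 *
 *   err = i915_gem_object_pin_pages(obj);
 *   if (err)
 *           return err;
 *
 *   ... access obj->mm.pages ...
 *
 *   i915_gem_object_unpin_pages(obj);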
2725 */ 2726 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2727 { 2728 int err; 2729 2730 err = mutex_lock_interruptible(&obj->mm.lock); 2731 if (err) 2732 return err; 2733 2734 if (unlikely(!i915_gem_object_has_pages(obj))) { 2735 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2736 2737 err = ____i915_gem_object_get_pages(obj); 2738 if (err) 2739 goto unlock; 2740 2741 smp_mb__before_atomic(); 2742 } 2743 atomic_inc(&obj->mm.pages_pin_count); 2744 2745 unlock: 2746 mutex_unlock(&obj->mm.lock); 2747 return err; 2748 } 2749 2750 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2751 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2752 enum i915_map_type type) 2753 { 2754 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2755 struct sg_table *sgt = obj->mm.pages; 2756 struct sgt_iter sgt_iter; 2757 struct page *page; 2758 struct page *stack_pages[32]; 2759 struct page **pages = stack_pages; 2760 unsigned long i = 0; 2761 pgprot_t pgprot; 2762 void *addr; 2763 2764 /* A single page can always be kmapped */ 2765 if (n_pages == 1 && type == I915_MAP_WB) 2766 return kmap(sg_page(sgt->sgl)); 2767 2768 if (n_pages > ARRAY_SIZE(stack_pages)) { 2769 /* Too big for stack -- allocate temporary array instead */ 2770 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2771 if (!pages) 2772 return NULL; 2773 } 2774 2775 for_each_sgt_page(page, sgt_iter, sgt) 2776 pages[i++] = page; 2777 2778 /* Check that we have the expected number of pages */ 2779 GEM_BUG_ON(i != n_pages); 2780 2781 switch (type) { 2782 default: 2783 MISSING_CASE(type); 2784 /* fallthrough to use PAGE_KERNEL anyway */ 2785 case I915_MAP_WB: 2786 pgprot = PAGE_KERNEL; 2787 break; 2788 case I915_MAP_WC: 2789 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2790 break; 2791 } 2792 addr = vmap(pages, n_pages, 0, pgprot); 2793 2794 if (pages != stack_pages) 2795 kvfree(pages); 2796 2797 return addr; 2798 } 2799 2800 /* get, pin, and map the pages of the object into kernel space */ 2801 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2802 enum i915_map_type type) 2803 { 2804 enum i915_map_type has_type; 2805 bool pinned; 2806 void *ptr; 2807 int ret; 2808 2809 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2810 return ERR_PTR(-ENXIO); 2811 2812 ret = mutex_lock_interruptible(&obj->mm.lock); 2813 if (ret) 2814 return ERR_PTR(ret); 2815 2816 pinned = !(type & I915_MAP_OVERRIDE); 2817 type &= ~I915_MAP_OVERRIDE; 2818 2819 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2820 if (unlikely(!i915_gem_object_has_pages(obj))) { 2821 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2822 2823 ret = ____i915_gem_object_get_pages(obj); 2824 if (ret) 2825 goto err_unlock; 2826 2827 smp_mb__before_atomic(); 2828 } 2829 atomic_inc(&obj->mm.pages_pin_count); 2830 pinned = false; 2831 } 2832 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2833 2834 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2835 if (ptr && has_type != type) { 2836 if (pinned) { 2837 ret = -EBUSY; 2838 goto err_unpin; 2839 } 2840 2841 if (is_vmalloc_addr(ptr)) 2842 vunmap(ptr); 2843 else 2844 kunmap(kmap_to_page(ptr)); 2845 2846 ptr = obj->mm.mapping = NULL; 2847 } 2848 2849 if (!ptr) { 2850 ptr = i915_gem_object_map(obj, type); 2851 if (!ptr) { 2852 ret = -ENOMEM; 2853 goto err_unpin; 2854 } 2855 2856 obj->mm.mapping = page_pack_bits(ptr, type); 2857 } 2858 2859 out_unlock: 2860 mutex_unlock(&obj->mm.lock); 2861 return ptr; 2862 2863 err_unpin: 2864 atomic_dec(&obj->mm.pages_pin_count); 2865 
err_unlock: 2866 ptr = ERR_PTR(ret); 2867 goto out_unlock; 2868 } 2869 2870 static int 2871 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2872 const struct drm_i915_gem_pwrite *arg) 2873 { 2874 struct address_space *mapping = obj->base.filp->f_mapping; 2875 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2876 u64 remain, offset; 2877 unsigned int pg; 2878 2879 /* Before we instantiate/pin the backing store for our use, we 2880 * can prepopulate the shmemfs filp efficiently using a write into 2881 * the pagecache. We avoid the penalty of instantiating all the 2882 * pages, important if the user is just writing to a few and never 2883 * uses the object on the GPU, and using a direct write into shmemfs 2884 * allows it to avoid the cost of retrieving a page (either swapin 2885 * or clearing-before-use) before it is overwritten. 2886 */ 2887 if (i915_gem_object_has_pages(obj)) 2888 return -ENODEV; 2889 2890 if (obj->mm.madv != I915_MADV_WILLNEED) 2891 return -EFAULT; 2892 2893 /* Before the pages are instantiated the object is treated as being 2894 * in the CPU domain. The pages will be clflushed as required before 2895 * use, and we can freely write into the pages directly. If userspace 2896 * races pwrite with any other operation; corruption will ensue - 2897 * that is userspace's prerogative! 2898 */ 2899 2900 remain = arg->size; 2901 offset = arg->offset; 2902 pg = offset_in_page(offset); 2903 2904 do { 2905 unsigned int len, unwritten; 2906 struct page *page; 2907 void *data, *vaddr; 2908 int err; 2909 2910 len = PAGE_SIZE - pg; 2911 if (len > remain) 2912 len = remain; 2913 2914 err = pagecache_write_begin(obj->base.filp, mapping, 2915 offset, len, 0, 2916 &page, &data); 2917 if (err < 0) 2918 return err; 2919 2920 vaddr = kmap(page); 2921 unwritten = copy_from_user(vaddr + pg, user_data, len); 2922 kunmap(page); 2923 2924 err = pagecache_write_end(obj->base.filp, mapping, 2925 offset, len, len - unwritten, 2926 page, data); 2927 if (err < 0) 2928 return err; 2929 2930 if (unwritten) 2931 return -EFAULT; 2932 2933 remain -= len; 2934 user_data += len; 2935 offset += len; 2936 pg = 0; 2937 } while (remain); 2938 2939 return 0; 2940 } 2941 2942 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 2943 const struct i915_gem_context *ctx) 2944 { 2945 unsigned int score; 2946 unsigned long prev_hang; 2947 2948 if (i915_gem_context_is_banned(ctx)) 2949 score = I915_CLIENT_SCORE_CONTEXT_BAN; 2950 else 2951 score = 0; 2952 2953 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 2954 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 2955 score += I915_CLIENT_SCORE_HANG_FAST; 2956 2957 if (score) { 2958 atomic_add(score, &file_priv->ban_score); 2959 2960 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 2961 ctx->name, score, 2962 atomic_read(&file_priv->ban_score)); 2963 } 2964 } 2965 2966 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2967 { 2968 unsigned int score; 2969 bool banned, bannable; 2970 2971 atomic_inc(&ctx->guilty_count); 2972 2973 bannable = i915_gem_context_is_bannable(ctx); 2974 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 2975 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 2976 2977 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n", 2978 ctx->name, atomic_read(&ctx->guilty_count), 2979 score, yesno(banned && bannable)); 2980 2981 /* Cool contexts don't accumulate client ban score */ 2982 if (!bannable) 2983 return; 2984 2985 if (banned) 
2986 i915_gem_context_set_banned(ctx); 2987 2988 if (!IS_ERR_OR_NULL(ctx->file_priv)) 2989 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 2990 } 2991 2992 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2993 { 2994 atomic_inc(&ctx->active_count); 2995 } 2996 2997 struct i915_request * 2998 i915_gem_find_active_request(struct intel_engine_cs *engine) 2999 { 3000 struct i915_request *request, *active = NULL; 3001 unsigned long flags; 3002 3003 /* 3004 * We are called by the error capture, reset and to dump engine 3005 * state at random points in time. In particular, note that neither is 3006 * crucially ordered with an interrupt. After a hang, the GPU is dead 3007 * and we assume that no more writes can happen (we waited long enough 3008 * for all writes that were in transaction to be flushed) - adding an 3009 * extra delay for a recent interrupt is pointless. Hence, we do 3010 * not need an engine->irq_seqno_barrier() before the seqno reads. 3011 * At all other times, we must assume the GPU is still running, but 3012 * we only care about the snapshot of this moment. 3013 */ 3014 spin_lock_irqsave(&engine->timeline.lock, flags); 3015 list_for_each_entry(request, &engine->timeline.requests, link) { 3016 if (__i915_request_completed(request, request->global_seqno)) 3017 continue; 3018 3019 active = request; 3020 break; 3021 } 3022 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3023 3024 return active; 3025 } 3026 3027 /* 3028 * Ensure irq handler finishes, and not run again. 3029 * Also return the active request so that we only search for it once. 3030 */ 3031 struct i915_request * 3032 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3033 { 3034 struct i915_request *request = NULL; 3035 3036 /* 3037 * During the reset sequence, we must prevent the engine from 3038 * entering RC6. As the context state is undefined until we restart 3039 * the engine, if it does enter RC6 during the reset, the state 3040 * written to the powercontext is undefined and so we may lose 3041 * GPU state upon resume, i.e. fail to restart after a reset. 3042 */ 3043 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3044 3045 /* 3046 * Prevent the signaler thread from updating the request 3047 * state (by calling dma_fence_signal) as we are processing 3048 * the reset. The write from the GPU of the seqno is 3049 * asynchronous and the signaler thread may see a different 3050 * value to us and declare the request complete, even though 3051 * the reset routine have picked that request as the active 3052 * (incomplete) request. This conflict is not handled 3053 * gracefully! 3054 */ 3055 kthread_park(engine->breadcrumbs.signaler); 3056 3057 /* 3058 * Prevent request submission to the hardware until we have 3059 * completed the reset in i915_gem_reset_finish(). If a request 3060 * is completed by one engine, it may then queue a request 3061 * to a second via its execlists->tasklet *just* as we are 3062 * calling engine->init_hw() and also writing the ELSP. 3063 * Turning off the execlists->tasklet until the reset is over 3064 * prevents the race. 3065 * 3066 * Note that this needs to be a single atomic operation on the 3067 * tasklet (flush existing tasks, prevent new tasks) to prevent 3068 * a race between reset and set-wedged. It is not, so we do the best 3069 * we can atm and make sure we don't lock the machine up in the more 3070 * common case of recursively being called from set-wedged from inside 3071 * i915_reset. 
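 *
 * (Below, tasklet_kill() waits for any already-scheduled run of the
 * tasklet to finish, and tasklet_disable() then keeps it from running
 * again until i915_gem_reset_finish_engine() re-enables it.)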
3072 */ 3073 if (!atomic_read(&engine->execlists.tasklet.count)) 3074 tasklet_kill(&engine->execlists.tasklet); 3075 tasklet_disable(&engine->execlists.tasklet); 3076 3077 /* 3078 * We're using worker to queue preemption requests from the tasklet in 3079 * GuC submission mode. 3080 * Even though tasklet was disabled, we may still have a worker queued. 3081 * Let's make sure that all workers scheduled before disabling the 3082 * tasklet are completed before continuing with the reset. 3083 */ 3084 if (engine->i915->guc.preempt_wq) 3085 flush_workqueue(engine->i915->guc.preempt_wq); 3086 3087 if (engine->irq_seqno_barrier) 3088 engine->irq_seqno_barrier(engine); 3089 3090 request = i915_gem_find_active_request(engine); 3091 if (request && request->fence.error == -EIO) 3092 request = ERR_PTR(-EIO); /* Previous reset failed! */ 3093 3094 return request; 3095 } 3096 3097 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3098 { 3099 struct intel_engine_cs *engine; 3100 struct i915_request *request; 3101 enum intel_engine_id id; 3102 int err = 0; 3103 3104 for_each_engine(engine, dev_priv, id) { 3105 request = i915_gem_reset_prepare_engine(engine); 3106 if (IS_ERR(request)) { 3107 err = PTR_ERR(request); 3108 continue; 3109 } 3110 3111 engine->hangcheck.active_request = request; 3112 } 3113 3114 i915_gem_revoke_fences(dev_priv); 3115 intel_uc_sanitize(dev_priv); 3116 3117 return err; 3118 } 3119 3120 static void skip_request(struct i915_request *request) 3121 { 3122 void *vaddr = request->ring->vaddr; 3123 u32 head; 3124 3125 /* As this request likely depends on state from the lost 3126 * context, clear out all the user operations leaving the 3127 * breadcrumb at the end (so we get the fence notifications). 3128 */ 3129 head = request->head; 3130 if (request->postfix < head) { 3131 memset(vaddr + head, 0, request->ring->size - head); 3132 head = 0; 3133 } 3134 memset(vaddr + head, 0, request->postfix - head); 3135 3136 dma_fence_set_error(&request->fence, -EIO); 3137 } 3138 3139 static void engine_skip_context(struct i915_request *request) 3140 { 3141 struct intel_engine_cs *engine = request->engine; 3142 struct i915_gem_context *hung_ctx = request->ctx; 3143 struct i915_timeline *timeline = request->timeline; 3144 unsigned long flags; 3145 3146 GEM_BUG_ON(timeline == &engine->timeline); 3147 3148 spin_lock_irqsave(&engine->timeline.lock, flags); 3149 spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); 3150 3151 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3152 if (request->ctx == hung_ctx) 3153 skip_request(request); 3154 3155 list_for_each_entry(request, &timeline->requests, link) 3156 skip_request(request); 3157 3158 spin_unlock(&timeline->lock); 3159 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3160 } 3161 3162 /* Returns the request if it was guilty of the hang */ 3163 static struct i915_request * 3164 i915_gem_reset_request(struct intel_engine_cs *engine, 3165 struct i915_request *request, 3166 bool stalled) 3167 { 3168 /* The guilty request will get skipped on a hung engine. 3169 * 3170 * Users of client default contexts do not rely on logical 3171 * state preserved between batches so it is safe to execute 3172 * queued requests following the hang. Non default contexts 3173 * rely on preserved state, so skipping a batch loses the 3174 * evolution of the state and it needs to be considered corrupted. 3175 * Executing more queued batches on top of corrupted state is 3176 * risky. 
But we take the risk by trying to advance through 3177 * the queued requests in order to make the client behaviour 3178 * more predictable around resets, by not throwing away random 3179 * amount of batches it has prepared for execution. Sophisticated 3180 * clients can use gem_reset_stats_ioctl and dma fence status 3181 * (exported via sync_file info ioctl on explicit fences) to observe 3182 * when it loses the context state and should rebuild accordingly. 3183 * 3184 * The context ban, and ultimately the client ban, mechanism are safety 3185 * valves if client submission ends up resulting in nothing more than 3186 * subsequent hangs. 3187 */ 3188 3189 if (i915_request_completed(request)) { 3190 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3191 engine->name, request->global_seqno, 3192 request->fence.context, request->fence.seqno, 3193 intel_engine_get_seqno(engine)); 3194 stalled = false; 3195 } 3196 3197 if (stalled) { 3198 i915_gem_context_mark_guilty(request->ctx); 3199 skip_request(request); 3200 3201 /* If this context is now banned, skip all pending requests. */ 3202 if (i915_gem_context_is_banned(request->ctx)) 3203 engine_skip_context(request); 3204 } else { 3205 /* 3206 * Since this is not the hung engine, it may have advanced 3207 * since the hang declaration. Double check by refinding 3208 * the active request at the time of the reset. 3209 */ 3210 request = i915_gem_find_active_request(engine); 3211 if (request) { 3212 i915_gem_context_mark_innocent(request->ctx); 3213 dma_fence_set_error(&request->fence, -EAGAIN); 3214 3215 /* Rewind the engine to replay the incomplete rq */ 3216 spin_lock_irq(&engine->timeline.lock); 3217 request = list_prev_entry(request, link); 3218 if (&request->link == &engine->timeline.requests) 3219 request = NULL; 3220 spin_unlock_irq(&engine->timeline.lock); 3221 } 3222 } 3223 3224 return request; 3225 } 3226 3227 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3228 struct i915_request *request, 3229 bool stalled) 3230 { 3231 /* 3232 * Make sure this write is visible before we re-enable the interrupt 3233 * handlers on another CPU, as tasklet_enable() resolves to just 3234 * a compiler barrier which is insufficient for our purpose here. 3235 */ 3236 smp_store_mb(engine->irq_posted, 0); 3237 3238 if (request) 3239 request = i915_gem_reset_request(engine, request, stalled); 3240 3241 if (request) { 3242 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3243 engine->name, request->global_seqno); 3244 } 3245 3246 /* Setup the CS to resume from the breadcrumb of the hung request */ 3247 engine->reset_hw(engine, request); 3248 } 3249 3250 void i915_gem_reset(struct drm_i915_private *dev_priv, 3251 unsigned int stalled_mask) 3252 { 3253 struct intel_engine_cs *engine; 3254 enum intel_engine_id id; 3255 3256 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3257 3258 i915_retire_requests(dev_priv); 3259 3260 for_each_engine(engine, dev_priv, id) { 3261 struct i915_gem_context *ctx; 3262 3263 i915_gem_reset_engine(engine, 3264 engine->hangcheck.active_request, 3265 stalled_mask & ENGINE_MASK(id)); 3266 ctx = fetch_and_zero(&engine->last_retired_context); 3267 if (ctx) 3268 intel_context_unpin(ctx, engine); 3269 3270 /* 3271 * Ostensibily, we always want a context loaded for powersaving, 3272 * so if the engine is idle after the reset, send a request 3273 * to load our scratch kernel_context. 
3274 * 3275 * More mysteriously, if we leave the engine idle after a reset, 3276 * the next userspace batch may hang, with what appears to be 3277 * an incoherent read by the CS (presumably stale TLB). An 3278 * empty request appears sufficient to paper over the glitch. 3279 */ 3280 if (intel_engine_is_idle(engine)) { 3281 struct i915_request *rq; 3282 3283 rq = i915_request_alloc(engine, 3284 dev_priv->kernel_context); 3285 if (!IS_ERR(rq)) 3286 __i915_request_add(rq, false); 3287 } 3288 } 3289 3290 i915_gem_restore_fences(dev_priv); 3291 } 3292 3293 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3294 { 3295 tasklet_enable(&engine->execlists.tasklet); 3296 kthread_unpark(engine->breadcrumbs.signaler); 3297 3298 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3299 } 3300 3301 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3302 { 3303 struct intel_engine_cs *engine; 3304 enum intel_engine_id id; 3305 3306 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3307 3308 for_each_engine(engine, dev_priv, id) { 3309 engine->hangcheck.active_request = NULL; 3310 i915_gem_reset_finish_engine(engine); 3311 } 3312 } 3313 3314 static void nop_submit_request(struct i915_request *request) 3315 { 3316 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3317 request->engine->name, 3318 request->fence.context, request->fence.seqno); 3319 dma_fence_set_error(&request->fence, -EIO); 3320 3321 i915_request_submit(request); 3322 } 3323 3324 static void nop_complete_submit_request(struct i915_request *request) 3325 { 3326 unsigned long flags; 3327 3328 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3329 request->engine->name, 3330 request->fence.context, request->fence.seqno); 3331 dma_fence_set_error(&request->fence, -EIO); 3332 3333 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3334 __i915_request_submit(request); 3335 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3336 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3337 } 3338 3339 void i915_gem_set_wedged(struct drm_i915_private *i915) 3340 { 3341 struct intel_engine_cs *engine; 3342 enum intel_engine_id id; 3343 3344 GEM_TRACE("start\n"); 3345 3346 if (GEM_SHOW_DEBUG()) { 3347 struct drm_printer p = drm_debug_printer(__func__); 3348 3349 for_each_engine(engine, i915, id) 3350 intel_engine_dump(engine, &p, "%s\n", engine->name); 3351 } 3352 3353 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3354 smp_mb__after_atomic(); 3355 3356 /* 3357 * First, stop submission to hw, but do not yet complete requests by 3358 * rolling the global seqno forward (since this would complete requests 3359 * for which we haven't set the fence error to EIO yet). 3360 */ 3361 for_each_engine(engine, i915, id) { 3362 i915_gem_reset_prepare_engine(engine); 3363 3364 engine->submit_request = nop_submit_request; 3365 engine->schedule = NULL; 3366 } 3367 i915->caps.scheduler = 0; 3368 3369 /* Even if the GPU reset fails, it should still stop the engines */ 3370 intel_gpu_reset(i915, ALL_ENGINES); 3371 3372 /* 3373 * Make sure no one is running the old callback before we proceed with 3374 * cancelling requests and resetting the completion tracking. Otherwise 3375 * we might submit a request to the hardware which never completes. 3376 */ 3377 synchronize_rcu(); 3378 3379 for_each_engine(engine, i915, id) { 3380 /* Mark all executing requests as skipped */ 3381 engine->cancel_requests(engine); 3382 3383 /* 3384 * Only once we've force-cancelled all in-flight requests can we 3385 * start to complete all requests. 
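 *
 * (The first pass above installed nop_submit_request(), which fails
 * requests with -EIO but does not advance the breadcrumb; the switch
 * below to nop_complete_submit_request() also rolls the global seqno
 * forward so that everything still in flight reads back as completed.)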
3386 */ 3387 engine->submit_request = nop_complete_submit_request; 3388 } 3389 3390 /* 3391 * Make sure no request can slip through without getting completed by 3392 * either this call here to intel_engine_init_global_seqno, or the one 3393 * in nop_complete_submit_request. 3394 */ 3395 synchronize_rcu(); 3396 3397 for_each_engine(engine, i915, id) { 3398 unsigned long flags; 3399 3400 /* 3401 * Mark all pending requests as complete so that any concurrent 3402 * (lockless) lookup doesn't try and wait upon the request as we 3403 * reset it. 3404 */ 3405 spin_lock_irqsave(&engine->timeline.lock, flags); 3406 intel_engine_init_global_seqno(engine, 3407 intel_engine_last_submit(engine)); 3408 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3409 3410 i915_gem_reset_finish_engine(engine); 3411 } 3412 3413 GEM_TRACE("end\n"); 3414 3415 wake_up_all(&i915->gpu_error.reset_queue); 3416 } 3417 3418 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3419 { 3420 struct i915_timeline *tl; 3421 3422 lockdep_assert_held(&i915->drm.struct_mutex); 3423 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3424 return true; 3425 3426 GEM_TRACE("start\n"); 3427 3428 /* 3429 * Before unwedging, make sure that all pending operations 3430 * are flushed and errored out - we may have requests waiting upon 3431 * third party fences. We marked all inflight requests as EIO, and 3432 * every execbuf since returned EIO, for consistency we want all 3433 * the currently pending requests to also be marked as EIO, which 3434 * is done inside our nop_submit_request - and so we must wait. 3435 * 3436 * No more can be submitted until we reset the wedged bit. 3437 */ 3438 list_for_each_entry(tl, &i915->gt.timelines, link) { 3439 struct i915_request *rq; 3440 3441 rq = i915_gem_active_peek(&tl->last_request, 3442 &i915->drm.struct_mutex); 3443 if (!rq) 3444 continue; 3445 3446 /* 3447 * We can't use our normal waiter as we want to 3448 * avoid recursively trying to handle the current 3449 * reset. The basic dma_fence_default_wait() installs 3450 * a callback for dma_fence_signal(), which is 3451 * triggered by our nop handler (indirectly, the 3452 * callback enables the signaler thread which is 3453 * woken by the nop_submit_request() advancing the seqno 3454 * and when the seqno passes the fence, the signaler 3455 * then signals the fence waking us up). 3456 */ 3457 if (dma_fence_default_wait(&rq->fence, true, 3458 MAX_SCHEDULE_TIMEOUT) < 0) 3459 return false; 3460 } 3461 i915_retire_requests(i915); 3462 GEM_BUG_ON(i915->gt.active_requests); 3463 3464 /* 3465 * Undo nop_submit_request. We prevent all new i915 requests from 3466 * being queued (by disallowing execbuf whilst wedged) so having 3467 * waited for all active requests above, we know the system is idle 3468 * and do not have to worry about a thread being inside 3469 * engine->submit_request() as we swap over. So unlike installing 3470 * the nop_submit_request on reset, we can do this from normal 3471 * context and do not require stop_machine(). 
3472 */ 3473 intel_engines_reset_default_submission(i915); 3474 i915_gem_contexts_lost(i915); 3475 3476 GEM_TRACE("end\n"); 3477 3478 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3479 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3480 3481 return true; 3482 } 3483 3484 static void 3485 i915_gem_retire_work_handler(struct work_struct *work) 3486 { 3487 struct drm_i915_private *dev_priv = 3488 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3489 struct drm_device *dev = &dev_priv->drm; 3490 3491 /* Come back later if the device is busy... */ 3492 if (mutex_trylock(&dev->struct_mutex)) { 3493 i915_retire_requests(dev_priv); 3494 mutex_unlock(&dev->struct_mutex); 3495 } 3496 3497 /* 3498 * Keep the retire handler running until we are finally idle. 3499 * We do not need to do this test under locking as in the worst-case 3500 * we queue the retire worker once too often. 3501 */ 3502 if (READ_ONCE(dev_priv->gt.awake)) 3503 queue_delayed_work(dev_priv->wq, 3504 &dev_priv->gt.retire_work, 3505 round_jiffies_up_relative(HZ)); 3506 } 3507 3508 static void shrink_caches(struct drm_i915_private *i915) 3509 { 3510 /* 3511 * kmem_cache_shrink() discards empty slabs and reorders partially 3512 * filled slabs to prioritise allocating from the mostly full slabs, 3513 * with the aim of reducing fragmentation. 3514 */ 3515 kmem_cache_shrink(i915->priorities); 3516 kmem_cache_shrink(i915->dependencies); 3517 kmem_cache_shrink(i915->requests); 3518 kmem_cache_shrink(i915->luts); 3519 kmem_cache_shrink(i915->vmas); 3520 kmem_cache_shrink(i915->objects); 3521 } 3522 3523 struct sleep_rcu_work { 3524 union { 3525 struct rcu_head rcu; 3526 struct work_struct work; 3527 }; 3528 struct drm_i915_private *i915; 3529 unsigned int epoch; 3530 }; 3531 3532 static inline bool 3533 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3534 { 3535 /* 3536 * There is a small chance that the epoch wrapped since we started 3537 * sleeping. If we assume that epoch is at least a u32, then it will 3538 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3539 */ 3540 return epoch == READ_ONCE(i915->gt.epoch); 3541 } 3542 3543 static void __sleep_work(struct work_struct *work) 3544 { 3545 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3546 struct drm_i915_private *i915 = s->i915; 3547 unsigned int epoch = s->epoch; 3548 3549 kfree(s); 3550 if (same_epoch(i915, epoch)) 3551 shrink_caches(i915); 3552 } 3553 3554 static void __sleep_rcu(struct rcu_head *rcu) 3555 { 3556 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3557 struct drm_i915_private *i915 = s->i915; 3558 3559 if (same_epoch(i915, s->epoch)) { 3560 INIT_WORK(&s->work, __sleep_work); 3561 queue_work(i915->wq, &s->work); 3562 } else { 3563 kfree(s); 3564 } 3565 } 3566 3567 static inline bool 3568 new_requests_since_last_retire(const struct drm_i915_private *i915) 3569 { 3570 return (READ_ONCE(i915->gt.active_requests) || 3571 work_pending(&i915->gt.idle_work.work)); 3572 } 3573 3574 static void 3575 i915_gem_idle_work_handler(struct work_struct *work) 3576 { 3577 struct drm_i915_private *dev_priv = 3578 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3579 unsigned int epoch = I915_EPOCH_INVALID; 3580 bool rearm_hangcheck; 3581 3582 if (!READ_ONCE(dev_priv->gt.awake)) 3583 return; 3584 3585 /* 3586 * Wait for last execlists context complete, but bail out in case a 3587 * new request is submitted. As we don't trust the hardware, we 3588 * continue on if the wait times out. 
This is necessary to allow 3589 * the machine to suspend even if the hardware dies, and we will 3590 * try to recover in resume (after depriving the hardware of power, 3591 * it may be in a better mmod). 3592 */ 3593 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3594 intel_engines_are_idle(dev_priv), 3595 I915_IDLE_ENGINES_TIMEOUT * 1000, 3596 10, 500); 3597 3598 rearm_hangcheck = 3599 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3600 3601 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3602 /* Currently busy, come back later */ 3603 mod_delayed_work(dev_priv->wq, 3604 &dev_priv->gt.idle_work, 3605 msecs_to_jiffies(50)); 3606 goto out_rearm; 3607 } 3608 3609 /* 3610 * New request retired after this work handler started, extend active 3611 * period until next instance of the work. 3612 */ 3613 if (new_requests_since_last_retire(dev_priv)) 3614 goto out_unlock; 3615 3616 epoch = __i915_gem_park(dev_priv); 3617 3618 rearm_hangcheck = false; 3619 out_unlock: 3620 mutex_unlock(&dev_priv->drm.struct_mutex); 3621 3622 out_rearm: 3623 if (rearm_hangcheck) { 3624 GEM_BUG_ON(!dev_priv->gt.awake); 3625 i915_queue_hangcheck(dev_priv); 3626 } 3627 3628 /* 3629 * When we are idle, it is an opportune time to reap our caches. 3630 * However, we have many objects that utilise RCU and the ordered 3631 * i915->wq that this work is executing on. To try and flush any 3632 * pending frees now we are idle, we first wait for an RCU grace 3633 * period, and then queue a task (that will run last on the wq) to 3634 * shrink and re-optimize the caches. 3635 */ 3636 if (same_epoch(dev_priv, epoch)) { 3637 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3638 if (s) { 3639 s->i915 = dev_priv; 3640 s->epoch = epoch; 3641 call_rcu(&s->rcu, __sleep_rcu); 3642 } 3643 } 3644 } 3645 3646 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3647 { 3648 struct drm_i915_private *i915 = to_i915(gem->dev); 3649 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3650 struct drm_i915_file_private *fpriv = file->driver_priv; 3651 struct i915_lut_handle *lut, *ln; 3652 3653 mutex_lock(&i915->drm.struct_mutex); 3654 3655 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3656 struct i915_gem_context *ctx = lut->ctx; 3657 struct i915_vma *vma; 3658 3659 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3660 if (ctx->file_priv != fpriv) 3661 continue; 3662 3663 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3664 GEM_BUG_ON(vma->obj != obj); 3665 3666 /* We allow the process to have multiple handles to the same 3667 * vma, in the same fd namespace, by virtue of flink/open. 3668 */ 3669 GEM_BUG_ON(!vma->open_count); 3670 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3671 i915_vma_close(vma); 3672 3673 list_del(&lut->obj_link); 3674 list_del(&lut->ctx_link); 3675 3676 kmem_cache_free(i915->luts, lut); 3677 __i915_gem_object_release_unless_active(obj); 3678 } 3679 3680 mutex_unlock(&i915->drm.struct_mutex); 3681 } 3682 3683 static unsigned long to_wait_timeout(s64 timeout_ns) 3684 { 3685 if (timeout_ns < 0) 3686 return MAX_SCHEDULE_TIMEOUT; 3687 3688 if (timeout_ns == 0) 3689 return 0; 3690 3691 return nsecs_to_jiffies_timeout(timeout_ns); 3692 } 3693 3694 /** 3695 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3696 * @dev: drm device pointer 3697 * @data: ioctl data blob 3698 * @file: drm file pointer 3699 * 3700 * Returns 0 if successful, else an error is returned with the remaining time in 3701 * the timeout parameter. 
3702 * -ETIME: object is still busy after timeout 3703 * -ERESTARTSYS: signal interrupted the wait 3704 * -ENOENT: object doesn't exist 3705 * Also possible, but rare: 3706 * -EAGAIN: incomplete, restart syscall 3707 * -ENOMEM: damn 3708 * -ENODEV: Internal IRQ fail 3709 * -E?: The add request failed 3710 * 3711 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3712 * non-zero timeout parameter the wait ioctl will wait for the given number of 3713 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3714 * without holding struct_mutex the object may become re-busied before this 3715 * function completes. A similar but shorter race condition exists in the busy 3716 * ioctl. 3717 */ 3718 int 3719 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3720 { 3721 struct drm_i915_gem_wait *args = data; 3722 struct drm_i915_gem_object *obj; 3723 ktime_t start; 3724 long ret; 3725 3726 if (args->flags != 0) 3727 return -EINVAL; 3728 3729 obj = i915_gem_object_lookup(file, args->bo_handle); 3730 if (!obj) 3731 return -ENOENT; 3732 3733 start = ktime_get(); 3734 3735 ret = i915_gem_object_wait(obj, 3736 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3737 to_wait_timeout(args->timeout_ns), 3738 to_rps_client(file)); 3739 3740 if (args->timeout_ns > 0) { 3741 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3742 if (args->timeout_ns < 0) 3743 args->timeout_ns = 0; 3744 3745 /* 3746 * Apparently ktime isn't accurate enough and occasionally has a 3747 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3748 * things up to make the test happy. We allow up to 1 jiffy. 3749 * 3750 * This is a regression from the timespec->ktime conversion. 3751 */ 3752 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3753 args->timeout_ns = 0; 3754 3755 /* Asked to wait beyond the jiffie/scheduler precision?
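 * If so, report -EAGAIN below so the caller restarts the wait with the
 * updated timeout_ns rather than treating scheduler round-off as a
 * hard timeout.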
*/ 3756 if (ret == -ETIME && args->timeout_ns) 3757 ret = -EAGAIN; 3758 } 3759 3760 i915_gem_object_put(obj); 3761 return ret; 3762 } 3763 3764 static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) 3765 { 3766 return i915_gem_active_wait(&tl->last_request, flags); 3767 } 3768 3769 static int wait_for_engines(struct drm_i915_private *i915) 3770 { 3771 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3772 dev_err(i915->drm.dev, 3773 "Failed to idle engines, declaring wedged!\n"); 3774 GEM_TRACE_DUMP(); 3775 i915_gem_set_wedged(i915); 3776 return -EIO; 3777 } 3778 3779 return 0; 3780 } 3781 3782 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3783 { 3784 /* If the device is asleep, we have no requests outstanding */ 3785 if (!READ_ONCE(i915->gt.awake)) 3786 return 0; 3787 3788 if (flags & I915_WAIT_LOCKED) { 3789 struct i915_timeline *tl; 3790 int err; 3791 3792 lockdep_assert_held(&i915->drm.struct_mutex); 3793 3794 list_for_each_entry(tl, &i915->gt.timelines, link) { 3795 err = wait_for_timeline(tl, flags); 3796 if (err) 3797 return err; 3798 } 3799 i915_retire_requests(i915); 3800 3801 return wait_for_engines(i915); 3802 } else { 3803 struct intel_engine_cs *engine; 3804 enum intel_engine_id id; 3805 int err; 3806 3807 for_each_engine(engine, i915, id) { 3808 err = wait_for_timeline(&engine->timeline, flags); 3809 if (err) 3810 return err; 3811 } 3812 3813 return 0; 3814 } 3815 } 3816 3817 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3818 { 3819 /* 3820 * We manually flush the CPU domain so that we can override and 3821 * force the flush for the display, and perform it asyncrhonously. 3822 */ 3823 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3824 if (obj->cache_dirty) 3825 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3826 obj->write_domain = 0; 3827 } 3828 3829 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3830 { 3831 if (!READ_ONCE(obj->pin_global)) 3832 return; 3833 3834 mutex_lock(&obj->base.dev->struct_mutex); 3835 __i915_gem_object_flush_for_display(obj); 3836 mutex_unlock(&obj->base.dev->struct_mutex); 3837 } 3838 3839 /** 3840 * Moves a single object to the WC read, and possibly write domain. 3841 * @obj: object to act on 3842 * @write: ask for write access or read only 3843 * 3844 * This function returns when the move is complete, including waiting on 3845 * flushes to occur. 3846 */ 3847 int 3848 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3849 { 3850 int ret; 3851 3852 lockdep_assert_held(&obj->base.dev->struct_mutex); 3853 3854 ret = i915_gem_object_wait(obj, 3855 I915_WAIT_INTERRUPTIBLE | 3856 I915_WAIT_LOCKED | 3857 (write ? I915_WAIT_ALL : 0), 3858 MAX_SCHEDULE_TIMEOUT, 3859 NULL); 3860 if (ret) 3861 return ret; 3862 3863 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3864 return 0; 3865 3866 /* Flush and acquire obj->pages so that we are coherent through 3867 * direct access in memory with previous cached writes through 3868 * shmemfs and that our cache domain tracking remains valid. 3869 * For example, if the obj->filp was moved to swap without us 3870 * being notified and releasing the pages, we would mistakenly 3871 * continue to assume that the obj remained out of the CPU cached 3872 * domain. 
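 *
 * For context, a minimal caller sketch for this helper (struct_mutex
 * must already be held, as asserted at the top of the function):
 *
 *   err = i915_gem_object_set_to_wc_domain(obj, true);
 *   if (err)
 *           return err;
 *
 *   ... the object is now in the WC domain for CPU access ...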
3873 */ 3874 ret = i915_gem_object_pin_pages(obj); 3875 if (ret) 3876 return ret; 3877 3878 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3879 3880 /* Serialise direct access to this object with the barriers for 3881 * coherent writes from the GPU, by effectively invalidating the 3882 * WC domain upon first access. 3883 */ 3884 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3885 mb(); 3886 3887 /* It should now be out of any other write domains, and we can update 3888 * the domain values for our changes. 3889 */ 3890 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3891 obj->read_domains |= I915_GEM_DOMAIN_WC; 3892 if (write) { 3893 obj->read_domains = I915_GEM_DOMAIN_WC; 3894 obj->write_domain = I915_GEM_DOMAIN_WC; 3895 obj->mm.dirty = true; 3896 } 3897 3898 i915_gem_object_unpin_pages(obj); 3899 return 0; 3900 } 3901 3902 /** 3903 * Moves a single object to the GTT read, and possibly write domain. 3904 * @obj: object to act on 3905 * @write: ask for write access or read only 3906 * 3907 * This function returns when the move is complete, including waiting on 3908 * flushes to occur. 3909 */ 3910 int 3911 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3912 { 3913 int ret; 3914 3915 lockdep_assert_held(&obj->base.dev->struct_mutex); 3916 3917 ret = i915_gem_object_wait(obj, 3918 I915_WAIT_INTERRUPTIBLE | 3919 I915_WAIT_LOCKED | 3920 (write ? I915_WAIT_ALL : 0), 3921 MAX_SCHEDULE_TIMEOUT, 3922 NULL); 3923 if (ret) 3924 return ret; 3925 3926 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3927 return 0; 3928 3929 /* Flush and acquire obj->pages so that we are coherent through 3930 * direct access in memory with previous cached writes through 3931 * shmemfs and that our cache domain tracking remains valid. 3932 * For example, if the obj->filp was moved to swap without us 3933 * being notified and releasing the pages, we would mistakenly 3934 * continue to assume that the obj remained out of the CPU cached 3935 * domain. 3936 */ 3937 ret = i915_gem_object_pin_pages(obj); 3938 if (ret) 3939 return ret; 3940 3941 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3942 3943 /* Serialise direct access to this object with the barriers for 3944 * coherent writes from the GPU, by effectively invalidating the 3945 * GTT domain upon first access. 3946 */ 3947 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3948 mb(); 3949 3950 /* It should now be out of any other write domains, and we can update 3951 * the domain values for our changes. 3952 */ 3953 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3954 obj->read_domains |= I915_GEM_DOMAIN_GTT; 3955 if (write) { 3956 obj->read_domains = I915_GEM_DOMAIN_GTT; 3957 obj->write_domain = I915_GEM_DOMAIN_GTT; 3958 obj->mm.dirty = true; 3959 } 3960 3961 i915_gem_object_unpin_pages(obj); 3962 return 0; 3963 } 3964 3965 /** 3966 * Changes the cache-level of an object across all VMA. 3967 * @obj: object to act on 3968 * @cache_level: new cache level to set for the object 3969 * 3970 * After this function returns, the object will be in the new cache-level 3971 * across all GTT and the contents of the backing storage will be coherent, 3972 * with respect to the new cache-level. In order to keep the backing storage 3973 * coherent for all users, we only allow a single cache level to be set 3974 * globally on the object and prevent it from being changed whilst the 3975 * hardware is reading from the object. 
That is if the object is currently 3976 * on the scanout it will be set to uncached (or equivalent display 3977 * cache coherency) and all non-MOCS GPU access will also be uncached so 3978 * that all direct access to the scanout remains coherent. 3979 */ 3980 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3981 enum i915_cache_level cache_level) 3982 { 3983 struct i915_vma *vma; 3984 int ret; 3985 3986 lockdep_assert_held(&obj->base.dev->struct_mutex); 3987 3988 if (obj->cache_level == cache_level) 3989 return 0; 3990 3991 /* Inspect the list of currently bound VMA and unbind any that would 3992 * be invalid given the new cache-level. This is principally to 3993 * catch the issue of the CS prefetch crossing page boundaries and 3994 * reading an invalid PTE on older architectures. 3995 */ 3996 restart: 3997 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3998 if (!drm_mm_node_allocated(&vma->node)) 3999 continue; 4000 4001 if (i915_vma_is_pinned(vma)) { 4002 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4003 return -EBUSY; 4004 } 4005 4006 if (!i915_vma_is_closed(vma) && 4007 i915_gem_valid_gtt_space(vma, cache_level)) 4008 continue; 4009 4010 ret = i915_vma_unbind(vma); 4011 if (ret) 4012 return ret; 4013 4014 /* As unbinding may affect other elements in the 4015 * obj->vma_list (due to side-effects from retiring 4016 * an active vma), play safe and restart the iterator. 4017 */ 4018 goto restart; 4019 } 4020 4021 /* We can reuse the existing drm_mm nodes but need to change the 4022 * cache-level on the PTE. We could simply unbind them all and 4023 * rebind with the correct cache-level on next use. However since 4024 * we already have a valid slot, dma mapping, pages etc, we may as 4025 * rewrite the PTE in the belief that doing so tramples upon less 4026 * state and so involves less work. 4027 */ 4028 if (obj->bind_count) { 4029 /* Before we change the PTE, the GPU must not be accessing it. 4030 * If we wait upon the object, we know that all the bound 4031 * VMA are no longer active. 4032 */ 4033 ret = i915_gem_object_wait(obj, 4034 I915_WAIT_INTERRUPTIBLE | 4035 I915_WAIT_LOCKED | 4036 I915_WAIT_ALL, 4037 MAX_SCHEDULE_TIMEOUT, 4038 NULL); 4039 if (ret) 4040 return ret; 4041 4042 if (!HAS_LLC(to_i915(obj->base.dev)) && 4043 cache_level != I915_CACHE_NONE) { 4044 /* Access to snoopable pages through the GTT is 4045 * incoherent and on some machines causes a hard 4046 * lockup. Relinquish the CPU mmaping to force 4047 * userspace to refault in the pages and we can 4048 * then double check if the GTT mapping is still 4049 * valid for that pointer access. 4050 */ 4051 i915_gem_release_mmap(obj); 4052 4053 /* As we no longer need a fence for GTT access, 4054 * we can relinquish it now (and so prevent having 4055 * to steal a fence from someone else on the next 4056 * fence request). Note GPU activity would have 4057 * dropped the fence as all snoopable access is 4058 * supposed to be linear. 4059 */ 4060 for_each_ggtt_vma(vma, obj) { 4061 ret = i915_vma_put_fence(vma); 4062 if (ret) 4063 return ret; 4064 } 4065 } else { 4066 /* We either have incoherent backing store and 4067 * so no GTT access or the architecture is fully 4068 * coherent. In such cases, existing GTT mmaps 4069 * ignore the cache bit in the PTE and we can 4070 * rewrite it without confusing the GPU or having 4071 * to force userspace to fault back in its mmaps. 
4072 */ 4073 } 4074 4075 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4076 if (!drm_mm_node_allocated(&vma->node)) 4077 continue; 4078 4079 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4080 if (ret) 4081 return ret; 4082 } 4083 } 4084 4085 list_for_each_entry(vma, &obj->vma_list, obj_link) 4086 vma->node.color = cache_level; 4087 i915_gem_object_set_cache_coherency(obj, cache_level); 4088 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4089 4090 return 0; 4091 } 4092 4093 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4094 struct drm_file *file) 4095 { 4096 struct drm_i915_gem_caching *args = data; 4097 struct drm_i915_gem_object *obj; 4098 int err = 0; 4099 4100 rcu_read_lock(); 4101 obj = i915_gem_object_lookup_rcu(file, args->handle); 4102 if (!obj) { 4103 err = -ENOENT; 4104 goto out; 4105 } 4106 4107 switch (obj->cache_level) { 4108 case I915_CACHE_LLC: 4109 case I915_CACHE_L3_LLC: 4110 args->caching = I915_CACHING_CACHED; 4111 break; 4112 4113 case I915_CACHE_WT: 4114 args->caching = I915_CACHING_DISPLAY; 4115 break; 4116 4117 default: 4118 args->caching = I915_CACHING_NONE; 4119 break; 4120 } 4121 out: 4122 rcu_read_unlock(); 4123 return err; 4124 } 4125 4126 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4127 struct drm_file *file) 4128 { 4129 struct drm_i915_private *i915 = to_i915(dev); 4130 struct drm_i915_gem_caching *args = data; 4131 struct drm_i915_gem_object *obj; 4132 enum i915_cache_level level; 4133 int ret = 0; 4134 4135 switch (args->caching) { 4136 case I915_CACHING_NONE: 4137 level = I915_CACHE_NONE; 4138 break; 4139 case I915_CACHING_CACHED: 4140 /* 4141 * Due to a HW issue on BXT A stepping, GPU stores via a 4142 * snooped mapping may leave stale data in a corresponding CPU 4143 * cacheline, whereas normally such cachelines would get 4144 * invalidated. 4145 */ 4146 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4147 return -ENODEV; 4148 4149 level = I915_CACHE_LLC; 4150 break; 4151 case I915_CACHING_DISPLAY: 4152 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4153 break; 4154 default: 4155 return -EINVAL; 4156 } 4157 4158 obj = i915_gem_object_lookup(file, args->handle); 4159 if (!obj) 4160 return -ENOENT; 4161 4162 /* 4163 * The caching mode of proxy object is handled by its generator, and 4164 * not allowed to be changed by userspace. 4165 */ 4166 if (i915_gem_object_is_proxy(obj)) { 4167 ret = -ENXIO; 4168 goto out; 4169 } 4170 4171 if (obj->cache_level == level) 4172 goto out; 4173 4174 ret = i915_gem_object_wait(obj, 4175 I915_WAIT_INTERRUPTIBLE, 4176 MAX_SCHEDULE_TIMEOUT, 4177 to_rps_client(file)); 4178 if (ret) 4179 goto out; 4180 4181 ret = i915_mutex_lock_interruptible(dev); 4182 if (ret) 4183 goto out; 4184 4185 ret = i915_gem_object_set_cache_level(obj, level); 4186 mutex_unlock(&dev->struct_mutex); 4187 4188 out: 4189 i915_gem_object_put(obj); 4190 return ret; 4191 } 4192 4193 /* 4194 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 4195 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4196 * (for pageflips). We only flush the caches while preparing the buffer for 4197 * display, the callers are responsible for frontbuffer flush. 
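 *
 * An illustrative (not normative) caller sequence, with alignment, view and
 * flags standing in for whatever the display code actually requires:
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view, flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	... program the plane to scan out from vma ...
 *	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 *	...
 *	i915_gem_object_unpin_from_display_plane(vma);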
4198 */ 4199 struct i915_vma * 4200 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4201 u32 alignment, 4202 const struct i915_ggtt_view *view, 4203 unsigned int flags) 4204 { 4205 struct i915_vma *vma; 4206 int ret; 4207 4208 lockdep_assert_held(&obj->base.dev->struct_mutex); 4209 4210 /* Mark the global pin early so that we account for the 4211 * display coherency whilst setting up the cache domains. 4212 */ 4213 obj->pin_global++; 4214 4215 /* The display engine is not coherent with the LLC cache on gen6. As 4216 * a result, we make sure that the pinning that is about to occur is 4217 * done with uncached PTEs. This is lowest common denominator for all 4218 * chipsets. 4219 * 4220 * However for gen6+, we could do better by using the GFDT bit instead 4221 * of uncaching, which would allow us to flush all the LLC-cached data 4222 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4223 */ 4224 ret = i915_gem_object_set_cache_level(obj, 4225 HAS_WT(to_i915(obj->base.dev)) ? 4226 I915_CACHE_WT : I915_CACHE_NONE); 4227 if (ret) { 4228 vma = ERR_PTR(ret); 4229 goto err_unpin_global; 4230 } 4231 4232 /* As the user may map the buffer once pinned in the display plane 4233 * (e.g. libkms for the bootup splash), we have to ensure that we 4234 * always use map_and_fenceable for all scanout buffers. However, 4235 * it may simply be too big to fit into mappable, in which case 4236 * put it anyway and hope that userspace can cope (but always first 4237 * try to preserve the existing ABI). 4238 */ 4239 vma = ERR_PTR(-ENOSPC); 4240 if ((flags & PIN_MAPPABLE) == 0 && 4241 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4242 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4243 flags | 4244 PIN_MAPPABLE | 4245 PIN_NONBLOCK); 4246 if (IS_ERR(vma)) 4247 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4248 if (IS_ERR(vma)) 4249 goto err_unpin_global; 4250 4251 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4252 4253 __i915_gem_object_flush_for_display(obj); 4254 4255 /* It should now be out of any other write domains, and we can update 4256 * the domain values for our changes. 4257 */ 4258 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4259 4260 return vma; 4261 4262 err_unpin_global: 4263 obj->pin_global--; 4264 return vma; 4265 } 4266 4267 void 4268 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4269 { 4270 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4271 4272 if (WARN_ON(vma->obj->pin_global == 0)) 4273 return; 4274 4275 if (--vma->obj->pin_global == 0) 4276 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4277 4278 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4279 i915_gem_object_bump_inactive_ggtt(vma->obj); 4280 4281 i915_vma_unpin(vma); 4282 } 4283 4284 /** 4285 * Moves a single object to the CPU read, and possibly write domain. 4286 * @obj: object to act on 4287 * @write: requesting write or read-only access 4288 * 4289 * This function returns when the move is complete, including waiting on 4290 * flushes to occur. 4291 */ 4292 int 4293 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4294 { 4295 int ret; 4296 4297 lockdep_assert_held(&obj->base.dev->struct_mutex); 4298 4299 ret = i915_gem_object_wait(obj, 4300 I915_WAIT_INTERRUPTIBLE | 4301 I915_WAIT_LOCKED | 4302 (write ? 
I915_WAIT_ALL : 0), 4303 MAX_SCHEDULE_TIMEOUT, 4304 NULL); 4305 if (ret) 4306 return ret; 4307 4308 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4309 4310 /* Flush the CPU cache if it's still invalid. */ 4311 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4312 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4313 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4314 } 4315 4316 /* It should now be out of any other write domains, and we can update 4317 * the domain values for our changes. 4318 */ 4319 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4320 4321 /* If we're writing through the CPU, then the GPU read domains will 4322 * need to be invalidated at next use. 4323 */ 4324 if (write) 4325 __start_cpu_write(obj); 4326 4327 return 0; 4328 } 4329 4330 /* Throttle our rendering by waiting until the ring has completed our requests 4331 * emitted over 20 msec ago. 4332 * 4333 * Note that if we were to use the current jiffies each time around the loop, 4334 * we wouldn't escape the function with any frames outstanding if the time to 4335 * render a frame was over 20ms. 4336 * 4337 * This should get us reasonable parallelism between CPU and GPU but also 4338 * relatively low latency when blocking on a particular request to finish. 4339 */ 4340 static int 4341 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4342 { 4343 struct drm_i915_private *dev_priv = to_i915(dev); 4344 struct drm_i915_file_private *file_priv = file->driver_priv; 4345 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4346 struct i915_request *request, *target = NULL; 4347 long ret; 4348 4349 /* ABI: return -EIO if already wedged */ 4350 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4351 return -EIO; 4352 4353 spin_lock(&file_priv->mm.lock); 4354 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4355 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4356 break; 4357 4358 if (target) { 4359 list_del(&target->client_link); 4360 target->file_priv = NULL; 4361 } 4362 4363 target = request; 4364 } 4365 if (target) 4366 i915_request_get(target); 4367 spin_unlock(&file_priv->mm.lock); 4368 4369 if (target == NULL) 4370 return 0; 4371 4372 ret = i915_request_wait(target, 4373 I915_WAIT_INTERRUPTIBLE, 4374 MAX_SCHEDULE_TIMEOUT); 4375 i915_request_put(target); 4376 4377 return ret < 0 ? ret : 0; 4378 } 4379 4380 struct i915_vma * 4381 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4382 const struct i915_ggtt_view *view, 4383 u64 size, 4384 u64 alignment, 4385 u64 flags) 4386 { 4387 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4388 struct i915_address_space *vm = &dev_priv->ggtt.base; 4389 struct i915_vma *vma; 4390 int ret; 4391 4392 lockdep_assert_held(&obj->base.dev->struct_mutex); 4393 4394 if (flags & PIN_MAPPABLE && 4395 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4396 /* If the required space is larger than the available 4397 * aperture, we will not able to find a slot for the 4398 * object and unbinding the object now will be in 4399 * vain. Worse, doing so may cause us to ping-pong 4400 * the object in and out of the Global GTT and 4401 * waste a lot of cycles under the mutex. 4402 */ 4403 if (obj->base.size > dev_priv->ggtt.mappable_end) 4404 return ERR_PTR(-E2BIG); 4405 4406 /* If NONBLOCK is set the caller is optimistically 4407 * trying to cache the full object within the mappable 4408 * aperture, and *must* have a fallback in place for 4409 * situations where we cannot bind the object. 
We 4410 * can be a little more lax here and use the fallback 4411 * more often to avoid costly migrations of ourselves 4412 * and other objects within the aperture. 4413 * 4414 * Half-the-aperture is used as a simple heuristic. 4415 * More interesting would to do search for a free 4416 * block prior to making the commitment to unbind. 4417 * That caters for the self-harm case, and with a 4418 * little more heuristics (e.g. NOFAULT, NOEVICT) 4419 * we could try to minimise harm to others. 4420 */ 4421 if (flags & PIN_NONBLOCK && 4422 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4423 return ERR_PTR(-ENOSPC); 4424 } 4425 4426 vma = i915_vma_instance(obj, vm, view); 4427 if (unlikely(IS_ERR(vma))) 4428 return vma; 4429 4430 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4431 if (flags & PIN_NONBLOCK) { 4432 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4433 return ERR_PTR(-ENOSPC); 4434 4435 if (flags & PIN_MAPPABLE && 4436 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4437 return ERR_PTR(-ENOSPC); 4438 } 4439 4440 WARN(i915_vma_is_pinned(vma), 4441 "bo is already pinned in ggtt with incorrect alignment:" 4442 " offset=%08x, req.alignment=%llx," 4443 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4444 i915_ggtt_offset(vma), alignment, 4445 !!(flags & PIN_MAPPABLE), 4446 i915_vma_is_map_and_fenceable(vma)); 4447 ret = i915_vma_unbind(vma); 4448 if (ret) 4449 return ERR_PTR(ret); 4450 } 4451 4452 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4453 if (ret) 4454 return ERR_PTR(ret); 4455 4456 return vma; 4457 } 4458 4459 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4460 { 4461 /* Note that we could alias engines in the execbuf API, but 4462 * that would be very unwise as it prevents userspace from 4463 * fine control over engine selection. Ahem. 4464 * 4465 * This should be something like EXEC_MAX_ENGINE instead of 4466 * I915_NUM_ENGINES. 4467 */ 4468 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4469 return 0x10000 << id; 4470 } 4471 4472 static __always_inline unsigned int __busy_write_id(unsigned int id) 4473 { 4474 /* The uABI guarantees an active writer is also amongst the read 4475 * engines. This would be true if we accessed the activity tracking 4476 * under the lock, but as we perform the lookup of the object and 4477 * its activity locklessly we can not guarantee that the last_write 4478 * being active implies that we have set the same engine flag from 4479 * last_read - hence we always set both read and write busy for 4480 * last_write. 4481 */ 4482 return id | __busy_read_flag(id); 4483 } 4484 4485 static __always_inline unsigned int 4486 __busy_set_if_active(const struct dma_fence *fence, 4487 unsigned int (*flag)(unsigned int id)) 4488 { 4489 struct i915_request *rq; 4490 4491 /* We have to check the current hw status of the fence as the uABI 4492 * guarantees forward progress. We could rely on the idle worker 4493 * to eventually flush us, but to minimise latency just ask the 4494 * hardware. 4495 * 4496 * Note we only report on the status of native fences. 
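 * A foreign fence, e.g. one attached to the reservation object by another
 * driver through dma-buf, fails the dma_fence_is_i915() check below and is
 * therefore reported as idle.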
4497 */ 4498 if (!dma_fence_is_i915(fence)) 4499 return 0; 4500 4501 /* opencode to_request() in order to avoid const warnings */ 4502 rq = container_of(fence, struct i915_request, fence); 4503 if (i915_request_completed(rq)) 4504 return 0; 4505 4506 return flag(rq->engine->uabi_id); 4507 } 4508 4509 static __always_inline unsigned int 4510 busy_check_reader(const struct dma_fence *fence) 4511 { 4512 return __busy_set_if_active(fence, __busy_read_flag); 4513 } 4514 4515 static __always_inline unsigned int 4516 busy_check_writer(const struct dma_fence *fence) 4517 { 4518 if (!fence) 4519 return 0; 4520 4521 return __busy_set_if_active(fence, __busy_write_id); 4522 } 4523 4524 int 4525 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4526 struct drm_file *file) 4527 { 4528 struct drm_i915_gem_busy *args = data; 4529 struct drm_i915_gem_object *obj; 4530 struct reservation_object_list *list; 4531 unsigned int seq; 4532 int err; 4533 4534 err = -ENOENT; 4535 rcu_read_lock(); 4536 obj = i915_gem_object_lookup_rcu(file, args->handle); 4537 if (!obj) 4538 goto out; 4539 4540 /* A discrepancy here is that we do not report the status of 4541 * non-i915 fences, i.e. even though we may report the object as idle, 4542 * a call to set-domain may still stall waiting for foreign rendering. 4543 * This also means that wait-ioctl may report an object as busy, 4544 * where busy-ioctl considers it idle. 4545 * 4546 * We trade the ability to warn of foreign fences to report on which 4547 * i915 engines are active for the object. 4548 * 4549 * Alternatively, we can trade that extra information on read/write 4550 * activity with 4551 * args->busy = 4552 * !reservation_object_test_signaled_rcu(obj->resv, true); 4553 * to report the overall busyness. This is what the wait-ioctl does. 
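 *
 * Given the encoding used by __busy_read_flag() and __busy_write_id() above,
 * userspace can split the result along the lines of (illustrative only):
 *
 *	writer  = args.busy & 0xffff;	(engine id of the single writer, if any)
 *	readers = args.busy >> 16;	(one bit per engine currently reading)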
4554 * 4555 */ 4556 retry: 4557 seq = raw_read_seqcount(&obj->resv->seq); 4558 4559 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4560 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4561 4562 /* Translate shared fences to READ set of engines */ 4563 list = rcu_dereference(obj->resv->fence); 4564 if (list) { 4565 unsigned int shared_count = list->shared_count, i; 4566 4567 for (i = 0; i < shared_count; ++i) { 4568 struct dma_fence *fence = 4569 rcu_dereference(list->shared[i]); 4570 4571 args->busy |= busy_check_reader(fence); 4572 } 4573 } 4574 4575 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4576 goto retry; 4577 4578 err = 0; 4579 out: 4580 rcu_read_unlock(); 4581 return err; 4582 } 4583 4584 int 4585 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4586 struct drm_file *file_priv) 4587 { 4588 return i915_gem_ring_throttle(dev, file_priv); 4589 } 4590 4591 int 4592 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4593 struct drm_file *file_priv) 4594 { 4595 struct drm_i915_private *dev_priv = to_i915(dev); 4596 struct drm_i915_gem_madvise *args = data; 4597 struct drm_i915_gem_object *obj; 4598 int err; 4599 4600 switch (args->madv) { 4601 case I915_MADV_DONTNEED: 4602 case I915_MADV_WILLNEED: 4603 break; 4604 default: 4605 return -EINVAL; 4606 } 4607 4608 obj = i915_gem_object_lookup(file_priv, args->handle); 4609 if (!obj) 4610 return -ENOENT; 4611 4612 err = mutex_lock_interruptible(&obj->mm.lock); 4613 if (err) 4614 goto out; 4615 4616 if (i915_gem_object_has_pages(obj) && 4617 i915_gem_object_is_tiled(obj) && 4618 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4619 if (obj->mm.madv == I915_MADV_WILLNEED) { 4620 GEM_BUG_ON(!obj->mm.quirked); 4621 __i915_gem_object_unpin_pages(obj); 4622 obj->mm.quirked = false; 4623 } 4624 if (args->madv == I915_MADV_WILLNEED) { 4625 GEM_BUG_ON(obj->mm.quirked); 4626 __i915_gem_object_pin_pages(obj); 4627 obj->mm.quirked = true; 4628 } 4629 } 4630 4631 if (obj->mm.madv != __I915_MADV_PURGED) 4632 obj->mm.madv = args->madv; 4633 4634 /* if the object is no longer attached, discard its backing storage */ 4635 if (obj->mm.madv == I915_MADV_DONTNEED && 4636 !i915_gem_object_has_pages(obj)) 4637 i915_gem_object_truncate(obj); 4638 4639 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4640 mutex_unlock(&obj->mm.lock); 4641 4642 out: 4643 i915_gem_object_put(obj); 4644 return err; 4645 } 4646 4647 static void 4648 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4649 { 4650 struct drm_i915_gem_object *obj = 4651 container_of(active, typeof(*obj), frontbuffer_write); 4652 4653 intel_fb_obj_flush(obj, ORIGIN_CS); 4654 } 4655 4656 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4657 const struct drm_i915_gem_object_ops *ops) 4658 { 4659 mutex_init(&obj->mm.lock); 4660 4661 INIT_LIST_HEAD(&obj->vma_list); 4662 INIT_LIST_HEAD(&obj->lut_list); 4663 INIT_LIST_HEAD(&obj->batch_pool_link); 4664 4665 obj->ops = ops; 4666 4667 reservation_object_init(&obj->__builtin_resv); 4668 obj->resv = &obj->__builtin_resv; 4669 4670 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4671 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4672 4673 obj->mm.madv = I915_MADV_WILLNEED; 4674 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4675 mutex_init(&obj->mm.get_page.lock); 4676 4677 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4678 } 4679 4680 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 4681 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4682 I915_GEM_OBJECT_IS_SHRINKABLE, 4683 4684 .get_pages = i915_gem_object_get_pages_gtt, 4685 .put_pages = i915_gem_object_put_pages_gtt, 4686 4687 .pwrite = i915_gem_object_pwrite_gtt, 4688 }; 4689 4690 static int i915_gem_object_create_shmem(struct drm_device *dev, 4691 struct drm_gem_object *obj, 4692 size_t size) 4693 { 4694 struct drm_i915_private *i915 = to_i915(dev); 4695 unsigned long flags = VM_NORESERVE; 4696 struct file *filp; 4697 4698 drm_gem_private_object_init(dev, obj, size); 4699 4700 if (i915->mm.gemfs) 4701 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4702 flags); 4703 else 4704 filp = shmem_file_setup("i915", size, flags); 4705 4706 if (IS_ERR(filp)) 4707 return PTR_ERR(filp); 4708 4709 obj->filp = filp; 4710 4711 return 0; 4712 } 4713 4714 struct drm_i915_gem_object * 4715 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4716 { 4717 struct drm_i915_gem_object *obj; 4718 struct address_space *mapping; 4719 unsigned int cache_level; 4720 gfp_t mask; 4721 int ret; 4722 4723 /* There is a prevalence of the assumption that we fit the object's 4724 * page count inside a 32bit _signed_ variable. Let's document this and 4725 * catch if we ever need to fix it. In the meantime, if you do spot 4726 * such a local variable, please consider fixing! 4727 */ 4728 if (size >> PAGE_SHIFT > INT_MAX) 4729 return ERR_PTR(-E2BIG); 4730 4731 if (overflows_type(size, obj->base.size)) 4732 return ERR_PTR(-E2BIG); 4733 4734 obj = i915_gem_object_alloc(dev_priv); 4735 if (obj == NULL) 4736 return ERR_PTR(-ENOMEM); 4737 4738 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4739 if (ret) 4740 goto fail; 4741 4742 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4743 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4744 /* 965gm cannot relocate objects above 4GiB. */ 4745 mask &= ~__GFP_HIGHMEM; 4746 mask |= __GFP_DMA32; 4747 } 4748 4749 mapping = obj->base.filp->f_mapping; 4750 mapping_set_gfp_mask(mapping, mask); 4751 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4752 4753 i915_gem_object_init(obj, &i915_gem_object_ops); 4754 4755 obj->write_domain = I915_GEM_DOMAIN_CPU; 4756 obj->read_domains = I915_GEM_DOMAIN_CPU; 4757 4758 if (HAS_LLC(dev_priv)) 4759 /* On some devices, we can have the GPU use the LLC (the CPU 4760 * cache) for about a 10% performance improvement 4761 * compared to uncached. Graphics requests other than 4762 * display scanout are coherent with the CPU in 4763 * accessing this cache. This means in this mode we 4764 * don't need to clflush on the CPU side, and on the 4765 * GPU side we only need to flush internal caches to 4766 * get data visible to the CPU. 4767 * 4768 * However, we maintain the display planes as UC, and so 4769 * need to rebind when first used as such. 4770 */ 4771 cache_level = I915_CACHE_LLC; 4772 else 4773 cache_level = I915_CACHE_NONE; 4774 4775 i915_gem_object_set_cache_coherency(obj, cache_level); 4776 4777 trace_i915_gem_object_create(obj); 4778 4779 return obj; 4780 4781 fail: 4782 i915_gem_object_free(obj); 4783 return ERR_PTR(ret); 4784 } 4785 4786 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4787 { 4788 /* If we are the last user of the backing storage (be it shmemfs 4789 * pages or stolen etc), we know that the pages are going to be 4790 * immediately released. In this case, we can then skip copying 4791 * back the contents from the GPU. 
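 * The caller (i915_gem_free_object) then marks the object
 * I915_MADV_DONTNEED, allowing the backing store to be discarded rather
 * than written back out.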
4792 */ 4793 4794 if (obj->mm.madv != I915_MADV_WILLNEED) 4795 return false; 4796 4797 if (obj->base.filp == NULL) 4798 return true; 4799 4800 /* At first glance, this looks racy, but then again so would be 4801 * userspace racing mmap against close. However, the first external 4802 * reference to the filp can only be obtained through the 4803 * i915_gem_mmap_ioctl() which safeguards us against the user 4804 * acquiring such a reference whilst we are in the middle of 4805 * freeing the object. 4806 */ 4807 return atomic_long_read(&obj->base.filp->f_count) == 1; 4808 } 4809 4810 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4811 struct llist_node *freed) 4812 { 4813 struct drm_i915_gem_object *obj, *on; 4814 4815 intel_runtime_pm_get(i915); 4816 llist_for_each_entry_safe(obj, on, freed, freed) { 4817 struct i915_vma *vma, *vn; 4818 4819 trace_i915_gem_object_destroy(obj); 4820 4821 mutex_lock(&i915->drm.struct_mutex); 4822 4823 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4824 list_for_each_entry_safe(vma, vn, 4825 &obj->vma_list, obj_link) { 4826 GEM_BUG_ON(i915_vma_is_active(vma)); 4827 vma->flags &= ~I915_VMA_PIN_MASK; 4828 i915_vma_destroy(vma); 4829 } 4830 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4831 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4832 4833 /* This serializes freeing with the shrinker. Since the free 4834 * is delayed, first by RCU then by the workqueue, we want the 4835 * shrinker to be able to free pages of unreferenced objects, 4836 * or else we may oom whilst there are plenty of deferred 4837 * freed objects. 4838 */ 4839 if (i915_gem_object_has_pages(obj)) { 4840 spin_lock(&i915->mm.obj_lock); 4841 list_del_init(&obj->mm.link); 4842 spin_unlock(&i915->mm.obj_lock); 4843 } 4844 4845 mutex_unlock(&i915->drm.struct_mutex); 4846 4847 GEM_BUG_ON(obj->bind_count); 4848 GEM_BUG_ON(obj->userfault_count); 4849 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4850 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4851 4852 if (obj->ops->release) 4853 obj->ops->release(obj); 4854 4855 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4856 atomic_set(&obj->mm.pages_pin_count, 0); 4857 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4858 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4859 4860 if (obj->base.import_attach) 4861 drm_prime_gem_destroy(&obj->base, NULL); 4862 4863 reservation_object_fini(&obj->__builtin_resv); 4864 drm_gem_object_release(&obj->base); 4865 i915_gem_info_remove_obj(i915, obj->base.size); 4866 4867 kfree(obj->bit_17); 4868 i915_gem_object_free(obj); 4869 4870 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4871 atomic_dec(&i915->mm.free_count); 4872 4873 if (on) 4874 cond_resched(); 4875 } 4876 intel_runtime_pm_put(i915); 4877 } 4878 4879 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4880 { 4881 struct llist_node *freed; 4882 4883 /* Free the oldest, most stale object to keep the free_list short */ 4884 freed = NULL; 4885 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4886 /* Only one consumer of llist_del_first() allowed */ 4887 spin_lock(&i915->mm.free_lock); 4888 freed = llist_del_first(&i915->mm.free_list); 4889 spin_unlock(&i915->mm.free_lock); 4890 } 4891 if (unlikely(freed)) { 4892 freed->next = NULL; 4893 __i915_gem_free_objects(i915, freed); 4894 } 4895 } 4896 4897 static void __i915_gem_free_work(struct work_struct *work) 4898 { 4899 struct drm_i915_private *i915 = 4900 container_of(work, struct drm_i915_private, mm.free_work); 4901 struct llist_node *freed; 4902 4903 /* 4904 * All 
file-owned VMA should have been released by this point through 4905 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4906 * However, the object may also be bound into the global GTT (e.g. 4907 * older GPUs without per-process support, or for direct access through 4908 * the GTT either for the user or for scanout). Those VMA still need to 4909 * unbound now. 4910 */ 4911 4912 spin_lock(&i915->mm.free_lock); 4913 while ((freed = llist_del_all(&i915->mm.free_list))) { 4914 spin_unlock(&i915->mm.free_lock); 4915 4916 __i915_gem_free_objects(i915, freed); 4917 if (need_resched()) 4918 return; 4919 4920 spin_lock(&i915->mm.free_lock); 4921 } 4922 spin_unlock(&i915->mm.free_lock); 4923 } 4924 4925 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4926 { 4927 struct drm_i915_gem_object *obj = 4928 container_of(head, typeof(*obj), rcu); 4929 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4930 4931 /* 4932 * Since we require blocking on struct_mutex to unbind the freed 4933 * object from the GPU before releasing resources back to the 4934 * system, we can not do that directly from the RCU callback (which may 4935 * be a softirq context), but must instead then defer that work onto a 4936 * kthread. We use the RCU callback rather than move the freed object 4937 * directly onto the work queue so that we can mix between using the 4938 * worker and performing frees directly from subsequent allocations for 4939 * crude but effective memory throttling. 4940 */ 4941 if (llist_add(&obj->freed, &i915->mm.free_list)) 4942 queue_work(i915->wq, &i915->mm.free_work); 4943 } 4944 4945 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4946 { 4947 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4948 4949 if (obj->mm.quirked) 4950 __i915_gem_object_unpin_pages(obj); 4951 4952 if (discard_backing_storage(obj)) 4953 obj->mm.madv = I915_MADV_DONTNEED; 4954 4955 /* 4956 * Before we free the object, make sure any pure RCU-only 4957 * read-side critical sections are complete, e.g. 4958 * i915_gem_busy_ioctl(). For the corresponding synchronized 4959 * lookup see i915_gem_object_lookup_rcu(). 4960 */ 4961 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 4962 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4963 } 4964 4965 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4966 { 4967 lockdep_assert_held(&obj->base.dev->struct_mutex); 4968 4969 if (!i915_gem_object_has_active_reference(obj) && 4970 i915_gem_object_is_active(obj)) 4971 i915_gem_object_set_active_reference(obj); 4972 else 4973 i915_gem_object_put(obj); 4974 } 4975 4976 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 4977 { 4978 struct i915_gem_context *kernel_context = i915->kernel_context; 4979 struct intel_engine_cs *engine; 4980 enum intel_engine_id id; 4981 4982 for_each_engine(engine, i915, id) { 4983 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 4984 GEM_BUG_ON(engine->last_retired_context != kernel_context); 4985 } 4986 } 4987 4988 void i915_gem_sanitize(struct drm_i915_private *i915) 4989 { 4990 if (i915_terminally_wedged(&i915->gpu_error)) { 4991 mutex_lock(&i915->drm.struct_mutex); 4992 i915_gem_unset_wedged(i915); 4993 mutex_unlock(&i915->drm.struct_mutex); 4994 } 4995 4996 /* 4997 * If we inherit context state from the BIOS or earlier occupants 4998 * of the GPU, the GPU may be in an inconsistent state when we 4999 * try to take over. The only way to remove the earlier state 5000 * is by resetting. 
However, resetting on earlier gen is tricky as 5001 * it may impact the display and we are uncertain about the stability 5002 * of the reset, so this could be applied to even earlier gen. 5003 */ 5004 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5005 WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5006 } 5007 5008 int i915_gem_suspend(struct drm_i915_private *dev_priv) 5009 { 5010 struct drm_device *dev = &dev_priv->drm; 5011 int ret; 5012 5013 intel_runtime_pm_get(dev_priv); 5014 intel_suspend_gt_powersave(dev_priv); 5015 5016 mutex_lock(&dev->struct_mutex); 5017 5018 /* We have to flush all the executing contexts to main memory so 5019 * that they can be saved in the hibernation image. To ensure the last 5020 * context image is coherent, we have to switch away from it. That 5021 * leaves the dev_priv->kernel_context still active when 5022 * we actually suspend, and its image in memory may not match the GPU 5023 * state. Fortunately, the kernel_context is disposable and we do 5024 * not rely on its state. 5025 */ 5026 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5027 ret = i915_gem_switch_to_kernel_context(dev_priv); 5028 if (ret) 5029 goto err_unlock; 5030 5031 ret = i915_gem_wait_for_idle(dev_priv, 5032 I915_WAIT_INTERRUPTIBLE | 5033 I915_WAIT_LOCKED); 5034 if (ret && ret != -EIO) 5035 goto err_unlock; 5036 5037 assert_kernel_context_is_current(dev_priv); 5038 } 5039 i915_gem_contexts_lost(dev_priv); 5040 mutex_unlock(&dev->struct_mutex); 5041 5042 intel_uc_suspend(dev_priv); 5043 5044 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 5045 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 5046 5047 /* As the idle_work is rearming if it detects a race, play safe and 5048 * repeat the flush until it is definitely idle. 5049 */ 5050 drain_delayed_work(&dev_priv->gt.idle_work); 5051 5052 /* Assert that we successfully flushed all the work and 5053 * reset the GPU back to its idle, low power state. 5054 */ 5055 WARN_ON(dev_priv->gt.awake); 5056 if (WARN_ON(!intel_engines_are_idle(dev_priv))) 5057 i915_gem_set_wedged(dev_priv); /* no hope, discard everything */ 5058 5059 /* 5060 * Neither the BIOS, ourselves nor any other kernel 5061 * expects the system to be in execlists mode on startup, 5062 * so we need to reset the GPU back to legacy mode. And the only 5063 * known way to disable logical contexts is through a GPU reset. 5064 * 5065 * So in order to leave the system in a known default configuration, 5066 * always reset the GPU upon unload and suspend. Afterwards we then 5067 * clean up the GEM state tracking, flushing off the requests and 5068 * leaving the system in a known idle state. 5069 * 5070 * Note that it is of the utmost importance that the GPU is idle and 5071 * all stray writes are flushed *before* we dismantle the backing 5072 * storage for the pinned objects. 5073 * 5074 * However, since we are uncertain that resetting the GPU on older 5075 * machines is a good idea, we don't - just in case it leaves the 5076 * machine in an unusable condition.
5077 */ 5078 intel_uc_sanitize(dev_priv); 5079 i915_gem_sanitize(dev_priv); 5080 5081 intel_runtime_pm_put(dev_priv); 5082 return 0; 5083 5084 err_unlock: 5085 mutex_unlock(&dev->struct_mutex); 5086 intel_runtime_pm_put(dev_priv); 5087 return ret; 5088 } 5089 5090 void i915_gem_resume(struct drm_i915_private *i915) 5091 { 5092 WARN_ON(i915->gt.awake); 5093 5094 mutex_lock(&i915->drm.struct_mutex); 5095 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5096 5097 i915_gem_restore_gtt_mappings(i915); 5098 i915_gem_restore_fences(i915); 5099 5100 /* 5101 * As we didn't flush the kernel context before suspend, we cannot 5102 * guarantee that the context image is complete. So let's just reset 5103 * it and start again. 5104 */ 5105 i915->gt.resume(i915); 5106 5107 if (i915_gem_init_hw(i915)) 5108 goto err_wedged; 5109 5110 intel_uc_resume(i915); 5111 5112 /* Always reload a context for powersaving. */ 5113 if (i915_gem_switch_to_kernel_context(i915)) 5114 goto err_wedged; 5115 5116 out_unlock: 5117 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5118 mutex_unlock(&i915->drm.struct_mutex); 5119 return; 5120 5121 err_wedged: 5122 if (!i915_terminally_wedged(&i915->gpu_error)) { 5123 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5124 i915_gem_set_wedged(i915); 5125 } 5126 goto out_unlock; 5127 } 5128 5129 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5130 { 5131 if (INTEL_GEN(dev_priv) < 5 || 5132 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5133 return; 5134 5135 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5136 DISP_TILE_SURFACE_SWIZZLING); 5137 5138 if (IS_GEN5(dev_priv)) 5139 return; 5140 5141 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5142 if (IS_GEN6(dev_priv)) 5143 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5144 else if (IS_GEN7(dev_priv)) 5145 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5146 else if (IS_GEN8(dev_priv)) 5147 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5148 else 5149 BUG(); 5150 } 5151 5152 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5153 { 5154 I915_WRITE(RING_CTL(base), 0); 5155 I915_WRITE(RING_HEAD(base), 0); 5156 I915_WRITE(RING_TAIL(base), 0); 5157 I915_WRITE(RING_START(base), 0); 5158 } 5159 5160 static void init_unused_rings(struct drm_i915_private *dev_priv) 5161 { 5162 if (IS_I830(dev_priv)) { 5163 init_unused_ring(dev_priv, PRB1_BASE); 5164 init_unused_ring(dev_priv, SRB0_BASE); 5165 init_unused_ring(dev_priv, SRB1_BASE); 5166 init_unused_ring(dev_priv, SRB2_BASE); 5167 init_unused_ring(dev_priv, SRB3_BASE); 5168 } else if (IS_GEN2(dev_priv)) { 5169 init_unused_ring(dev_priv, SRB0_BASE); 5170 init_unused_ring(dev_priv, SRB1_BASE); 5171 } else if (IS_GEN3(dev_priv)) { 5172 init_unused_ring(dev_priv, PRB1_BASE); 5173 init_unused_ring(dev_priv, PRB2_BASE); 5174 } 5175 } 5176 5177 static int __i915_gem_restart_engines(void *data) 5178 { 5179 struct drm_i915_private *i915 = data; 5180 struct intel_engine_cs *engine; 5181 enum intel_engine_id id; 5182 int err; 5183 5184 for_each_engine(engine, i915, id) { 5185 err = engine->init_hw(engine); 5186 if (err) { 5187 DRM_ERROR("Failed to restart %s (%d)\n", 5188 engine->name, err); 5189 return err; 5190 } 5191 } 5192 5193 return 0; 5194 } 5195 5196 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5197 { 5198 int ret; 5199 5200 dev_priv->gt.last_init_time = ktime_get(); 5201 5202 /* Double layer security blanket, see i915_gem_init() */ 5203 
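	/*
	 * Every path out of this function goes through the out: label below,
	 * so the forcewake reference we take here is always released.
	 */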
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5204 5205 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5206 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5207 5208 if (IS_HASWELL(dev_priv)) 5209 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5210 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5211 5212 if (HAS_PCH_NOP(dev_priv)) { 5213 if (IS_IVYBRIDGE(dev_priv)) { 5214 u32 temp = I915_READ(GEN7_MSG_CTL); 5215 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5216 I915_WRITE(GEN7_MSG_CTL, temp); 5217 } else if (INTEL_GEN(dev_priv) >= 7) { 5218 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5219 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5220 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5221 } 5222 } 5223 5224 intel_gt_workarounds_apply(dev_priv); 5225 5226 i915_gem_init_swizzling(dev_priv); 5227 5228 /* 5229 * At least 830 can leave some of the unused rings 5230 * "active" (ie. head != tail) after resume which 5231 * will prevent c3 entry. Makes sure all unused rings 5232 * are totally idle. 5233 */ 5234 init_unused_rings(dev_priv); 5235 5236 BUG_ON(!dev_priv->kernel_context); 5237 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5238 ret = -EIO; 5239 goto out; 5240 } 5241 5242 ret = i915_ppgtt_init_hw(dev_priv); 5243 if (ret) { 5244 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5245 goto out; 5246 } 5247 5248 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5249 if (ret) { 5250 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5251 goto out; 5252 } 5253 5254 /* We can't enable contexts until all firmware is loaded */ 5255 ret = intel_uc_init_hw(dev_priv); 5256 if (ret) { 5257 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5258 goto out; 5259 } 5260 5261 intel_mocs_init_l3cc_table(dev_priv); 5262 5263 /* Only when the HW is re-initialised, can we replay the requests */ 5264 ret = __i915_gem_restart_engines(dev_priv); 5265 out: 5266 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5267 return ret; 5268 } 5269 5270 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5271 { 5272 struct i915_gem_context *ctx; 5273 struct intel_engine_cs *engine; 5274 enum intel_engine_id id; 5275 int err; 5276 5277 /* 5278 * As we reset the gpu during very early sanitisation, the current 5279 * register state on the GPU should reflect its defaults values. 5280 * We load a context onto the hw (with restore-inhibit), then switch 5281 * over to a second context to save that default register state. We 5282 * can then prime every new context with that state so they all start 5283 * from the same default HW values. 
5284 */ 5285 5286 ctx = i915_gem_context_create_kernel(i915, 0); 5287 if (IS_ERR(ctx)) 5288 return PTR_ERR(ctx); 5289 5290 for_each_engine(engine, i915, id) { 5291 struct i915_request *rq; 5292 5293 rq = i915_request_alloc(engine, ctx); 5294 if (IS_ERR(rq)) { 5295 err = PTR_ERR(rq); 5296 goto out_ctx; 5297 } 5298 5299 err = 0; 5300 if (engine->init_context) 5301 err = engine->init_context(rq); 5302 5303 __i915_request_add(rq, true); 5304 if (err) 5305 goto err_active; 5306 } 5307 5308 err = i915_gem_switch_to_kernel_context(i915); 5309 if (err) 5310 goto err_active; 5311 5312 err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); 5313 if (err) 5314 goto err_active; 5315 5316 assert_kernel_context_is_current(i915); 5317 5318 for_each_engine(engine, i915, id) { 5319 struct i915_vma *state; 5320 5321 state = to_intel_context(ctx, engine)->state; 5322 if (!state) 5323 continue; 5324 5325 /* 5326 * As we will hold a reference to the logical state, it will 5327 * not be torn down with the context, and importantly the 5328 * object will hold onto its vma (making it possible for a 5329 * stray GTT write to corrupt our defaults). Unmap the vma 5330 * from the GTT to prevent such accidents and reclaim the 5331 * space. 5332 */ 5333 err = i915_vma_unbind(state); 5334 if (err) 5335 goto err_active; 5336 5337 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5338 if (err) 5339 goto err_active; 5340 5341 engine->default_state = i915_gem_object_get(state->obj); 5342 } 5343 5344 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5345 unsigned int found = intel_engines_has_context_isolation(i915); 5346 5347 /* 5348 * Make sure that classes with multiple engine instances all 5349 * share the same basic configuration. 5350 */ 5351 for_each_engine(engine, i915, id) { 5352 unsigned int bit = BIT(engine->uabi_class); 5353 unsigned int expected = engine->default_state ? bit : 0; 5354 5355 if ((found & bit) != expected) { 5356 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5357 engine->uabi_class, engine->name); 5358 } 5359 } 5360 } 5361 5362 out_ctx: 5363 i915_gem_context_set_closed(ctx); 5364 i915_gem_context_put(ctx); 5365 return err; 5366 5367 err_active: 5368 /* 5369 * If we have to abandon now, we expect the engines to be idle 5370 * and ready to be torn-down. First try to flush any remaining 5371 * request, ensure we are pointing at the kernel context and 5372 * then remove it. 5373 */ 5374 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5375 goto out_ctx; 5376 5377 if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) 5378 goto out_ctx; 5379 5380 i915_gem_contexts_lost(i915); 5381 goto out_ctx; 5382 } 5383 5384 int i915_gem_init(struct drm_i915_private *dev_priv) 5385 { 5386 int ret; 5387 5388 /* 5389 * We need to fallback to 4K pages since gvt gtt handling doesn't 5390 * support huge page entries - we will need to check either hypervisor 5391 * mm can support huge guest page or just do emulation in gvt. 
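 * Until then, simply clamp the supported page sizes to 4K whenever we
 * detect that we are running as a vGPU guest (the intel_vgpu_active()
 * check below).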
5392 */ 5393 if (intel_vgpu_active(dev_priv)) 5394 mkwrite_device_info(dev_priv)->page_sizes = 5395 I915_GTT_PAGE_SIZE_4K; 5396 5397 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5398 5399 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5400 dev_priv->gt.resume = intel_lr_context_resume; 5401 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5402 } else { 5403 dev_priv->gt.resume = intel_legacy_submission_resume; 5404 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5405 } 5406 5407 ret = i915_gem_init_userptr(dev_priv); 5408 if (ret) 5409 return ret; 5410 5411 ret = intel_wopcm_init(&dev_priv->wopcm); 5412 if (ret) 5413 return ret; 5414 5415 ret = intel_uc_init_misc(dev_priv); 5416 if (ret) 5417 return ret; 5418 5419 /* This is just a security blanket to placate dragons. 5420 * On some systems, we very sporadically observe that the first TLBs 5421 * used by the CS may be stale, despite us poking the TLB reset. If 5422 * we hold the forcewake during initialisation these problems 5423 * just magically go away. 5424 */ 5425 mutex_lock(&dev_priv->drm.struct_mutex); 5426 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5427 5428 ret = i915_gem_init_ggtt(dev_priv); 5429 if (ret) { 5430 GEM_BUG_ON(ret == -EIO); 5431 goto err_unlock; 5432 } 5433 5434 ret = i915_gem_contexts_init(dev_priv); 5435 if (ret) { 5436 GEM_BUG_ON(ret == -EIO); 5437 goto err_ggtt; 5438 } 5439 5440 ret = intel_engines_init(dev_priv); 5441 if (ret) { 5442 GEM_BUG_ON(ret == -EIO); 5443 goto err_context; 5444 } 5445 5446 intel_init_gt_powersave(dev_priv); 5447 5448 ret = intel_uc_init(dev_priv); 5449 if (ret) 5450 goto err_pm; 5451 5452 ret = i915_gem_init_hw(dev_priv); 5453 if (ret) 5454 goto err_uc_init; 5455 5456 /* 5457 * Despite its name intel_init_clock_gating applies both display 5458 * clock gating workarounds; GT mmio workarounds and the occasional 5459 * GT power context workaround. Worse, sometimes it includes a context 5460 * register workaround which we need to apply before we record the 5461 * default HW state for all contexts. 5462 * 5463 * FIXME: break up the workarounds and apply them at the right time! 5464 */ 5465 intel_init_clock_gating(dev_priv); 5466 5467 ret = __intel_engines_record_defaults(dev_priv); 5468 if (ret) 5469 goto err_init_hw; 5470 5471 if (i915_inject_load_failure()) { 5472 ret = -ENODEV; 5473 goto err_init_hw; 5474 } 5475 5476 if (i915_inject_load_failure()) { 5477 ret = -EIO; 5478 goto err_init_hw; 5479 } 5480 5481 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5482 mutex_unlock(&dev_priv->drm.struct_mutex); 5483 5484 return 0; 5485 5486 /* 5487 * Unwinding is complicated by that we want to handle -EIO to mean 5488 * disable GPU submission but keep KMS alive. We want to mark the 5489 * HW as irrevisibly wedged, but keep enough state around that the 5490 * driver doesn't explode during runtime. 
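 * That is what the ret == -EIO handling at the end of this function does:
 * the GPU is declared wedged and the error is discarded so that the driver
 * finishes loading.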
5491 */ 5492 err_init_hw: 5493 i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); 5494 i915_gem_contexts_lost(dev_priv); 5495 intel_uc_fini_hw(dev_priv); 5496 err_uc_init: 5497 intel_uc_fini(dev_priv); 5498 err_pm: 5499 if (ret != -EIO) { 5500 intel_cleanup_gt_powersave(dev_priv); 5501 i915_gem_cleanup_engines(dev_priv); 5502 } 5503 err_context: 5504 if (ret != -EIO) 5505 i915_gem_contexts_fini(dev_priv); 5506 err_ggtt: 5507 err_unlock: 5508 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5509 mutex_unlock(&dev_priv->drm.struct_mutex); 5510 5511 intel_uc_fini_misc(dev_priv); 5512 5513 if (ret != -EIO) 5514 i915_gem_cleanup_userptr(dev_priv); 5515 5516 if (ret == -EIO) { 5517 /* 5518 * Allow engine initialisation to fail by marking the GPU as 5519 * wedged. But we only want to do this where the GPU is angry, 5520 * for all other failure, such as an allocation failure, bail. 5521 */ 5522 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5523 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5524 i915_gem_set_wedged(dev_priv); 5525 } 5526 ret = 0; 5527 } 5528 5529 i915_gem_drain_freed_objects(dev_priv); 5530 return ret; 5531 } 5532 5533 void i915_gem_init_mmio(struct drm_i915_private *i915) 5534 { 5535 i915_gem_sanitize(i915); 5536 } 5537 5538 void 5539 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5540 { 5541 struct intel_engine_cs *engine; 5542 enum intel_engine_id id; 5543 5544 for_each_engine(engine, dev_priv, id) 5545 dev_priv->gt.cleanup_engine(engine); 5546 } 5547 5548 void 5549 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5550 { 5551 int i; 5552 5553 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5554 !IS_CHERRYVIEW(dev_priv)) 5555 dev_priv->num_fence_regs = 32; 5556 else if (INTEL_GEN(dev_priv) >= 4 || 5557 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5558 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5559 dev_priv->num_fence_regs = 16; 5560 else 5561 dev_priv->num_fence_regs = 8; 5562 5563 if (intel_vgpu_active(dev_priv)) 5564 dev_priv->num_fence_regs = 5565 I915_READ(vgtif_reg(avail_rs.fence_num)); 5566 5567 /* Initialize fence registers to zero */ 5568 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5569 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5570 5571 fence->i915 = dev_priv; 5572 fence->id = i; 5573 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5574 } 5575 i915_gem_restore_fences(dev_priv); 5576 5577 i915_gem_detect_bit_6_swizzle(dev_priv); 5578 } 5579 5580 static void i915_gem_init__mm(struct drm_i915_private *i915) 5581 { 5582 spin_lock_init(&i915->mm.object_stat_lock); 5583 spin_lock_init(&i915->mm.obj_lock); 5584 spin_lock_init(&i915->mm.free_lock); 5585 5586 init_llist_head(&i915->mm.free_list); 5587 5588 INIT_LIST_HEAD(&i915->mm.unbound_list); 5589 INIT_LIST_HEAD(&i915->mm.bound_list); 5590 INIT_LIST_HEAD(&i915->mm.fence_list); 5591 INIT_LIST_HEAD(&i915->mm.userfault_list); 5592 5593 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5594 } 5595 5596 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5597 { 5598 int err = -ENOMEM; 5599 5600 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5601 if (!dev_priv->objects) 5602 goto err_out; 5603 5604 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5605 if (!dev_priv->vmas) 5606 goto err_objects; 5607 5608 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5609 if (!dev_priv->luts) 5610 goto err_vmas; 5611 5612 dev_priv->requests = KMEM_CACHE(i915_request, 5613 SLAB_HWCACHE_ALIGN | 5614 
SLAB_RECLAIM_ACCOUNT | 5615 SLAB_TYPESAFE_BY_RCU); 5616 if (!dev_priv->requests) 5617 goto err_luts; 5618 5619 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5620 SLAB_HWCACHE_ALIGN | 5621 SLAB_RECLAIM_ACCOUNT); 5622 if (!dev_priv->dependencies) 5623 goto err_requests; 5624 5625 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5626 if (!dev_priv->priorities) 5627 goto err_dependencies; 5628 5629 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5630 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5631 INIT_LIST_HEAD(&dev_priv->gt.closed_vma); 5632 5633 i915_gem_init__mm(dev_priv); 5634 5635 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5636 i915_gem_retire_work_handler); 5637 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5638 i915_gem_idle_work_handler); 5639 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5640 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5641 5642 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 5643 5644 spin_lock_init(&dev_priv->fb_tracking.lock); 5645 5646 err = i915_gemfs_init(dev_priv); 5647 if (err) 5648 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); 5649 5650 return 0; 5651 5652 err_dependencies: 5653 kmem_cache_destroy(dev_priv->dependencies); 5654 err_requests: 5655 kmem_cache_destroy(dev_priv->requests); 5656 err_luts: 5657 kmem_cache_destroy(dev_priv->luts); 5658 err_vmas: 5659 kmem_cache_destroy(dev_priv->vmas); 5660 err_objects: 5661 kmem_cache_destroy(dev_priv->objects); 5662 err_out: 5663 return err; 5664 } 5665 5666 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 5667 { 5668 i915_gem_drain_freed_objects(dev_priv); 5669 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); 5670 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); 5671 WARN_ON(dev_priv->mm.object_count); 5672 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 5673 5674 kmem_cache_destroy(dev_priv->priorities); 5675 kmem_cache_destroy(dev_priv->dependencies); 5676 kmem_cache_destroy(dev_priv->requests); 5677 kmem_cache_destroy(dev_priv->luts); 5678 kmem_cache_destroy(dev_priv->vmas); 5679 kmem_cache_destroy(dev_priv->objects); 5680 5681 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 5682 rcu_barrier(); 5683 5684 i915_gemfs_fini(dev_priv); 5685 } 5686 5687 int i915_gem_freeze(struct drm_i915_private *dev_priv) 5688 { 5689 /* Discard all purgeable objects, let userspace recover those as 5690 * required after resuming. 5691 */ 5692 i915_gem_shrink_all(dev_priv); 5693 5694 return 0; 5695 } 5696 5697 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5698 { 5699 struct drm_i915_gem_object *obj; 5700 struct list_head *phases[] = { 5701 &dev_priv->mm.unbound_list, 5702 &dev_priv->mm.bound_list, 5703 NULL 5704 }, **p; 5705 5706 /* Called just before we write the hibernation image. 5707 * 5708 * We need to update the domain tracking to reflect that the CPU 5709 * will be accessing all the pages to create and restore from the 5710 * hibernation, and so upon restoration those pages will be in the 5711 * CPU domain. 5712 * 5713 * To make sure the hibernation image contains the latest state, 5714 * we update that state just before writing out the image. 
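 * (Each object on the unbound and bound lists is flipped into the CPU
 * write domain by the __start_cpu_write() loop below.)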
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&dev_priv->mm.unbound_list,
		&dev_priv->mm.bound_list,
		NULL
	}, **p;

	/* Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(dev_priv);

	spin_lock(&dev_priv->mm.obj_lock);
	for (p = phases; *p; p++) {
		list_for_each_entry(obj, *p, mm.link)
			__start_cpu_write(obj);
	}
	spin_unlock(&dev_priv->mm.obj_lock);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
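
/*
 * Illustrative sketch only (hypothetical call site; the real callers live in
 * the display code): when a plane flips from one framebuffer to another,
 *
 *	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(new_fb),
 *			  plane->frontbuffer_bit);
 *
 * clears the plane's bit on the outgoing object and sets it on the incoming
 * one, so frontbuffer invalidation only chases objects actually on screen.
 */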
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
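
/*
 * Illustrative usage sketch (assumes a caller holding a blob of data, e.g. a
 * firmware image; "blob" and "blob_size" are hypothetical names, not a
 * verbatim call site):
 *
 *	obj = i915_gem_object_create_from_data(dev_priv, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * The returned object is shmem-backed and already populated with a copy of
 * the data, so the caller can pin and map it without copying again.
 */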
struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}
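
/*
 * Worked example of the lookup cache above (added for illustration): if an
 * sg entry starts at page index 4 and covers 4 pages, the radixtree ends up
 * holding
 *
 *	index 4		-> the struct scatterlist pointer itself
 *	indices 5-7	-> exceptional entries encoding the base index 4
 *
 * so a later lookup of page 6 hits an exceptional entry, decodes the base
 * index, re-looks up the real sg entry at index 4 and reports *offset = 2.
 */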
struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}

int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = fetch_and_zero(&obj->mm.pages);
	if (pages) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);

		__i915_gem_object_reset_page_iter(obj);

		spin_lock(&i915->mm.obj_lock);
		list_del(&obj->mm.link);
		spin_unlock(&i915->mm.obj_lock);
	}

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	obj->mm.pages = pages;
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif