/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

static u32 __i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);
	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));

	if (!i915->gt.awake)
		return I915_EPOCH_INVALID;

	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);

	/*
	 * Be paranoid and flush a concurrent interrupt to make sure
	 * we don't reactivate any irq tasklets after parking.
	 *
	 * FIXME: Note that even though we have waited for execlists to be idle,
	 * there may still be an in-flight interrupt even though the CSB
	 * is now empty. synchronize_irq() makes sure that a residual interrupt
	 * is completed before we continue, but it doesn't prevent the HW from
	 * raising a spurious interrupt later. To complete the shield we should
	 * coordinate disabling the CS irq with flushing the interrupts.
	 */
	synchronize_irq(i915->drm.irq);

	intel_engines_park(i915);
	i915_timelines_park(i915);

	i915_pmu_gt_parked(i915);
	i915_vma_parked(i915);

	i915->gt.awake = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_idle(i915);

	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);

	intel_runtime_pm_put(i915);

	return i915->gt.epoch;
}

void i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);

	if (!i915->gt.awake)
		return;

	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
}

void i915_gem_unpark(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(!i915->gt.active_requests);

	if (i915->gt.awake)
		return;

	intel_runtime_pm_get_noresume(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has negative impact on the performance of the chip with
	 * huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->vm.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size, allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}

static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
		struct address_space *mapping = obj->base.filp->f_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are
	 * removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct i915_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_request_completed(rq))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client && !i915_request_started(rq)) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
	}

	timeout = i915_request_wait(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must also be signaled.
		 * If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0)
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);

	dma_fence_put(excl);

	/*
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
{
	struct i915_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;

	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(rq, attr);
	rcu_read_unlock();
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}

static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], attr);
	} else {
		__fence_set_priority(fence, attr);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      const struct i915_sched_attr *attr)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], attr);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, attr);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       args->size, &args->handle);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
		return;

	i915_gem_chipset_flush(dev_priv);

	intel_runtime_pm_get(dev_priv);
	spin_lock_irq(&dev_priv->uncore.lock);

	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

	spin_unlock_irq(&dev_priv->uncore.lock);
	intel_runtime_pm_put(dev_priv);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
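 * The pages are left pinned on success; the caller is expected to drop the
 * pin via i915_gem_obj_finish_shmem_access() once the access is complete.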
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all.
		 */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86.
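	 * If the atomic copy faults (e.g. the user page is not resident), we
	 * fall back below to a non-atomic mapping and a plain copy_to_user().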
	 */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
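 *
 * Returns 0 on success, negative error code on failure.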
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
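		 * If the device is already asleep, we therefore only take the
		 * wakeref opportunistically and otherwise fall back to the
		 * shmem pwrite path.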
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(page);
	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire page.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
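 *
 * Returns 0 on success, negative error code on failure.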
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_is_active(vma))
			continue;

		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915 = to_i915(obj->base.dev);
	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
1821 */ 1822 1823 /* Pinned buffers may be scanout, so flush the cache */ 1824 i915_gem_object_flush_if_display(obj); 1825 i915_gem_object_put(obj); 1826 1827 return 0; 1828 } 1829 1830 /** 1831 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1832 * it is mapped to. 1833 * @dev: drm device 1834 * @data: ioctl data blob 1835 * @file: drm file 1836 * 1837 * While the mapping holds a reference on the contents of the object, it doesn't 1838 * imply a ref on the object itself. 1839 * 1840 * IMPORTANT: 1841 * 1842 * DRM driver writers who look a this function as an example for how to do GEM 1843 * mmap support, please don't implement mmap support like here. The modern way 1844 * to implement DRM mmap support is with an mmap offset ioctl (like 1845 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1846 * That way debug tooling like valgrind will understand what's going on, hiding 1847 * the mmap call in a driver private ioctl will break that. The i915 driver only 1848 * does cpu mmaps this way because we didn't know better. 1849 */ 1850 int 1851 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1852 struct drm_file *file) 1853 { 1854 struct drm_i915_gem_mmap *args = data; 1855 struct drm_i915_gem_object *obj; 1856 unsigned long addr; 1857 1858 if (args->flags & ~(I915_MMAP_WC)) 1859 return -EINVAL; 1860 1861 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1862 return -ENODEV; 1863 1864 obj = i915_gem_object_lookup(file, args->handle); 1865 if (!obj) 1866 return -ENOENT; 1867 1868 /* prime objects have no backing filp to GEM mmap 1869 * pages from. 1870 */ 1871 if (!obj->base.filp) { 1872 i915_gem_object_put(obj); 1873 return -ENXIO; 1874 } 1875 1876 addr = vm_mmap(obj->base.filp, 0, args->size, 1877 PROT_READ | PROT_WRITE, MAP_SHARED, 1878 args->offset); 1879 if (args->flags & I915_MMAP_WC) { 1880 struct mm_struct *mm = current->mm; 1881 struct vm_area_struct *vma; 1882 1883 if (down_write_killable(&mm->mmap_sem)) { 1884 i915_gem_object_put(obj); 1885 return -EINTR; 1886 } 1887 vma = find_vma(mm, addr); 1888 if (vma) 1889 vma->vm_page_prot = 1890 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1891 else 1892 addr = -ENOMEM; 1893 up_write(&mm->mmap_sem); 1894 1895 /* This may race, but that's ok, it only gets set */ 1896 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); 1897 } 1898 i915_gem_object_put(obj); 1899 if (IS_ERR((void *)addr)) 1900 return addr; 1901 1902 args->addr_ptr = (uint64_t) addr; 1903 1904 return 0; 1905 } 1906 1907 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) 1908 { 1909 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT; 1910 } 1911 1912 /** 1913 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps 1914 * 1915 * A history of the GTT mmap interface: 1916 * 1917 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to 1918 * aligned and suitable for fencing, and still fit into the available 1919 * mappable space left by the pinned display objects. A classic problem 1920 * we called the page-fault-of-doom where we would ping-pong between 1921 * two objects that could not fit inside the GTT and so the memcpy 1922 * would page one object in at the expense of the other between every 1923 * single byte. 1924 * 1925 * 1 - Objects can be any size, and have any compatible fencing (X Y, or none 1926 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). 
 *     If the object is too large for the available space (or simply too
 *     large for the mappable aperture!), a view is created instead and
 *     faulted into userspace. (This view is aligned and sized appropriately
 *     for fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
 */
int i915_gem_mmap_gtt_version(void)
{
	return 2;
}

static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
		     pgoff_t page_offset,
		     unsigned int chunk)
{
	struct i915_ggtt_view view;

	if (i915_gem_object_is_tiled(obj))
		chunk = roundup(chunk, tile_row_pages(obj));

	view.type = I915_GGTT_VIEW_PARTIAL;
	view.partial.offset = rounddown(page_offset, chunk);
	view.partial.size =
		min_t(unsigned int, chunk,
		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

	/* If the partial covers the entire object, just create a normal VMA. */
	if (chunk >= obj->base.size >> PAGE_SHIFT)
		view.type = I915_GGTT_VIEW_NORMAL;

	return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
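 *
 * Note that this handler reports failures with VM_FAULT_* codes rather than
 * plain errnos; see the translation switch at the bottom of the function.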
2005 */ 2006 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2007 { 2008 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2009 struct vm_area_struct *area = vmf->vma; 2010 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2011 struct drm_device *dev = obj->base.dev; 2012 struct drm_i915_private *dev_priv = to_i915(dev); 2013 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2014 bool write = area->vm_flags & VM_WRITE; 2015 struct i915_vma *vma; 2016 pgoff_t page_offset; 2017 int ret; 2018 2019 /* Sanity check that we allow writing into this object */ 2020 if (i915_gem_object_is_readonly(obj) && write) 2021 return VM_FAULT_SIGBUS; 2022 2023 /* We don't use vmf->pgoff since that has the fake offset */ 2024 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2025 2026 trace_i915_gem_object_fault(obj, page_offset, true, write); 2027 2028 /* Try to flush the object off the GPU first without holding the lock. 2029 * Upon acquiring the lock, we will perform our sanity checks and then 2030 * repeat the flush holding the lock in the normal manner to catch cases 2031 * where we are gazumped. 2032 */ 2033 ret = i915_gem_object_wait(obj, 2034 I915_WAIT_INTERRUPTIBLE, 2035 MAX_SCHEDULE_TIMEOUT, 2036 NULL); 2037 if (ret) 2038 goto err; 2039 2040 ret = i915_gem_object_pin_pages(obj); 2041 if (ret) 2042 goto err; 2043 2044 intel_runtime_pm_get(dev_priv); 2045 2046 ret = i915_mutex_lock_interruptible(dev); 2047 if (ret) 2048 goto err_rpm; 2049 2050 /* Access to snoopable pages through the GTT is incoherent. */ 2051 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2052 ret = -EFAULT; 2053 goto err_unlock; 2054 } 2055 2056 2057 /* Now pin it into the GTT as needed */ 2058 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2059 PIN_MAPPABLE | 2060 PIN_NONBLOCK | 2061 PIN_NONFAULT); 2062 if (IS_ERR(vma)) { 2063 /* Use a partial view if it is bigger than available space */ 2064 struct i915_ggtt_view view = 2065 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2066 unsigned int flags; 2067 2068 flags = PIN_MAPPABLE; 2069 if (view.type == I915_GGTT_VIEW_NORMAL) 2070 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2071 2072 /* 2073 * Userspace is now writing through an untracked VMA, abandon 2074 * all hope that the hardware is able to track future writes. 
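 * (GTT writes to this object will instead be reported to the
 * frontbuffer tracking as ORIGIN_CPU, just as for the WC mmaps
 * set up in i915_gem_mmap_ioctl().)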
2075 */ 2076 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2077 2078 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2079 if (IS_ERR(vma) && !view.type) { 2080 flags = PIN_MAPPABLE; 2081 view.type = I915_GGTT_VIEW_PARTIAL; 2082 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2083 } 2084 } 2085 if (IS_ERR(vma)) { 2086 ret = PTR_ERR(vma); 2087 goto err_unlock; 2088 } 2089 2090 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2091 if (ret) 2092 goto err_unpin; 2093 2094 ret = i915_vma_pin_fence(vma); 2095 if (ret) 2096 goto err_unpin; 2097 2098 /* Finally, remap it using the new GTT offset */ 2099 ret = remap_io_mapping(area, 2100 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2101 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2102 min_t(u64, vma->size, area->vm_end - area->vm_start), 2103 &ggtt->iomap); 2104 if (ret) 2105 goto err_fence; 2106 2107 /* Mark as being mmapped into userspace for later revocation */ 2108 assert_rpm_wakelock_held(dev_priv); 2109 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2110 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2111 GEM_BUG_ON(!obj->userfault_count); 2112 2113 i915_vma_set_ggtt_write(vma); 2114 2115 err_fence: 2116 i915_vma_unpin_fence(vma); 2117 err_unpin: 2118 __i915_vma_unpin(vma); 2119 err_unlock: 2120 mutex_unlock(&dev->struct_mutex); 2121 err_rpm: 2122 intel_runtime_pm_put(dev_priv); 2123 i915_gem_object_unpin_pages(obj); 2124 err: 2125 switch (ret) { 2126 case -EIO: 2127 /* 2128 * We eat errors when the gpu is terminally wedged to avoid 2129 * userspace unduly crashing (gl has no provisions for mmaps to 2130 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2131 * and so needs to be reported. 2132 */ 2133 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2134 return VM_FAULT_SIGBUS; 2135 /* else: fall through */ 2136 case -EAGAIN: 2137 /* 2138 * EAGAIN means the gpu is hung and we'll wait for the error 2139 * handler to reset everything when re-faulting in 2140 * i915_mutex_lock_interruptible. 2141 */ 2142 case 0: 2143 case -ERESTARTSYS: 2144 case -EINTR: 2145 case -EBUSY: 2146 /* 2147 * EBUSY is ok: this just means that another thread 2148 * already did the job. 2149 */ 2150 return VM_FAULT_NOPAGE; 2151 case -ENOMEM: 2152 return VM_FAULT_OOM; 2153 case -ENOSPC: 2154 case -EFAULT: 2155 return VM_FAULT_SIGBUS; 2156 default: 2157 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2158 return VM_FAULT_SIGBUS; 2159 } 2160 } 2161 2162 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2163 { 2164 struct i915_vma *vma; 2165 2166 GEM_BUG_ON(!obj->userfault_count); 2167 2168 obj->userfault_count = 0; 2169 list_del(&obj->userfault_link); 2170 drm_vma_node_unmap(&obj->base.vma_node, 2171 obj->base.dev->anon_inode->i_mapping); 2172 2173 for_each_ggtt_vma(vma, obj) 2174 i915_vma_unset_userfault(vma); 2175 } 2176 2177 /** 2178 * i915_gem_release_mmap - remove physical page mappings 2179 * @obj: obj in question 2180 * 2181 * Preserve the reservation of the mmapping with the DRM core code, but 2182 * relinquish ownership of the pages back to the system. 2183 * 2184 * It is vital that we remove the page mapping if we have mapped a tiled 2185 * object through the GTT and then lose the fence register due to 2186 * resource pressure. Similarly if the object has been moved out of the 2187 * aperture, than pages mapped into userspace must be revoked. 
Removing the 2188 * mapping will then trigger a page fault on the next user access, allowing 2189 * fixup by i915_gem_fault(). 2190 */ 2191 void 2192 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2193 { 2194 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2195 2196 /* Serialisation between user GTT access and our code depends upon 2197 * revoking the CPU's PTE whilst the mutex is held. The next user 2198 * pagefault then has to wait until we release the mutex. 2199 * 2200 * Note that RPM complicates somewhat by adding an additional 2201 * requirement that operations to the GGTT be made holding the RPM 2202 * wakeref. 2203 */ 2204 lockdep_assert_held(&i915->drm.struct_mutex); 2205 intel_runtime_pm_get(i915); 2206 2207 if (!obj->userfault_count) 2208 goto out; 2209 2210 __i915_gem_object_release_mmap(obj); 2211 2212 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2213 * memory transactions from userspace before we return. The TLB 2214 * flushing implied above by changing the PTE above *should* be 2215 * sufficient, an extra barrier here just provides us with a bit 2216 * of paranoid documentation about our requirement to serialise 2217 * memory writes before touching registers / GSM. 2218 */ 2219 wmb(); 2220 2221 out: 2222 intel_runtime_pm_put(i915); 2223 } 2224 2225 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2226 { 2227 struct drm_i915_gem_object *obj, *on; 2228 int i; 2229 2230 /* 2231 * Only called during RPM suspend. All users of the userfault_list 2232 * must be holding an RPM wakeref to ensure that this can not 2233 * run concurrently with themselves (and use the struct_mutex for 2234 * protection between themselves). 2235 */ 2236 2237 list_for_each_entry_safe(obj, on, 2238 &dev_priv->mm.userfault_list, userfault_link) 2239 __i915_gem_object_release_mmap(obj); 2240 2241 /* The fence will be lost when the device powers down. If any were 2242 * in use by hardware (i.e. they are pinned), we should not be powering 2243 * down! All other fences will be reacquired by the user upon waking. 2244 */ 2245 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2246 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2247 2248 /* Ideally we want to assert that the fence register is not 2249 * live at this point (i.e. that no piece of code will be 2250 * trying to write through fence + GTT, as that both violates 2251 * our tracking of activity and associated locking/barriers, 2252 * but also is illegal given that the hw is powered down). 2253 * 2254 * Previously we used reg->pin_count as a "liveness" indicator. 2255 * That is not sufficient, and we need a more fine-grained 2256 * tool if we want to have a sanity check here. 
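 *
 * For now we settle for marking each surviving register dirty below,
 * so that its contents are rewritten before the fence is trusted again.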
2257 */ 2258 2259 if (!reg->vma) 2260 continue; 2261 2262 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2263 reg->dirty = true; 2264 } 2265 } 2266 2267 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2268 { 2269 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2270 int err; 2271 2272 err = drm_gem_create_mmap_offset(&obj->base); 2273 if (likely(!err)) 2274 return 0; 2275 2276 /* Attempt to reap some mmap space from dead objects */ 2277 do { 2278 err = i915_gem_wait_for_idle(dev_priv, 2279 I915_WAIT_INTERRUPTIBLE, 2280 MAX_SCHEDULE_TIMEOUT); 2281 if (err) 2282 break; 2283 2284 i915_gem_drain_freed_objects(dev_priv); 2285 err = drm_gem_create_mmap_offset(&obj->base); 2286 if (!err) 2287 break; 2288 2289 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2290 2291 return err; 2292 } 2293 2294 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2295 { 2296 drm_gem_free_mmap_offset(&obj->base); 2297 } 2298 2299 int 2300 i915_gem_mmap_gtt(struct drm_file *file, 2301 struct drm_device *dev, 2302 uint32_t handle, 2303 uint64_t *offset) 2304 { 2305 struct drm_i915_gem_object *obj; 2306 int ret; 2307 2308 obj = i915_gem_object_lookup(file, handle); 2309 if (!obj) 2310 return -ENOENT; 2311 2312 ret = i915_gem_object_create_mmap_offset(obj); 2313 if (ret == 0) 2314 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2315 2316 i915_gem_object_put(obj); 2317 return ret; 2318 } 2319 2320 /** 2321 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2322 * @dev: DRM device 2323 * @data: GTT mapping ioctl data 2324 * @file: GEM object info 2325 * 2326 * Simply returns the fake offset to userspace so it can mmap it. 2327 * The mmap call will end up in drm_gem_mmap(), which will set things 2328 * up so we can get faults in the handler above. 2329 * 2330 * The fault handler will take care of binding the object into the GTT 2331 * (since it may have been evicted to make room for something), allocating 2332 * a fence register, and mapping the appropriate aperture address into 2333 * userspace. 2334 */ 2335 int 2336 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2337 struct drm_file *file) 2338 { 2339 struct drm_i915_gem_mmap_gtt *args = data; 2340 2341 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2342 } 2343 2344 /* Immediately discard the backing storage */ 2345 static void 2346 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2347 { 2348 i915_gem_object_free_mmap_offset(obj); 2349 2350 if (obj->base.filp == NULL) 2351 return; 2352 2353 /* Our goal here is to return as much of the memory as 2354 * is possible back to the system as we are called from OOM. 2355 * To do this we must instruct the shmfs to drop all of its 2356 * backing pages, *now*. 
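 * shmem_truncate_range() below does exactly that; obj->mm.pages is then
 * poisoned with an error pointer so any further use is caught.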
2357 */ 2358 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2359 obj->mm.madv = __I915_MADV_PURGED; 2360 obj->mm.pages = ERR_PTR(-EFAULT); 2361 } 2362 2363 /* Try to discard unwanted pages */ 2364 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2365 { 2366 struct address_space *mapping; 2367 2368 lockdep_assert_held(&obj->mm.lock); 2369 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2370 2371 switch (obj->mm.madv) { 2372 case I915_MADV_DONTNEED: 2373 i915_gem_object_truncate(obj); 2374 case __I915_MADV_PURGED: 2375 return; 2376 } 2377 2378 if (obj->base.filp == NULL) 2379 return; 2380 2381 mapping = obj->base.filp->f_mapping, 2382 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2383 } 2384 2385 /* 2386 * Move pages to appropriate lru and release the pagevec, decrementing the 2387 * ref count of those pages. 2388 */ 2389 static void check_release_pagevec(struct pagevec *pvec) 2390 { 2391 check_move_unevictable_pages(pvec); 2392 __pagevec_release(pvec); 2393 cond_resched(); 2394 } 2395 2396 static void 2397 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2398 struct sg_table *pages) 2399 { 2400 struct sgt_iter sgt_iter; 2401 struct pagevec pvec; 2402 struct page *page; 2403 2404 __i915_gem_object_release_shmem(obj, pages, true); 2405 2406 i915_gem_gtt_finish_pages(obj, pages); 2407 2408 if (i915_gem_object_needs_bit17_swizzle(obj)) 2409 i915_gem_object_save_bit_17_swizzle(obj, pages); 2410 2411 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); 2412 2413 pagevec_init(&pvec); 2414 for_each_sgt_page(page, sgt_iter, pages) { 2415 if (obj->mm.dirty) 2416 set_page_dirty(page); 2417 2418 if (obj->mm.madv == I915_MADV_WILLNEED) 2419 mark_page_accessed(page); 2420 2421 if (!pagevec_add(&pvec, page)) 2422 check_release_pagevec(&pvec); 2423 } 2424 if (pagevec_count(&pvec)) 2425 check_release_pagevec(&pvec); 2426 obj->mm.dirty = false; 2427 2428 sg_free_table(pages); 2429 kfree(pages); 2430 } 2431 2432 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2433 { 2434 struct radix_tree_iter iter; 2435 void __rcu **slot; 2436 2437 rcu_read_lock(); 2438 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2439 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2440 rcu_read_unlock(); 2441 } 2442 2443 static struct sg_table * 2444 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2445 { 2446 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2447 struct sg_table *pages; 2448 2449 pages = fetch_and_zero(&obj->mm.pages); 2450 if (!pages) 2451 return NULL; 2452 2453 spin_lock(&i915->mm.obj_lock); 2454 list_del(&obj->mm.link); 2455 spin_unlock(&i915->mm.obj_lock); 2456 2457 if (obj->mm.mapping) { 2458 void *ptr; 2459 2460 ptr = page_mask_bits(obj->mm.mapping); 2461 if (is_vmalloc_addr(ptr)) 2462 vunmap(ptr); 2463 else 2464 kunmap(kmap_to_page(ptr)); 2465 2466 obj->mm.mapping = NULL; 2467 } 2468 2469 __i915_gem_object_reset_page_iter(obj); 2470 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; 2471 2472 return pages; 2473 } 2474 2475 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2476 enum i915_mm_subclass subclass) 2477 { 2478 struct sg_table *pages; 2479 2480 if (i915_gem_object_has_pinned_pages(obj)) 2481 return; 2482 2483 GEM_BUG_ON(obj->bind_count); 2484 if (!i915_gem_object_has_pages(obj)) 2485 return; 2486 2487 /* May be called by shrinker from within get_pages() (on another bo) */ 2488 mutex_lock_nested(&obj->mm.lock, subclass); 2489 if 
(unlikely(atomic_read(&obj->mm.pages_pin_count))) 2490 goto unlock; 2491 2492 /* 2493 * ->put_pages might need to allocate memory for the bit17 swizzle 2494 * array, hence protect them from being reaped by removing them from gtt 2495 * lists early. 2496 */ 2497 pages = __i915_gem_object_unset_pages(obj); 2498 if (!IS_ERR(pages)) 2499 obj->ops->put_pages(obj, pages); 2500 2501 unlock: 2502 mutex_unlock(&obj->mm.lock); 2503 } 2504 2505 bool i915_sg_trim(struct sg_table *orig_st) 2506 { 2507 struct sg_table new_st; 2508 struct scatterlist *sg, *new_sg; 2509 unsigned int i; 2510 2511 if (orig_st->nents == orig_st->orig_nents) 2512 return false; 2513 2514 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2515 return false; 2516 2517 new_sg = new_st.sgl; 2518 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2519 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2520 sg_dma_address(new_sg) = sg_dma_address(sg); 2521 sg_dma_len(new_sg) = sg_dma_len(sg); 2522 2523 new_sg = sg_next(new_sg); 2524 } 2525 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2526 2527 sg_free_table(orig_st); 2528 2529 *orig_st = new_st; 2530 return true; 2531 } 2532 2533 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2534 { 2535 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2536 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2537 unsigned long i; 2538 struct address_space *mapping; 2539 struct sg_table *st; 2540 struct scatterlist *sg; 2541 struct sgt_iter sgt_iter; 2542 struct page *page; 2543 unsigned long last_pfn = 0; /* suppress gcc warning */ 2544 unsigned int max_segment = i915_sg_segment_size(); 2545 unsigned int sg_page_sizes; 2546 struct pagevec pvec; 2547 gfp_t noreclaim; 2548 int ret; 2549 2550 /* 2551 * Assert that the object is not currently in any GPU domain. As it 2552 * wasn't in the GTT, there shouldn't be any way it could have been in 2553 * a GPU cache 2554 */ 2555 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2556 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2557 2558 /* 2559 * If there's no chance of allocating enough pages for the whole 2560 * object, bail early. 2561 */ 2562 if (page_count > totalram_pages()) 2563 return -ENOMEM; 2564 2565 st = kmalloc(sizeof(*st), GFP_KERNEL); 2566 if (st == NULL) 2567 return -ENOMEM; 2568 2569 rebuild_st: 2570 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2571 kfree(st); 2572 return -ENOMEM; 2573 } 2574 2575 /* 2576 * Get the list of pages out of our struct file. They'll be pinned 2577 * at this point until we release them. 
2578 * 2579 * Fail silently without starting the shrinker 2580 */ 2581 mapping = obj->base.filp->f_mapping; 2582 mapping_set_unevictable(mapping); 2583 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2584 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2585 2586 sg = st->sgl; 2587 st->nents = 0; 2588 sg_page_sizes = 0; 2589 for (i = 0; i < page_count; i++) { 2590 const unsigned int shrink[] = { 2591 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2592 0, 2593 }, *s = shrink; 2594 gfp_t gfp = noreclaim; 2595 2596 do { 2597 cond_resched(); 2598 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2599 if (likely(!IS_ERR(page))) 2600 break; 2601 2602 if (!*s) { 2603 ret = PTR_ERR(page); 2604 goto err_sg; 2605 } 2606 2607 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2608 2609 /* 2610 * We've tried hard to allocate the memory by reaping 2611 * our own buffer, now let the real VM do its job and 2612 * go down in flames if truly OOM. 2613 * 2614 * However, since graphics tend to be disposable, 2615 * defer the oom here by reporting the ENOMEM back 2616 * to userspace. 2617 */ 2618 if (!*s) { 2619 /* reclaim and warn, but no oom */ 2620 gfp = mapping_gfp_mask(mapping); 2621 2622 /* 2623 * Our bo are always dirty and so we require 2624 * kswapd to reclaim our pages (direct reclaim 2625 * does not effectively begin pageout of our 2626 * buffers on its own). However, direct reclaim 2627 * only waits for kswapd when under allocation 2628 * congestion. So as a result __GFP_RECLAIM is 2629 * unreliable and fails to actually reclaim our 2630 * dirty pages -- unless you try over and over 2631 * again with !__GFP_NORETRY. However, we still 2632 * want to fail this allocation rather than 2633 * trigger the out-of-memory killer and for 2634 * this we want __GFP_RETRY_MAYFAIL. 2635 */ 2636 gfp |= __GFP_RETRY_MAYFAIL; 2637 } 2638 } while (1); 2639 2640 if (!i || 2641 sg->length >= max_segment || 2642 page_to_pfn(page) != last_pfn + 1) { 2643 if (i) { 2644 sg_page_sizes |= sg->length; 2645 sg = sg_next(sg); 2646 } 2647 st->nents++; 2648 sg_set_page(sg, page, PAGE_SIZE, 0); 2649 } else { 2650 sg->length += PAGE_SIZE; 2651 } 2652 last_pfn = page_to_pfn(page); 2653 2654 /* Check that the i965g/gm workaround works. */ 2655 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2656 } 2657 if (sg) { /* loop terminated early; short sg table */ 2658 sg_page_sizes |= sg->length; 2659 sg_mark_end(sg); 2660 } 2661 2662 /* Trim unused sg entries to avoid wasting memory. */ 2663 i915_sg_trim(st); 2664 2665 ret = i915_gem_gtt_prepare_pages(obj, st); 2666 if (ret) { 2667 /* 2668 * DMA remapping failed? One possible cause is that 2669 * it could not reserve enough large entries, asking 2670 * for PAGE_SIZE chunks instead may be helpful. 
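 * (The retry path below releases the pages, frees the sg_table and
 * jumps back to rebuild_st with max_segment clamped to PAGE_SIZE.)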
2671 */ 2672 if (max_segment > PAGE_SIZE) { 2673 for_each_sgt_page(page, sgt_iter, st) 2674 put_page(page); 2675 sg_free_table(st); 2676 2677 max_segment = PAGE_SIZE; 2678 goto rebuild_st; 2679 } else { 2680 dev_warn(&dev_priv->drm.pdev->dev, 2681 "Failed to DMA remap %lu pages\n", 2682 page_count); 2683 goto err_pages; 2684 } 2685 } 2686 2687 if (i915_gem_object_needs_bit17_swizzle(obj)) 2688 i915_gem_object_do_bit_17_swizzle(obj, st); 2689 2690 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 2691 2692 return 0; 2693 2694 err_sg: 2695 sg_mark_end(sg); 2696 err_pages: 2697 mapping_clear_unevictable(mapping); 2698 pagevec_init(&pvec); 2699 for_each_sgt_page(page, sgt_iter, st) { 2700 if (!pagevec_add(&pvec, page)) 2701 check_release_pagevec(&pvec); 2702 } 2703 if (pagevec_count(&pvec)) 2704 check_release_pagevec(&pvec); 2705 sg_free_table(st); 2706 kfree(st); 2707 2708 /* 2709 * shmemfs first checks if there is enough memory to allocate the page 2710 * and reports ENOSPC should there be insufficient, along with the usual 2711 * ENOMEM for a genuine allocation failure. 2712 * 2713 * We use ENOSPC in our driver to mean that we have run out of aperture 2714 * space and so want to translate the error from shmemfs back to our 2715 * usual understanding of ENOMEM. 2716 */ 2717 if (ret == -ENOSPC) 2718 ret = -ENOMEM; 2719 2720 return ret; 2721 } 2722 2723 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2724 struct sg_table *pages, 2725 unsigned int sg_page_sizes) 2726 { 2727 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2728 unsigned long supported = INTEL_INFO(i915)->page_sizes; 2729 int i; 2730 2731 lockdep_assert_held(&obj->mm.lock); 2732 2733 obj->mm.get_page.sg_pos = pages->sgl; 2734 obj->mm.get_page.sg_idx = 0; 2735 2736 obj->mm.pages = pages; 2737 2738 if (i915_gem_object_is_tiled(obj) && 2739 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2740 GEM_BUG_ON(obj->mm.quirked); 2741 __i915_gem_object_pin_pages(obj); 2742 obj->mm.quirked = true; 2743 } 2744 2745 GEM_BUG_ON(!sg_page_sizes); 2746 obj->mm.page_sizes.phys = sg_page_sizes; 2747 2748 /* 2749 * Calculate the supported page-sizes which fit into the given 2750 * sg_page_sizes. This will give us the page-sizes which we may be able 2751 * to use opportunistically when later inserting into the GTT. For 2752 * example if phys=2G, then in theory we should be able to use 1G, 2M, 2753 * 64K or 4K pages, although in practice this will depend on a number of 2754 * other factors. 2755 */ 2756 obj->mm.page_sizes.sg = 0; 2757 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 2758 if (obj->mm.page_sizes.phys & ~0u << i) 2759 obj->mm.page_sizes.sg |= BIT(i); 2760 } 2761 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 2762 2763 spin_lock(&i915->mm.obj_lock); 2764 list_add(&obj->mm.link, &i915->mm.unbound_list); 2765 spin_unlock(&i915->mm.obj_lock); 2766 } 2767 2768 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2769 { 2770 int err; 2771 2772 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2773 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2774 return -EFAULT; 2775 } 2776 2777 err = obj->ops->get_pages(obj); 2778 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 2779 2780 return err; 2781 } 2782 2783 /* Ensure that the associated pages are gathered from the backing storage 2784 * and pinned into our object. 
i915_gem_object_pin_pages() may be called 2785 * multiple times before they are released by a single call to 2786 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2787 * either as a result of memory pressure (reaping pages under the shrinker) 2788 * or as the object is itself released. 2789 */ 2790 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2791 { 2792 int err; 2793 2794 err = mutex_lock_interruptible(&obj->mm.lock); 2795 if (err) 2796 return err; 2797 2798 if (unlikely(!i915_gem_object_has_pages(obj))) { 2799 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2800 2801 err = ____i915_gem_object_get_pages(obj); 2802 if (err) 2803 goto unlock; 2804 2805 smp_mb__before_atomic(); 2806 } 2807 atomic_inc(&obj->mm.pages_pin_count); 2808 2809 unlock: 2810 mutex_unlock(&obj->mm.lock); 2811 return err; 2812 } 2813 2814 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2815 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2816 enum i915_map_type type) 2817 { 2818 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2819 struct sg_table *sgt = obj->mm.pages; 2820 struct sgt_iter sgt_iter; 2821 struct page *page; 2822 struct page *stack_pages[32]; 2823 struct page **pages = stack_pages; 2824 unsigned long i = 0; 2825 pgprot_t pgprot; 2826 void *addr; 2827 2828 /* A single page can always be kmapped */ 2829 if (n_pages == 1 && type == I915_MAP_WB) 2830 return kmap(sg_page(sgt->sgl)); 2831 2832 if (n_pages > ARRAY_SIZE(stack_pages)) { 2833 /* Too big for stack -- allocate temporary array instead */ 2834 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 2835 if (!pages) 2836 return NULL; 2837 } 2838 2839 for_each_sgt_page(page, sgt_iter, sgt) 2840 pages[i++] = page; 2841 2842 /* Check that we have the expected number of pages */ 2843 GEM_BUG_ON(i != n_pages); 2844 2845 switch (type) { 2846 default: 2847 MISSING_CASE(type); 2848 /* fallthrough to use PAGE_KERNEL anyway */ 2849 case I915_MAP_WB: 2850 pgprot = PAGE_KERNEL; 2851 break; 2852 case I915_MAP_WC: 2853 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2854 break; 2855 } 2856 addr = vmap(pages, n_pages, 0, pgprot); 2857 2858 if (pages != stack_pages) 2859 kvfree(pages); 2860 2861 return addr; 2862 } 2863 2864 /* get, pin, and map the pages of the object into kernel space */ 2865 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2866 enum i915_map_type type) 2867 { 2868 enum i915_map_type has_type; 2869 bool pinned; 2870 void *ptr; 2871 int ret; 2872 2873 if (unlikely(!i915_gem_object_has_struct_page(obj))) 2874 return ERR_PTR(-ENXIO); 2875 2876 ret = mutex_lock_interruptible(&obj->mm.lock); 2877 if (ret) 2878 return ERR_PTR(ret); 2879 2880 pinned = !(type & I915_MAP_OVERRIDE); 2881 type &= ~I915_MAP_OVERRIDE; 2882 2883 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2884 if (unlikely(!i915_gem_object_has_pages(obj))) { 2885 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2886 2887 ret = ____i915_gem_object_get_pages(obj); 2888 if (ret) 2889 goto err_unlock; 2890 2891 smp_mb__before_atomic(); 2892 } 2893 atomic_inc(&obj->mm.pages_pin_count); 2894 pinned = false; 2895 } 2896 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 2897 2898 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 2899 if (ptr && has_type != type) { 2900 if (pinned) { 2901 ret = -EBUSY; 2902 goto err_unpin; 2903 } 2904 2905 if (is_vmalloc_addr(ptr)) 2906 vunmap(ptr); 2907 else 2908 kunmap(kmap_to_page(ptr)); 2909 2910 ptr = obj->mm.mapping = NULL; 2911 } 2912 2913 if (!ptr) { 
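/* No usable mapping is cached; build a new one of the requested type. */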
2914 ptr = i915_gem_object_map(obj, type); 2915 if (!ptr) { 2916 ret = -ENOMEM; 2917 goto err_unpin; 2918 } 2919 2920 obj->mm.mapping = page_pack_bits(ptr, type); 2921 } 2922 2923 out_unlock: 2924 mutex_unlock(&obj->mm.lock); 2925 return ptr; 2926 2927 err_unpin: 2928 atomic_dec(&obj->mm.pages_pin_count); 2929 err_unlock: 2930 ptr = ERR_PTR(ret); 2931 goto out_unlock; 2932 } 2933 2934 static int 2935 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2936 const struct drm_i915_gem_pwrite *arg) 2937 { 2938 struct address_space *mapping = obj->base.filp->f_mapping; 2939 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2940 u64 remain, offset; 2941 unsigned int pg; 2942 2943 /* Before we instantiate/pin the backing store for our use, we 2944 * can prepopulate the shmemfs filp efficiently using a write into 2945 * the pagecache. We avoid the penalty of instantiating all the 2946 * pages, important if the user is just writing to a few and never 2947 * uses the object on the GPU, and using a direct write into shmemfs 2948 * allows it to avoid the cost of retrieving a page (either swapin 2949 * or clearing-before-use) before it is overwritten. 2950 */ 2951 if (i915_gem_object_has_pages(obj)) 2952 return -ENODEV; 2953 2954 if (obj->mm.madv != I915_MADV_WILLNEED) 2955 return -EFAULT; 2956 2957 /* Before the pages are instantiated the object is treated as being 2958 * in the CPU domain. The pages will be clflushed as required before 2959 * use, and we can freely write into the pages directly. If userspace 2960 * races pwrite with any other operation; corruption will ensue - 2961 * that is userspace's prerogative! 2962 */ 2963 2964 remain = arg->size; 2965 offset = arg->offset; 2966 pg = offset_in_page(offset); 2967 2968 do { 2969 unsigned int len, unwritten; 2970 struct page *page; 2971 void *data, *vaddr; 2972 int err; 2973 2974 len = PAGE_SIZE - pg; 2975 if (len > remain) 2976 len = remain; 2977 2978 err = pagecache_write_begin(obj->base.filp, mapping, 2979 offset, len, 0, 2980 &page, &data); 2981 if (err < 0) 2982 return err; 2983 2984 vaddr = kmap(page); 2985 unwritten = copy_from_user(vaddr + pg, user_data, len); 2986 kunmap(page); 2987 2988 err = pagecache_write_end(obj->base.filp, mapping, 2989 offset, len, len - unwritten, 2990 page, data); 2991 if (err < 0) 2992 return err; 2993 2994 if (unwritten) 2995 return -EFAULT; 2996 2997 remain -= len; 2998 user_data += len; 2999 offset += len; 3000 pg = 0; 3001 } while (remain); 3002 3003 return 0; 3004 } 3005 3006 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 3007 const struct i915_gem_context *ctx) 3008 { 3009 unsigned int score; 3010 unsigned long prev_hang; 3011 3012 if (i915_gem_context_is_banned(ctx)) 3013 score = I915_CLIENT_SCORE_CONTEXT_BAN; 3014 else 3015 score = 0; 3016 3017 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 3018 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 3019 score += I915_CLIENT_SCORE_HANG_FAST; 3020 3021 if (score) { 3022 atomic_add(score, &file_priv->ban_score); 3023 3024 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 3025 ctx->name, score, 3026 atomic_read(&file_priv->ban_score)); 3027 } 3028 } 3029 3030 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3031 { 3032 unsigned int score; 3033 bool banned, bannable; 3034 3035 atomic_inc(&ctx->guilty_count); 3036 3037 bannable = i915_gem_context_is_bannable(ctx); 3038 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3039 banned = score >= 
CONTEXT_SCORE_BAN_THRESHOLD; 3040 3041 /* Cool contexts don't accumulate client ban score */ 3042 if (!bannable) 3043 return; 3044 3045 if (banned) { 3046 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3047 ctx->name, atomic_read(&ctx->guilty_count), 3048 score); 3049 i915_gem_context_set_banned(ctx); 3050 } 3051 3052 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3053 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3054 } 3055 3056 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3057 { 3058 atomic_inc(&ctx->active_count); 3059 } 3060 3061 struct i915_request * 3062 i915_gem_find_active_request(struct intel_engine_cs *engine) 3063 { 3064 struct i915_request *request, *active = NULL; 3065 unsigned long flags; 3066 3067 /* 3068 * We are called by the error capture, reset and to dump engine 3069 * state at random points in time. In particular, note that neither is 3070 * crucially ordered with an interrupt. After a hang, the GPU is dead 3071 * and we assume that no more writes can happen (we waited long enough 3072 * for all writes that were in transaction to be flushed) - adding an 3073 * extra delay for a recent interrupt is pointless. Hence, we do 3074 * not need an engine->irq_seqno_barrier() before the seqno reads. 3075 * At all other times, we must assume the GPU is still running, but 3076 * we only care about the snapshot of this moment. 3077 */ 3078 spin_lock_irqsave(&engine->timeline.lock, flags); 3079 list_for_each_entry(request, &engine->timeline.requests, link) { 3080 if (__i915_request_completed(request, request->global_seqno)) 3081 continue; 3082 3083 active = request; 3084 break; 3085 } 3086 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3087 3088 return active; 3089 } 3090 3091 /* 3092 * Ensure irq handler finishes, and not run again. 3093 * Also return the active request so that we only search for it once. 3094 */ 3095 struct i915_request * 3096 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3097 { 3098 struct i915_request *request; 3099 3100 /* 3101 * During the reset sequence, we must prevent the engine from 3102 * entering RC6. As the context state is undefined until we restart 3103 * the engine, if it does enter RC6 during the reset, the state 3104 * written to the powercontext is undefined and so we may lose 3105 * GPU state upon resume, i.e. fail to restart after a reset. 3106 */ 3107 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3108 3109 request = engine->reset.prepare(engine); 3110 if (request && request->fence.error == -EIO) 3111 request = ERR_PTR(-EIO); /* Previous reset failed! 
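 * Hand the error back to the caller.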
*/ 3112 3113 return request; 3114 } 3115 3116 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3117 { 3118 struct intel_engine_cs *engine; 3119 struct i915_request *request; 3120 enum intel_engine_id id; 3121 int err = 0; 3122 3123 for_each_engine(engine, dev_priv, id) { 3124 request = i915_gem_reset_prepare_engine(engine); 3125 if (IS_ERR(request)) { 3126 err = PTR_ERR(request); 3127 continue; 3128 } 3129 3130 engine->hangcheck.active_request = request; 3131 } 3132 3133 i915_gem_revoke_fences(dev_priv); 3134 intel_uc_sanitize(dev_priv); 3135 3136 return err; 3137 } 3138 3139 static void engine_skip_context(struct i915_request *request) 3140 { 3141 struct intel_engine_cs *engine = request->engine; 3142 struct i915_gem_context *hung_ctx = request->gem_context; 3143 struct i915_timeline *timeline = request->timeline; 3144 unsigned long flags; 3145 3146 GEM_BUG_ON(timeline == &engine->timeline); 3147 3148 spin_lock_irqsave(&engine->timeline.lock, flags); 3149 spin_lock(&timeline->lock); 3150 3151 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3152 if (request->gem_context == hung_ctx) 3153 i915_request_skip(request, -EIO); 3154 3155 list_for_each_entry(request, &timeline->requests, link) 3156 i915_request_skip(request, -EIO); 3157 3158 spin_unlock(&timeline->lock); 3159 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3160 } 3161 3162 /* Returns the request if it was guilty of the hang */ 3163 static struct i915_request * 3164 i915_gem_reset_request(struct intel_engine_cs *engine, 3165 struct i915_request *request, 3166 bool stalled) 3167 { 3168 /* The guilty request will get skipped on a hung engine. 3169 * 3170 * Users of client default contexts do not rely on logical 3171 * state preserved between batches so it is safe to execute 3172 * queued requests following the hang. Non default contexts 3173 * rely on preserved state, so skipping a batch loses the 3174 * evolution of the state and it needs to be considered corrupted. 3175 * Executing more queued batches on top of corrupted state is 3176 * risky. But we take the risk by trying to advance through 3177 * the queued requests in order to make the client behaviour 3178 * more predictable around resets, by not throwing away random 3179 * amount of batches it has prepared for execution. Sophisticated 3180 * clients can use gem_reset_stats_ioctl and dma fence status 3181 * (exported via sync_file info ioctl on explicit fences) to observe 3182 * when it loses the context state and should rebuild accordingly. 3183 * 3184 * The context ban, and ultimately the client ban, mechanism are safety 3185 * valves if client submission ends up resulting in nothing more than 3186 * subsequent hangs. 3187 */ 3188 3189 if (i915_request_completed(request)) { 3190 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3191 engine->name, request->global_seqno, 3192 request->fence.context, request->fence.seqno, 3193 intel_engine_get_seqno(engine)); 3194 stalled = false; 3195 } 3196 3197 if (stalled) { 3198 i915_gem_context_mark_guilty(request->gem_context); 3199 i915_request_skip(request, -EIO); 3200 3201 /* If this context is now banned, skip all pending requests. */ 3202 if (i915_gem_context_is_banned(request->gem_context)) 3203 engine_skip_context(request); 3204 } else { 3205 /* 3206 * Since this is not the hung engine, it may have advanced 3207 * since the hang declaration. Double check by refinding 3208 * the active request at the time of the reset. 
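 * Whatever we find is marked innocent and flagged with -EAGAIN so that
 * it can be replayed once the engine has been reset.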
3209 */ 3210 request = i915_gem_find_active_request(engine); 3211 if (request) { 3212 unsigned long flags; 3213 3214 i915_gem_context_mark_innocent(request->gem_context); 3215 dma_fence_set_error(&request->fence, -EAGAIN); 3216 3217 /* Rewind the engine to replay the incomplete rq */ 3218 spin_lock_irqsave(&engine->timeline.lock, flags); 3219 request = list_prev_entry(request, link); 3220 if (&request->link == &engine->timeline.requests) 3221 request = NULL; 3222 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3223 } 3224 } 3225 3226 return request; 3227 } 3228 3229 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3230 struct i915_request *request, 3231 bool stalled) 3232 { 3233 /* 3234 * Make sure this write is visible before we re-enable the interrupt 3235 * handlers on another CPU, as tasklet_enable() resolves to just 3236 * a compiler barrier which is insufficient for our purpose here. 3237 */ 3238 smp_store_mb(engine->irq_posted, 0); 3239 3240 if (request) 3241 request = i915_gem_reset_request(engine, request, stalled); 3242 3243 /* Set up the CS to resume from the breadcrumb of the hung request */ 3244 engine->reset.reset(engine, request); 3245 } 3246 3247 void i915_gem_reset(struct drm_i915_private *dev_priv, 3248 unsigned int stalled_mask) 3249 { 3250 struct intel_engine_cs *engine; 3251 enum intel_engine_id id; 3252 3253 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3254 3255 i915_retire_requests(dev_priv); 3256 3257 for_each_engine(engine, dev_priv, id) { 3258 struct intel_context *ce; 3259 3260 i915_gem_reset_engine(engine, 3261 engine->hangcheck.active_request, 3262 stalled_mask & ENGINE_MASK(id)); 3263 ce = fetch_and_zero(&engine->last_retired_context); 3264 if (ce) 3265 intel_context_unpin(ce); 3266 3267 /* 3268 * Ostensibly, we always want a context loaded for powersaving, 3269 * so if the engine is idle after the reset, send a request 3270 * to load our scratch kernel_context. 3271 * 3272 * More mysteriously, if we leave the engine idle after a reset, 3273 * the next userspace batch may hang, with what appears to be 3274 * an incoherent read by the CS (presumably stale TLB). An 3275 * empty request appears sufficient to paper over the glitch.
3276 */ 3277 if (intel_engine_is_idle(engine)) { 3278 struct i915_request *rq; 3279 3280 rq = i915_request_alloc(engine, 3281 dev_priv->kernel_context); 3282 if (!IS_ERR(rq)) 3283 i915_request_add(rq); 3284 } 3285 } 3286 3287 i915_gem_restore_fences(dev_priv); 3288 } 3289 3290 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3291 { 3292 engine->reset.finish(engine); 3293 3294 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3295 } 3296 3297 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3298 { 3299 struct intel_engine_cs *engine; 3300 enum intel_engine_id id; 3301 3302 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3303 3304 for_each_engine(engine, dev_priv, id) { 3305 engine->hangcheck.active_request = NULL; 3306 i915_gem_reset_finish_engine(engine); 3307 } 3308 } 3309 3310 static void nop_submit_request(struct i915_request *request) 3311 { 3312 unsigned long flags; 3313 3314 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3315 request->engine->name, 3316 request->fence.context, request->fence.seqno); 3317 dma_fence_set_error(&request->fence, -EIO); 3318 3319 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3320 __i915_request_submit(request); 3321 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3322 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3323 } 3324 3325 void i915_gem_set_wedged(struct drm_i915_private *i915) 3326 { 3327 struct intel_engine_cs *engine; 3328 enum intel_engine_id id; 3329 3330 GEM_TRACE("start\n"); 3331 3332 if (GEM_SHOW_DEBUG()) { 3333 struct drm_printer p = drm_debug_printer(__func__); 3334 3335 for_each_engine(engine, i915, id) 3336 intel_engine_dump(engine, &p, "%s\n", engine->name); 3337 } 3338 3339 if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) 3340 goto out; 3341 3342 /* 3343 * First, stop submission to hw, but do not yet complete requests by 3344 * rolling the global seqno forward (since this would complete requests 3345 * for which we haven't set the fence error to EIO yet). 3346 */ 3347 for_each_engine(engine, i915, id) 3348 i915_gem_reset_prepare_engine(engine); 3349 3350 /* Even if the GPU reset fails, it should still stop the engines */ 3351 if (INTEL_GEN(i915) >= 5) 3352 intel_gpu_reset(i915, ALL_ENGINES); 3353 3354 for_each_engine(engine, i915, id) { 3355 engine->submit_request = nop_submit_request; 3356 engine->schedule = NULL; 3357 } 3358 i915->caps.scheduler = 0; 3359 3360 /* 3361 * Make sure no request can slip through without getting completed by 3362 * either this call here to intel_engine_init_global_seqno, or the one 3363 * in nop_submit_request. 3364 */ 3365 synchronize_rcu(); 3366 3367 /* Mark all executing requests as skipped */ 3368 for_each_engine(engine, i915, id) 3369 engine->cancel_requests(engine); 3370 3371 for_each_engine(engine, i915, id) { 3372 i915_gem_reset_finish_engine(engine); 3373 intel_engine_wakeup(engine); 3374 } 3375 3376 out: 3377 GEM_TRACE("end\n"); 3378 3379 wake_up_all(&i915->gpu_error.reset_queue); 3380 } 3381 3382 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3383 { 3384 struct i915_timeline *tl; 3385 3386 lockdep_assert_held(&i915->drm.struct_mutex); 3387 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3388 return true; 3389 3390 GEM_TRACE("start\n"); 3391 3392 /* 3393 * Before unwedging, make sure that all pending operations 3394 * are flushed and errored out - we may have requests waiting upon 3395 * third party fences. 
We marked all inflight requests as EIO, and 3396 * every execbuf since returned EIO, for consistency we want all 3397 * the currently pending requests to also be marked as EIO, which 3398 * is done inside our nop_submit_request - and so we must wait. 3399 * 3400 * No more can be submitted until we reset the wedged bit. 3401 */ 3402 list_for_each_entry(tl, &i915->gt.timelines, link) { 3403 struct i915_request *rq; 3404 3405 rq = i915_gem_active_peek(&tl->last_request, 3406 &i915->drm.struct_mutex); 3407 if (!rq) 3408 continue; 3409 3410 /* 3411 * We can't use our normal waiter as we want to 3412 * avoid recursively trying to handle the current 3413 * reset. The basic dma_fence_default_wait() installs 3414 * a callback for dma_fence_signal(), which is 3415 * triggered by our nop handler (indirectly, the 3416 * callback enables the signaler thread which is 3417 * woken by the nop_submit_request() advancing the seqno 3418 * and when the seqno passes the fence, the signaler 3419 * then signals the fence waking us up). 3420 */ 3421 if (dma_fence_default_wait(&rq->fence, true, 3422 MAX_SCHEDULE_TIMEOUT) < 0) 3423 return false; 3424 } 3425 i915_retire_requests(i915); 3426 GEM_BUG_ON(i915->gt.active_requests); 3427 3428 if (!intel_gpu_reset(i915, ALL_ENGINES)) 3429 intel_engines_sanitize(i915); 3430 3431 /* 3432 * Undo nop_submit_request. We prevent all new i915 requests from 3433 * being queued (by disallowing execbuf whilst wedged) so having 3434 * waited for all active requests above, we know the system is idle 3435 * and do not have to worry about a thread being inside 3436 * engine->submit_request() as we swap over. So unlike installing 3437 * the nop_submit_request on reset, we can do this from normal 3438 * context and do not require stop_machine(). 3439 */ 3440 intel_engines_reset_default_submission(i915); 3441 i915_gem_contexts_lost(i915); 3442 3443 GEM_TRACE("end\n"); 3444 3445 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3446 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3447 3448 return true; 3449 } 3450 3451 static void 3452 i915_gem_retire_work_handler(struct work_struct *work) 3453 { 3454 struct drm_i915_private *dev_priv = 3455 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3456 struct drm_device *dev = &dev_priv->drm; 3457 3458 /* Come back later if the device is busy... */ 3459 if (mutex_trylock(&dev->struct_mutex)) { 3460 i915_retire_requests(dev_priv); 3461 mutex_unlock(&dev->struct_mutex); 3462 } 3463 3464 /* 3465 * Keep the retire handler running until we are finally idle. 3466 * We do not need to do this test under locking as in the worst-case 3467 * we queue the retire worker once too often. 3468 */ 3469 if (READ_ONCE(dev_priv->gt.awake)) 3470 queue_delayed_work(dev_priv->wq, 3471 &dev_priv->gt.retire_work, 3472 round_jiffies_up_relative(HZ)); 3473 } 3474 3475 static void shrink_caches(struct drm_i915_private *i915) 3476 { 3477 /* 3478 * kmem_cache_shrink() discards empty slabs and reorders partially 3479 * filled slabs to prioritise allocating from the mostly full slabs, 3480 * with the aim of reducing fragmentation. 
3481 */ 3482 kmem_cache_shrink(i915->priorities); 3483 kmem_cache_shrink(i915->dependencies); 3484 kmem_cache_shrink(i915->requests); 3485 kmem_cache_shrink(i915->luts); 3486 kmem_cache_shrink(i915->vmas); 3487 kmem_cache_shrink(i915->objects); 3488 } 3489 3490 struct sleep_rcu_work { 3491 union { 3492 struct rcu_head rcu; 3493 struct work_struct work; 3494 }; 3495 struct drm_i915_private *i915; 3496 unsigned int epoch; 3497 }; 3498 3499 static inline bool 3500 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3501 { 3502 /* 3503 * There is a small chance that the epoch wrapped since we started 3504 * sleeping. If we assume that epoch is at least a u32, then it will 3505 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3506 */ 3507 return epoch == READ_ONCE(i915->gt.epoch); 3508 } 3509 3510 static void __sleep_work(struct work_struct *work) 3511 { 3512 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3513 struct drm_i915_private *i915 = s->i915; 3514 unsigned int epoch = s->epoch; 3515 3516 kfree(s); 3517 if (same_epoch(i915, epoch)) 3518 shrink_caches(i915); 3519 } 3520 3521 static void __sleep_rcu(struct rcu_head *rcu) 3522 { 3523 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3524 struct drm_i915_private *i915 = s->i915; 3525 3526 destroy_rcu_head(&s->rcu); 3527 3528 if (same_epoch(i915, s->epoch)) { 3529 INIT_WORK(&s->work, __sleep_work); 3530 queue_work(i915->wq, &s->work); 3531 } else { 3532 kfree(s); 3533 } 3534 } 3535 3536 static inline bool 3537 new_requests_since_last_retire(const struct drm_i915_private *i915) 3538 { 3539 return (READ_ONCE(i915->gt.active_requests) || 3540 work_pending(&i915->gt.idle_work.work)); 3541 } 3542 3543 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3544 { 3545 struct intel_engine_cs *engine; 3546 enum intel_engine_id id; 3547 3548 if (i915_terminally_wedged(&i915->gpu_error)) 3549 return; 3550 3551 GEM_BUG_ON(i915->gt.active_requests); 3552 for_each_engine(engine, i915, id) { 3553 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3554 GEM_BUG_ON(engine->last_retired_context != 3555 to_intel_context(i915->kernel_context, engine)); 3556 } 3557 } 3558 3559 static void 3560 i915_gem_idle_work_handler(struct work_struct *work) 3561 { 3562 struct drm_i915_private *dev_priv = 3563 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3564 unsigned int epoch = I915_EPOCH_INVALID; 3565 bool rearm_hangcheck; 3566 3567 if (!READ_ONCE(dev_priv->gt.awake)) 3568 return; 3569 3570 if (READ_ONCE(dev_priv->gt.active_requests)) 3571 return; 3572 3573 /* 3574 * Flush out the last user context, leaving only the pinned 3575 * kernel context resident. When we are idling on the kernel_context, 3576 * no more new requests (with a context switch) are emitted and we 3577 * can finally rest. A consequence is that the idle work handler is 3578 * always called at least twice before idling (and if the system is 3579 * idle that implies a round trip through the retire worker). 3580 */ 3581 mutex_lock(&dev_priv->drm.struct_mutex); 3582 i915_gem_switch_to_kernel_context(dev_priv); 3583 mutex_unlock(&dev_priv->drm.struct_mutex); 3584 3585 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3586 READ_ONCE(dev_priv->gt.active_requests)); 3587 3588 /* 3589 * Wait for last execlists context complete, but bail out in case a 3590 * new request is submitted. As we don't trust the hardware, we 3591 * continue on if the wait times out. 
This is necessary to allow 3592 * the machine to suspend even if the hardware dies, and we will 3593 * try to recover in resume (after depriving the hardware of power, 3594 * it may be in a better mmod). 3595 */ 3596 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3597 intel_engines_are_idle(dev_priv), 3598 I915_IDLE_ENGINES_TIMEOUT * 1000, 3599 10, 500); 3600 3601 rearm_hangcheck = 3602 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3603 3604 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 3605 /* Currently busy, come back later */ 3606 mod_delayed_work(dev_priv->wq, 3607 &dev_priv->gt.idle_work, 3608 msecs_to_jiffies(50)); 3609 goto out_rearm; 3610 } 3611 3612 /* 3613 * New request retired after this work handler started, extend active 3614 * period until next instance of the work. 3615 */ 3616 if (new_requests_since_last_retire(dev_priv)) 3617 goto out_unlock; 3618 3619 epoch = __i915_gem_park(dev_priv); 3620 3621 assert_kernel_context_is_current(dev_priv); 3622 3623 rearm_hangcheck = false; 3624 out_unlock: 3625 mutex_unlock(&dev_priv->drm.struct_mutex); 3626 3627 out_rearm: 3628 if (rearm_hangcheck) { 3629 GEM_BUG_ON(!dev_priv->gt.awake); 3630 i915_queue_hangcheck(dev_priv); 3631 } 3632 3633 /* 3634 * When we are idle, it is an opportune time to reap our caches. 3635 * However, we have many objects that utilise RCU and the ordered 3636 * i915->wq that this work is executing on. To try and flush any 3637 * pending frees now we are idle, we first wait for an RCU grace 3638 * period, and then queue a task (that will run last on the wq) to 3639 * shrink and re-optimize the caches. 3640 */ 3641 if (same_epoch(dev_priv, epoch)) { 3642 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 3643 if (s) { 3644 init_rcu_head(&s->rcu); 3645 s->i915 = dev_priv; 3646 s->epoch = epoch; 3647 call_rcu(&s->rcu, __sleep_rcu); 3648 } 3649 } 3650 } 3651 3652 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3653 { 3654 struct drm_i915_private *i915 = to_i915(gem->dev); 3655 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3656 struct drm_i915_file_private *fpriv = file->driver_priv; 3657 struct i915_lut_handle *lut, *ln; 3658 3659 mutex_lock(&i915->drm.struct_mutex); 3660 3661 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 3662 struct i915_gem_context *ctx = lut->ctx; 3663 struct i915_vma *vma; 3664 3665 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 3666 if (ctx->file_priv != fpriv) 3667 continue; 3668 3669 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 3670 GEM_BUG_ON(vma->obj != obj); 3671 3672 /* We allow the process to have multiple handles to the same 3673 * vma, in the same fd namespace, by virtue of flink/open. 
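 * Only when the last of those handles is closed (open_count drops to
 * zero) do we actually close a non-GGTT vma below.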
3674 */ 3675 GEM_BUG_ON(!vma->open_count); 3676 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 3677 i915_vma_close(vma); 3678 3679 list_del(&lut->obj_link); 3680 list_del(&lut->ctx_link); 3681 3682 kmem_cache_free(i915->luts, lut); 3683 __i915_gem_object_release_unless_active(obj); 3684 } 3685 3686 mutex_unlock(&i915->drm.struct_mutex); 3687 } 3688 3689 static unsigned long to_wait_timeout(s64 timeout_ns) 3690 { 3691 if (timeout_ns < 0) 3692 return MAX_SCHEDULE_TIMEOUT; 3693 3694 if (timeout_ns == 0) 3695 return 0; 3696 3697 return nsecs_to_jiffies_timeout(timeout_ns); 3698 } 3699 3700 /** 3701 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3702 * @dev: drm device pointer 3703 * @data: ioctl data blob 3704 * @file: drm file pointer 3705 * 3706 * Returns 0 if successful, else an error is returned with the remaining time in 3707 * the timeout parameter. 3708 * -ETIME: object is still busy after timeout 3709 * -ERESTARTSYS: signal interrupted the wait 3710 * -ENOENT: object doesn't exist 3711 * Also possible, but rare: 3712 * -EAGAIN: incomplete, restart syscall 3713 * -ENOMEM: damn 3714 * -ENODEV: Internal IRQ fail 3715 * -E?: The add request failed 3716 * 3717 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3718 * non-zero timeout parameter the wait ioctl will wait for the given number of 3719 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3720 * without holding struct_mutex the object may become re-busied before this 3721 * function completes. A similar but shorter race condition exists in the busy 3722 * ioctl. 3723 */ 3724 int 3725 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3726 { 3727 struct drm_i915_gem_wait *args = data; 3728 struct drm_i915_gem_object *obj; 3729 ktime_t start; 3730 long ret; 3731 3732 if (args->flags != 0) 3733 return -EINVAL; 3734 3735 obj = i915_gem_object_lookup(file, args->bo_handle); 3736 if (!obj) 3737 return -ENOENT; 3738 3739 start = ktime_get(); 3740 3741 ret = i915_gem_object_wait(obj, 3742 I915_WAIT_INTERRUPTIBLE | 3743 I915_WAIT_PRIORITY | 3744 I915_WAIT_ALL, 3745 to_wait_timeout(args->timeout_ns), 3746 to_rps_client(file)); 3747 3748 if (args->timeout_ns > 0) { 3749 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3750 if (args->timeout_ns < 0) 3751 args->timeout_ns = 0; 3752 3753 /* 3754 * Apparently ktime isn't accurate enough and occasionally has a 3755 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3756 * things up to make the test happy. We allow up to 1 jiffy. 3757 * 3758 * This is a regression from the timespec->ktime conversion. 3759 */ 3760 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3761 args->timeout_ns = 0; 3762 3763 /* Asked to wait beyond the jiffie/scheduler precision? */ 3764 if (ret == -ETIME && args->timeout_ns) 3765 ret = -EAGAIN; 3766 } 3767 3768 i915_gem_object_put(obj); 3769 return ret; 3770 } 3771 3772 static long wait_for_timeline(struct i915_timeline *tl, 3773 unsigned int flags, long timeout) 3774 { 3775 struct i915_request *rq; 3776 3777 rq = i915_gem_active_get_unlocked(&tl->last_request); 3778 if (!rq) 3779 return timeout; 3780 3781 /* 3782 * "Race-to-idle". 3783 * 3784 * Switching to the kernel context is often used as a synchronous 3785 * step prior to idling, e.g. in suspend for flushing all 3786 * current operations to memory before sleeping. These we 3787 * want to complete as quickly as possible to avoid prolonged 3788 * stalls, so allow the gpu to boost to maximum clocks.
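 * (That is what the gen6_rps_boost() call below does when the caller
 * passes I915_WAIT_FOR_IDLE_BOOST.)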
3789 */ 3790 if (flags & I915_WAIT_FOR_IDLE_BOOST) 3791 gen6_rps_boost(rq, NULL); 3792 3793 timeout = i915_request_wait(rq, flags, timeout); 3794 i915_request_put(rq); 3795 3796 return timeout; 3797 } 3798 3799 static int wait_for_engines(struct drm_i915_private *i915) 3800 { 3801 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 3802 dev_err(i915->drm.dev, 3803 "Failed to idle engines, declaring wedged!\n"); 3804 GEM_TRACE_DUMP(); 3805 i915_gem_set_wedged(i915); 3806 return -EIO; 3807 } 3808 3809 return 0; 3810 } 3811 3812 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 3813 unsigned int flags, long timeout) 3814 { 3815 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 3816 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 3817 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 3818 3819 /* If the device is asleep, we have no requests outstanding */ 3820 if (!READ_ONCE(i915->gt.awake)) 3821 return 0; 3822 3823 if (flags & I915_WAIT_LOCKED) { 3824 struct i915_timeline *tl; 3825 int err; 3826 3827 lockdep_assert_held(&i915->drm.struct_mutex); 3828 3829 list_for_each_entry(tl, &i915->gt.timelines, link) { 3830 timeout = wait_for_timeline(tl, flags, timeout); 3831 if (timeout < 0) 3832 return timeout; 3833 } 3834 if (GEM_SHOW_DEBUG() && !timeout) { 3835 /* Presume that timeout was non-zero to begin with! */ 3836 dev_warn(&i915->drm.pdev->dev, 3837 "Missed idle-completion interrupt!\n"); 3838 GEM_TRACE_DUMP(); 3839 } 3840 3841 err = wait_for_engines(i915); 3842 if (err) 3843 return err; 3844 3845 i915_retire_requests(i915); 3846 GEM_BUG_ON(i915->gt.active_requests); 3847 } else { 3848 struct intel_engine_cs *engine; 3849 enum intel_engine_id id; 3850 3851 for_each_engine(engine, i915, id) { 3852 struct i915_timeline *tl = &engine->timeline; 3853 3854 timeout = wait_for_timeline(tl, flags, timeout); 3855 if (timeout < 0) 3856 return timeout; 3857 } 3858 } 3859 3860 return 0; 3861 } 3862 3863 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3864 { 3865 /* 3866 * We manually flush the CPU domain so that we can override and 3867 * force the flush for the display, and perform it asyncrhonously. 3868 */ 3869 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 3870 if (obj->cache_dirty) 3871 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3872 obj->write_domain = 0; 3873 } 3874 3875 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3876 { 3877 if (!READ_ONCE(obj->pin_global)) 3878 return; 3879 3880 mutex_lock(&obj->base.dev->struct_mutex); 3881 __i915_gem_object_flush_for_display(obj); 3882 mutex_unlock(&obj->base.dev->struct_mutex); 3883 } 3884 3885 /** 3886 * Moves a single object to the WC read, and possibly write domain. 3887 * @obj: object to act on 3888 * @write: ask for write access or read only 3889 * 3890 * This function returns when the move is complete, including waiting on 3891 * flushes to occur. 3892 */ 3893 int 3894 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 3895 { 3896 int ret; 3897 3898 lockdep_assert_held(&obj->base.dev->struct_mutex); 3899 3900 ret = i915_gem_object_wait(obj, 3901 I915_WAIT_INTERRUPTIBLE | 3902 I915_WAIT_LOCKED | 3903 (write ? 
I915_WAIT_ALL : 0), 3904 MAX_SCHEDULE_TIMEOUT, 3905 NULL); 3906 if (ret) 3907 return ret; 3908 3909 if (obj->write_domain == I915_GEM_DOMAIN_WC) 3910 return 0; 3911 3912 /* Flush and acquire obj->pages so that we are coherent through 3913 * direct access in memory with previous cached writes through 3914 * shmemfs and that our cache domain tracking remains valid. 3915 * For example, if the obj->filp was moved to swap without us 3916 * being notified and releasing the pages, we would mistakenly 3917 * continue to assume that the obj remained out of the CPU cached 3918 * domain. 3919 */ 3920 ret = i915_gem_object_pin_pages(obj); 3921 if (ret) 3922 return ret; 3923 3924 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 3925 3926 /* Serialise direct access to this object with the barriers for 3927 * coherent writes from the GPU, by effectively invalidating the 3928 * WC domain upon first access. 3929 */ 3930 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 3931 mb(); 3932 3933 /* It should now be out of any other write domains, and we can update 3934 * the domain values for our changes. 3935 */ 3936 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 3937 obj->read_domains |= I915_GEM_DOMAIN_WC; 3938 if (write) { 3939 obj->read_domains = I915_GEM_DOMAIN_WC; 3940 obj->write_domain = I915_GEM_DOMAIN_WC; 3941 obj->mm.dirty = true; 3942 } 3943 3944 i915_gem_object_unpin_pages(obj); 3945 return 0; 3946 } 3947 3948 /** 3949 * Moves a single object to the GTT read, and possibly write domain. 3950 * @obj: object to act on 3951 * @write: ask for write access or read only 3952 * 3953 * This function returns when the move is complete, including waiting on 3954 * flushes to occur. 3955 */ 3956 int 3957 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3958 { 3959 int ret; 3960 3961 lockdep_assert_held(&obj->base.dev->struct_mutex); 3962 3963 ret = i915_gem_object_wait(obj, 3964 I915_WAIT_INTERRUPTIBLE | 3965 I915_WAIT_LOCKED | 3966 (write ? I915_WAIT_ALL : 0), 3967 MAX_SCHEDULE_TIMEOUT, 3968 NULL); 3969 if (ret) 3970 return ret; 3971 3972 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 3973 return 0; 3974 3975 /* Flush and acquire obj->pages so that we are coherent through 3976 * direct access in memory with previous cached writes through 3977 * shmemfs and that our cache domain tracking remains valid. 3978 * For example, if the obj->filp was moved to swap without us 3979 * being notified and releasing the pages, we would mistakenly 3980 * continue to assume that the obj remained out of the CPU cached 3981 * domain. 3982 */ 3983 ret = i915_gem_object_pin_pages(obj); 3984 if (ret) 3985 return ret; 3986 3987 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 3988 3989 /* Serialise direct access to this object with the barriers for 3990 * coherent writes from the GPU, by effectively invalidating the 3991 * GTT domain upon first access. 3992 */ 3993 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 3994 mb(); 3995 3996 /* It should now be out of any other write domains, and we can update 3997 * the domain values for our changes. 3998 */ 3999 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4000 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4001 if (write) { 4002 obj->read_domains = I915_GEM_DOMAIN_GTT; 4003 obj->write_domain = I915_GEM_DOMAIN_GTT; 4004 obj->mm.dirty = true; 4005 } 4006 4007 i915_gem_object_unpin_pages(obj); 4008 return 0; 4009 } 4010 4011 /** 4012 * Changes the cache-level of an object across all VMA. 
4013 * @obj: object to act on 4014 * @cache_level: new cache level to set for the object 4015 * 4016 * After this function returns, the object will be in the new cache-level 4017 * across all GTT and the contents of the backing storage will be coherent, 4018 * with respect to the new cache-level. In order to keep the backing storage 4019 * coherent for all users, we only allow a single cache level to be set 4020 * globally on the object and prevent it from being changed whilst the 4021 * hardware is reading from the object. That is, if the object is currently 4022 * on the scanout it will be set to uncached (or equivalent display 4023 * cache coherency) and all non-MOCS GPU access will also be uncached so 4024 * that all direct access to the scanout remains coherent. 4025 */ 4026 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4027 enum i915_cache_level cache_level) 4028 { 4029 struct i915_vma *vma; 4030 int ret; 4031 4032 lockdep_assert_held(&obj->base.dev->struct_mutex); 4033 4034 if (obj->cache_level == cache_level) 4035 return 0; 4036 4037 /* Inspect the list of currently bound VMA and unbind any that would 4038 * be invalid given the new cache-level. This is principally to 4039 * catch the issue of the CS prefetch crossing page boundaries and 4040 * reading an invalid PTE on older architectures. 4041 */ 4042 restart: 4043 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4044 if (!drm_mm_node_allocated(&vma->node)) 4045 continue; 4046 4047 if (i915_vma_is_pinned(vma)) { 4048 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4049 return -EBUSY; 4050 } 4051 4052 if (!i915_vma_is_closed(vma) && 4053 i915_gem_valid_gtt_space(vma, cache_level)) 4054 continue; 4055 4056 ret = i915_vma_unbind(vma); 4057 if (ret) 4058 return ret; 4059 4060 /* As unbinding may affect other elements in the 4061 * obj->vma_list (due to side-effects from retiring 4062 * an active vma), play safe and restart the iterator. 4063 */ 4064 goto restart; 4065 } 4066 4067 /* We can reuse the existing drm_mm nodes but need to change the 4068 * cache-level on the PTE. We could simply unbind them all and 4069 * rebind with the correct cache-level on next use. However since 4070 * we already have a valid slot, dma mapping, pages etc, we may as well 4071 * rewrite the PTE in the belief that doing so tramples upon less 4072 * state and so involves less work. 4073 */ 4074 if (obj->bind_count) { 4075 /* Before we change the PTE, the GPU must not be accessing it. 4076 * If we wait upon the object, we know that all the bound 4077 * VMA are no longer active. 4078 */ 4079 ret = i915_gem_object_wait(obj, 4080 I915_WAIT_INTERRUPTIBLE | 4081 I915_WAIT_LOCKED | 4082 I915_WAIT_ALL, 4083 MAX_SCHEDULE_TIMEOUT, 4084 NULL); 4085 if (ret) 4086 return ret; 4087 4088 if (!HAS_LLC(to_i915(obj->base.dev)) && 4089 cache_level != I915_CACHE_NONE) { 4090 /* Access to snoopable pages through the GTT is 4091 * incoherent and on some machines causes a hard 4092 * lockup. Relinquish the CPU mmapping to force 4093 * userspace to refault in the pages and we can 4094 * then double check if the GTT mapping is still 4095 * valid for that pointer access. 4096 */ 4097 i915_gem_release_mmap(obj); 4098 4099 /* As we no longer need a fence for GTT access, 4100 * we can relinquish it now (and so prevent having 4101 * to steal a fence from someone else on the next 4102 * fence request). Note GPU activity would have 4103 * dropped the fence as all snoopable access is 4104 * supposed to be linear.
4105 */ 4106 for_each_ggtt_vma(vma, obj) { 4107 ret = i915_vma_put_fence(vma); 4108 if (ret) 4109 return ret; 4110 } 4111 } else { 4112 /* We either have incoherent backing store and 4113 * so no GTT access or the architecture is fully 4114 * coherent. In such cases, existing GTT mmaps 4115 * ignore the cache bit in the PTE and we can 4116 * rewrite it without confusing the GPU or having 4117 * to force userspace to fault back in its mmaps. 4118 */ 4119 } 4120 4121 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4122 if (!drm_mm_node_allocated(&vma->node)) 4123 continue; 4124 4125 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4126 if (ret) 4127 return ret; 4128 } 4129 } 4130 4131 list_for_each_entry(vma, &obj->vma_list, obj_link) 4132 vma->node.color = cache_level; 4133 i915_gem_object_set_cache_coherency(obj, cache_level); 4134 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4135 4136 return 0; 4137 } 4138 4139 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4140 struct drm_file *file) 4141 { 4142 struct drm_i915_gem_caching *args = data; 4143 struct drm_i915_gem_object *obj; 4144 int err = 0; 4145 4146 rcu_read_lock(); 4147 obj = i915_gem_object_lookup_rcu(file, args->handle); 4148 if (!obj) { 4149 err = -ENOENT; 4150 goto out; 4151 } 4152 4153 switch (obj->cache_level) { 4154 case I915_CACHE_LLC: 4155 case I915_CACHE_L3_LLC: 4156 args->caching = I915_CACHING_CACHED; 4157 break; 4158 4159 case I915_CACHE_WT: 4160 args->caching = I915_CACHING_DISPLAY; 4161 break; 4162 4163 default: 4164 args->caching = I915_CACHING_NONE; 4165 break; 4166 } 4167 out: 4168 rcu_read_unlock(); 4169 return err; 4170 } 4171 4172 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4173 struct drm_file *file) 4174 { 4175 struct drm_i915_private *i915 = to_i915(dev); 4176 struct drm_i915_gem_caching *args = data; 4177 struct drm_i915_gem_object *obj; 4178 enum i915_cache_level level; 4179 int ret = 0; 4180 4181 switch (args->caching) { 4182 case I915_CACHING_NONE: 4183 level = I915_CACHE_NONE; 4184 break; 4185 case I915_CACHING_CACHED: 4186 /* 4187 * Due to a HW issue on BXT A stepping, GPU stores via a 4188 * snooped mapping may leave stale data in a corresponding CPU 4189 * cacheline, whereas normally such cachelines would get 4190 * invalidated. 4191 */ 4192 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4193 return -ENODEV; 4194 4195 level = I915_CACHE_LLC; 4196 break; 4197 case I915_CACHING_DISPLAY: 4198 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4199 break; 4200 default: 4201 return -EINVAL; 4202 } 4203 4204 obj = i915_gem_object_lookup(file, args->handle); 4205 if (!obj) 4206 return -ENOENT; 4207 4208 /* 4209 * The caching mode of proxy object is handled by its generator, and 4210 * not allowed to be changed by userspace. 4211 */ 4212 if (i915_gem_object_is_proxy(obj)) { 4213 ret = -ENXIO; 4214 goto out; 4215 } 4216 4217 if (obj->cache_level == level) 4218 goto out; 4219 4220 ret = i915_gem_object_wait(obj, 4221 I915_WAIT_INTERRUPTIBLE, 4222 MAX_SCHEDULE_TIMEOUT, 4223 to_rps_client(file)); 4224 if (ret) 4225 goto out; 4226 4227 ret = i915_mutex_lock_interruptible(dev); 4228 if (ret) 4229 goto out; 4230 4231 ret = i915_gem_object_set_cache_level(obj, level); 4232 mutex_unlock(&dev->struct_mutex); 4233 4234 out: 4235 i915_gem_object_put(obj); 4236 return ret; 4237 } 4238 4239 /* 4240 * Prepare buffer for display plane (scanout, cursors, etc). 
Can be called from 4241 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4242 * (for pageflips). We only flush the caches while preparing the buffer for 4243 * display, the callers are responsible for frontbuffer flush. 4244 */ 4245 struct i915_vma * 4246 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4247 u32 alignment, 4248 const struct i915_ggtt_view *view, 4249 unsigned int flags) 4250 { 4251 struct i915_vma *vma; 4252 int ret; 4253 4254 lockdep_assert_held(&obj->base.dev->struct_mutex); 4255 4256 /* Mark the global pin early so that we account for the 4257 * display coherency whilst setting up the cache domains. 4258 */ 4259 obj->pin_global++; 4260 4261 /* The display engine is not coherent with the LLC cache on gen6. As 4262 * a result, we make sure that the pinning that is about to occur is 4263 * done with uncached PTEs. This is lowest common denominator for all 4264 * chipsets. 4265 * 4266 * However for gen6+, we could do better by using the GFDT bit instead 4267 * of uncaching, which would allow us to flush all the LLC-cached data 4268 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4269 */ 4270 ret = i915_gem_object_set_cache_level(obj, 4271 HAS_WT(to_i915(obj->base.dev)) ? 4272 I915_CACHE_WT : I915_CACHE_NONE); 4273 if (ret) { 4274 vma = ERR_PTR(ret); 4275 goto err_unpin_global; 4276 } 4277 4278 /* As the user may map the buffer once pinned in the display plane 4279 * (e.g. libkms for the bootup splash), we have to ensure that we 4280 * always use map_and_fenceable for all scanout buffers. However, 4281 * it may simply be too big to fit into mappable, in which case 4282 * put it anyway and hope that userspace can cope (but always first 4283 * try to preserve the existing ABI). 4284 */ 4285 vma = ERR_PTR(-ENOSPC); 4286 if ((flags & PIN_MAPPABLE) == 0 && 4287 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4288 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4289 flags | 4290 PIN_MAPPABLE | 4291 PIN_NONBLOCK); 4292 if (IS_ERR(vma)) 4293 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4294 if (IS_ERR(vma)) 4295 goto err_unpin_global; 4296 4297 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4298 4299 __i915_gem_object_flush_for_display(obj); 4300 4301 /* It should now be out of any other write domains, and we can update 4302 * the domain values for our changes. 4303 */ 4304 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4305 4306 return vma; 4307 4308 err_unpin_global: 4309 obj->pin_global--; 4310 return vma; 4311 } 4312 4313 void 4314 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4315 { 4316 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4317 4318 if (WARN_ON(vma->obj->pin_global == 0)) 4319 return; 4320 4321 if (--vma->obj->pin_global == 0) 4322 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4323 4324 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4325 i915_gem_object_bump_inactive_ggtt(vma->obj); 4326 4327 i915_vma_unpin(vma); 4328 } 4329 4330 /** 4331 * Moves a single object to the CPU read, and possibly write domain. 4332 * @obj: object to act on 4333 * @write: requesting write or read-only access 4334 * 4335 * This function returns when the move is complete, including waiting on 4336 * flushes to occur. 
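 *
 * A minimal illustrative call site (a sketch, not a snippet taken from this
 * file; it assumes the caller already holds struct_mutex and a reference on
 * the object):
 *
 *	err = i915_gem_object_set_to_cpu_domain(obj, true);
 *	if (err)
 *		return err;
 *
 * after which CPU access to the object's pages is tracked in the CPU domain
 * and, for a write, the GPU read domains are invalidated at next use.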
4337 */ 4338 int 4339 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4340 { 4341 int ret; 4342 4343 lockdep_assert_held(&obj->base.dev->struct_mutex); 4344 4345 ret = i915_gem_object_wait(obj, 4346 I915_WAIT_INTERRUPTIBLE | 4347 I915_WAIT_LOCKED | 4348 (write ? I915_WAIT_ALL : 0), 4349 MAX_SCHEDULE_TIMEOUT, 4350 NULL); 4351 if (ret) 4352 return ret; 4353 4354 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4355 4356 /* Flush the CPU cache if it's still invalid. */ 4357 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4358 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4359 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4360 } 4361 4362 /* It should now be out of any other write domains, and we can update 4363 * the domain values for our changes. 4364 */ 4365 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4366 4367 /* If we're writing through the CPU, then the GPU read domains will 4368 * need to be invalidated at next use. 4369 */ 4370 if (write) 4371 __start_cpu_write(obj); 4372 4373 return 0; 4374 } 4375 4376 /* Throttle our rendering by waiting until the ring has completed our requests 4377 * emitted over 20 msec ago. 4378 * 4379 * Note that if we were to use the current jiffies each time around the loop, 4380 * we wouldn't escape the function with any frames outstanding if the time to 4381 * render a frame was over 20ms. 4382 * 4383 * This should get us reasonable parallelism between CPU and GPU but also 4384 * relatively low latency when blocking on a particular request to finish. 4385 */ 4386 static int 4387 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4388 { 4389 struct drm_i915_private *dev_priv = to_i915(dev); 4390 struct drm_i915_file_private *file_priv = file->driver_priv; 4391 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4392 struct i915_request *request, *target = NULL; 4393 long ret; 4394 4395 /* ABI: return -EIO if already wedged */ 4396 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4397 return -EIO; 4398 4399 spin_lock(&file_priv->mm.lock); 4400 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4401 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4402 break; 4403 4404 if (target) { 4405 list_del(&target->client_link); 4406 target->file_priv = NULL; 4407 } 4408 4409 target = request; 4410 } 4411 if (target) 4412 i915_request_get(target); 4413 spin_unlock(&file_priv->mm.lock); 4414 4415 if (target == NULL) 4416 return 0; 4417 4418 ret = i915_request_wait(target, 4419 I915_WAIT_INTERRUPTIBLE, 4420 MAX_SCHEDULE_TIMEOUT); 4421 i915_request_put(target); 4422 4423 return ret < 0 ? ret : 0; 4424 } 4425 4426 struct i915_vma * 4427 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4428 const struct i915_ggtt_view *view, 4429 u64 size, 4430 u64 alignment, 4431 u64 flags) 4432 { 4433 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4434 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4435 struct i915_vma *vma; 4436 int ret; 4437 4438 lockdep_assert_held(&obj->base.dev->struct_mutex); 4439 4440 if (flags & PIN_MAPPABLE && 4441 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4442 /* If the required space is larger than the available 4443 * aperture, we will not be able to find a slot for the 4444 * object and unbinding the object now will be in 4445 * vain. Worse, doing so may cause us to ping-pong 4446 * the object in and out of the Global GTT and 4447 * waste a lot of cycles under the mutex.
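 *
 * As a worked example (assuming a purely hypothetical 256 MiB mappable
 * aperture): a 512 MiB object is rejected immediately below with -E2BIG,
 * and with PIN_NONBLOCK set anything larger than 128 MiB (half the
 * aperture) is refused with -ENOSPC further down, leaving the caller to
 * fall back to a non-mappable binding.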
4448 */ 4449 if (obj->base.size > dev_priv->ggtt.mappable_end) 4450 return ERR_PTR(-E2BIG); 4451 4452 /* If NONBLOCK is set the caller is optimistically 4453 * trying to cache the full object within the mappable 4454 * aperture, and *must* have a fallback in place for 4455 * situations where we cannot bind the object. We 4456 * can be a little more lax here and use the fallback 4457 * more often to avoid costly migrations of ourselves 4458 * and other objects within the aperture. 4459 * 4460 * Half-the-aperture is used as a simple heuristic. 4461 * More interesting would be to search for a free 4462 * block prior to making the commitment to unbind. 4463 * That caters for the self-harm case, and with a 4464 * little more heuristics (e.g. NOFAULT, NOEVICT) 4465 * we could try to minimise harm to others. 4466 */ 4467 if (flags & PIN_NONBLOCK && 4468 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4469 return ERR_PTR(-ENOSPC); 4470 } 4471 4472 vma = i915_vma_instance(obj, vm, view); 4473 if (unlikely(IS_ERR(vma))) 4474 return vma; 4475 4476 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4477 if (flags & PIN_NONBLOCK) { 4478 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4479 return ERR_PTR(-ENOSPC); 4480 4481 if (flags & PIN_MAPPABLE && 4482 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4483 return ERR_PTR(-ENOSPC); 4484 } 4485 4486 WARN(i915_vma_is_pinned(vma), 4487 "bo is already pinned in ggtt with incorrect alignment:" 4488 " offset=%08x, req.alignment=%llx," 4489 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4490 i915_ggtt_offset(vma), alignment, 4491 !!(flags & PIN_MAPPABLE), 4492 i915_vma_is_map_and_fenceable(vma)); 4493 ret = i915_vma_unbind(vma); 4494 if (ret) 4495 return ERR_PTR(ret); 4496 } 4497 4498 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4499 if (ret) 4500 return ERR_PTR(ret); 4501 4502 return vma; 4503 } 4504 4505 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4506 { 4507 /* Note that we could alias engines in the execbuf API, but 4508 * that would be very unwise as it prevents userspace from 4509 * having fine control over engine selection. Ahem. 4510 * 4511 * This should be something like EXEC_MAX_ENGINE instead of 4512 * I915_NUM_ENGINES. 4513 */ 4514 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4515 return 0x10000 << id; 4516 } 4517 4518 static __always_inline unsigned int __busy_write_id(unsigned int id) 4519 { 4520 /* The uABI guarantees an active writer is also amongst the read 4521 * engines. This would be true if we accessed the activity tracking 4522 * under the lock, but as we perform the lookup of the object and 4523 * its activity locklessly we can not guarantee that the last_write 4524 * being active implies that we have set the same engine flag from 4525 * last_read - hence we always set both read and write busy for 4526 * last_write. 4527 */ 4528 return id | __busy_read_flag(id); 4529 } 4530 4531 static __always_inline unsigned int 4532 __busy_set_if_active(const struct dma_fence *fence, 4533 unsigned int (*flag)(unsigned int id)) 4534 { 4535 struct i915_request *rq; 4536 4537 /* We have to check the current hw status of the fence as the uABI 4538 * guarantees forward progress. We could rely on the idle worker 4539 * to eventually flush us, but to minimise latency just ask the 4540 * hardware. 4541 * 4542 * Note we only report on the status of native fences.
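 *
 * As a worked example of the encoding above (hypothetically taking an
 * engine whose uabi_id is 2): __busy_read_flag(2) evaluates to
 * 0x10000 << 2 == 0x40000, and __busy_write_id(2) to 2 | 0x40000 ==
 * 0x40002. Userspace therefore sees the writer's id in the low 16 bits
 * of args->busy and a bitmask of all reading engines in the upper bits.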
4543 */ 4544 if (!dma_fence_is_i915(fence)) 4545 return 0; 4546 4547 /* opencode to_request() in order to avoid const warnings */ 4548 rq = container_of(fence, struct i915_request, fence); 4549 if (i915_request_completed(rq)) 4550 return 0; 4551 4552 return flag(rq->engine->uabi_id); 4553 } 4554 4555 static __always_inline unsigned int 4556 busy_check_reader(const struct dma_fence *fence) 4557 { 4558 return __busy_set_if_active(fence, __busy_read_flag); 4559 } 4560 4561 static __always_inline unsigned int 4562 busy_check_writer(const struct dma_fence *fence) 4563 { 4564 if (!fence) 4565 return 0; 4566 4567 return __busy_set_if_active(fence, __busy_write_id); 4568 } 4569 4570 int 4571 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4572 struct drm_file *file) 4573 { 4574 struct drm_i915_gem_busy *args = data; 4575 struct drm_i915_gem_object *obj; 4576 struct reservation_object_list *list; 4577 unsigned int seq; 4578 int err; 4579 4580 err = -ENOENT; 4581 rcu_read_lock(); 4582 obj = i915_gem_object_lookup_rcu(file, args->handle); 4583 if (!obj) 4584 goto out; 4585 4586 /* A discrepancy here is that we do not report the status of 4587 * non-i915 fences, i.e. even though we may report the object as idle, 4588 * a call to set-domain may still stall waiting for foreign rendering. 4589 * This also means that wait-ioctl may report an object as busy, 4590 * where busy-ioctl considers it idle. 4591 * 4592 * We trade the ability to warn of foreign fences to report on which 4593 * i915 engines are active for the object. 4594 * 4595 * Alternatively, we can trade that extra information on read/write 4596 * activity with 4597 * args->busy = 4598 * !reservation_object_test_signaled_rcu(obj->resv, true); 4599 * to report the overall busyness. This is what the wait-ioctl does. 
4600 * 4601 */ 4602 retry: 4603 seq = raw_read_seqcount(&obj->resv->seq); 4604 4605 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4606 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4607 4608 /* Translate shared fences to READ set of engines */ 4609 list = rcu_dereference(obj->resv->fence); 4610 if (list) { 4611 unsigned int shared_count = list->shared_count, i; 4612 4613 for (i = 0; i < shared_count; ++i) { 4614 struct dma_fence *fence = 4615 rcu_dereference(list->shared[i]); 4616 4617 args->busy |= busy_check_reader(fence); 4618 } 4619 } 4620 4621 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4622 goto retry; 4623 4624 err = 0; 4625 out: 4626 rcu_read_unlock(); 4627 return err; 4628 } 4629 4630 int 4631 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4632 struct drm_file *file_priv) 4633 { 4634 return i915_gem_ring_throttle(dev, file_priv); 4635 } 4636 4637 int 4638 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4639 struct drm_file *file_priv) 4640 { 4641 struct drm_i915_private *dev_priv = to_i915(dev); 4642 struct drm_i915_gem_madvise *args = data; 4643 struct drm_i915_gem_object *obj; 4644 int err; 4645 4646 switch (args->madv) { 4647 case I915_MADV_DONTNEED: 4648 case I915_MADV_WILLNEED: 4649 break; 4650 default: 4651 return -EINVAL; 4652 } 4653 4654 obj = i915_gem_object_lookup(file_priv, args->handle); 4655 if (!obj) 4656 return -ENOENT; 4657 4658 err = mutex_lock_interruptible(&obj->mm.lock); 4659 if (err) 4660 goto out; 4661 4662 if (i915_gem_object_has_pages(obj) && 4663 i915_gem_object_is_tiled(obj) && 4664 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4665 if (obj->mm.madv == I915_MADV_WILLNEED) { 4666 GEM_BUG_ON(!obj->mm.quirked); 4667 __i915_gem_object_unpin_pages(obj); 4668 obj->mm.quirked = false; 4669 } 4670 if (args->madv == I915_MADV_WILLNEED) { 4671 GEM_BUG_ON(obj->mm.quirked); 4672 __i915_gem_object_pin_pages(obj); 4673 obj->mm.quirked = true; 4674 } 4675 } 4676 4677 if (obj->mm.madv != __I915_MADV_PURGED) 4678 obj->mm.madv = args->madv; 4679 4680 /* if the object is no longer attached, discard its backing storage */ 4681 if (obj->mm.madv == I915_MADV_DONTNEED && 4682 !i915_gem_object_has_pages(obj)) 4683 i915_gem_object_truncate(obj); 4684 4685 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4686 mutex_unlock(&obj->mm.lock); 4687 4688 out: 4689 i915_gem_object_put(obj); 4690 return err; 4691 } 4692 4693 static void 4694 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 4695 { 4696 struct drm_i915_gem_object *obj = 4697 container_of(active, typeof(*obj), frontbuffer_write); 4698 4699 intel_fb_obj_flush(obj, ORIGIN_CS); 4700 } 4701 4702 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4703 const struct drm_i915_gem_object_ops *ops) 4704 { 4705 mutex_init(&obj->mm.lock); 4706 4707 INIT_LIST_HEAD(&obj->vma_list); 4708 INIT_LIST_HEAD(&obj->lut_list); 4709 INIT_LIST_HEAD(&obj->batch_pool_link); 4710 4711 init_rcu_head(&obj->rcu); 4712 4713 obj->ops = ops; 4714 4715 reservation_object_init(&obj->__builtin_resv); 4716 obj->resv = &obj->__builtin_resv; 4717 4718 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4719 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4720 4721 obj->mm.madv = I915_MADV_WILLNEED; 4722 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4723 mutex_init(&obj->mm.get_page.lock); 4724 4725 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4726 } 4727 4728 static const struct 
drm_i915_gem_object_ops i915_gem_object_ops = { 4729 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4730 I915_GEM_OBJECT_IS_SHRINKABLE, 4731 4732 .get_pages = i915_gem_object_get_pages_gtt, 4733 .put_pages = i915_gem_object_put_pages_gtt, 4734 4735 .pwrite = i915_gem_object_pwrite_gtt, 4736 }; 4737 4738 static int i915_gem_object_create_shmem(struct drm_device *dev, 4739 struct drm_gem_object *obj, 4740 size_t size) 4741 { 4742 struct drm_i915_private *i915 = to_i915(dev); 4743 unsigned long flags = VM_NORESERVE; 4744 struct file *filp; 4745 4746 drm_gem_private_object_init(dev, obj, size); 4747 4748 if (i915->mm.gemfs) 4749 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 4750 flags); 4751 else 4752 filp = shmem_file_setup("i915", size, flags); 4753 4754 if (IS_ERR(filp)) 4755 return PTR_ERR(filp); 4756 4757 obj->filp = filp; 4758 4759 return 0; 4760 } 4761 4762 struct drm_i915_gem_object * 4763 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4764 { 4765 struct drm_i915_gem_object *obj; 4766 struct address_space *mapping; 4767 unsigned int cache_level; 4768 gfp_t mask; 4769 int ret; 4770 4771 /* There is a prevalence of the assumption that we fit the object's 4772 * page count inside a 32bit _signed_ variable. Let's document this and 4773 * catch if we ever need to fix it. In the meantime, if you do spot 4774 * such a local variable, please consider fixing! 4775 */ 4776 if (size >> PAGE_SHIFT > INT_MAX) 4777 return ERR_PTR(-E2BIG); 4778 4779 if (overflows_type(size, obj->base.size)) 4780 return ERR_PTR(-E2BIG); 4781 4782 obj = i915_gem_object_alloc(dev_priv); 4783 if (obj == NULL) 4784 return ERR_PTR(-ENOMEM); 4785 4786 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 4787 if (ret) 4788 goto fail; 4789 4790 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4791 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4792 /* 965gm cannot relocate objects above 4GiB. */ 4793 mask &= ~__GFP_HIGHMEM; 4794 mask |= __GFP_DMA32; 4795 } 4796 4797 mapping = obj->base.filp->f_mapping; 4798 mapping_set_gfp_mask(mapping, mask); 4799 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4800 4801 i915_gem_object_init(obj, &i915_gem_object_ops); 4802 4803 obj->write_domain = I915_GEM_DOMAIN_CPU; 4804 obj->read_domains = I915_GEM_DOMAIN_CPU; 4805 4806 if (HAS_LLC(dev_priv)) 4807 /* On some devices, we can have the GPU use the LLC (the CPU 4808 * cache) for about a 10% performance improvement 4809 * compared to uncached. Graphics requests other than 4810 * display scanout are coherent with the CPU in 4811 * accessing this cache. This means in this mode we 4812 * don't need to clflush on the CPU side, and on the 4813 * GPU side we only need to flush internal caches to 4814 * get data visible to the CPU. 4815 * 4816 * However, we maintain the display planes as UC, and so 4817 * need to rebind when first used as such. 4818 */ 4819 cache_level = I915_CACHE_LLC; 4820 else 4821 cache_level = I915_CACHE_NONE; 4822 4823 i915_gem_object_set_cache_coherency(obj, cache_level); 4824 4825 trace_i915_gem_object_create(obj); 4826 4827 return obj; 4828 4829 fail: 4830 i915_gem_object_free(obj); 4831 return ERR_PTR(ret); 4832 } 4833 4834 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4835 { 4836 /* If we are the last user of the backing storage (be it shmemfs 4837 * pages or stolen etc), we know that the pages are going to be 4838 * immediately released. In this case, we can then skip copying 4839 * back the contents from the GPU. 
4840 */ 4841 4842 if (obj->mm.madv != I915_MADV_WILLNEED) 4843 return false; 4844 4845 if (obj->base.filp == NULL) 4846 return true; 4847 4848 /* At first glance, this looks racy, but then again so would be 4849 * userspace racing mmap against close. However, the first external 4850 * reference to the filp can only be obtained through the 4851 * i915_gem_mmap_ioctl() which safeguards us against the user 4852 * acquiring such a reference whilst we are in the middle of 4853 * freeing the object. 4854 */ 4855 return atomic_long_read(&obj->base.filp->f_count) == 1; 4856 } 4857 4858 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4859 struct llist_node *freed) 4860 { 4861 struct drm_i915_gem_object *obj, *on; 4862 4863 intel_runtime_pm_get(i915); 4864 llist_for_each_entry_safe(obj, on, freed, freed) { 4865 struct i915_vma *vma, *vn; 4866 4867 trace_i915_gem_object_destroy(obj); 4868 4869 mutex_lock(&i915->drm.struct_mutex); 4870 4871 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4872 list_for_each_entry_safe(vma, vn, 4873 &obj->vma_list, obj_link) { 4874 GEM_BUG_ON(i915_vma_is_active(vma)); 4875 vma->flags &= ~I915_VMA_PIN_MASK; 4876 i915_vma_destroy(vma); 4877 } 4878 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4879 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4880 4881 /* This serializes freeing with the shrinker. Since the free 4882 * is delayed, first by RCU then by the workqueue, we want the 4883 * shrinker to be able to free pages of unreferenced objects, 4884 * or else we may oom whilst there are plenty of deferred 4885 * freed objects. 4886 */ 4887 if (i915_gem_object_has_pages(obj)) { 4888 spin_lock(&i915->mm.obj_lock); 4889 list_del_init(&obj->mm.link); 4890 spin_unlock(&i915->mm.obj_lock); 4891 } 4892 4893 mutex_unlock(&i915->drm.struct_mutex); 4894 4895 GEM_BUG_ON(obj->bind_count); 4896 GEM_BUG_ON(obj->userfault_count); 4897 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4898 GEM_BUG_ON(!list_empty(&obj->lut_list)); 4899 4900 if (obj->ops->release) 4901 obj->ops->release(obj); 4902 4903 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4904 atomic_set(&obj->mm.pages_pin_count, 0); 4905 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4906 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 4907 4908 if (obj->base.import_attach) 4909 drm_prime_gem_destroy(&obj->base, NULL); 4910 4911 reservation_object_fini(&obj->__builtin_resv); 4912 drm_gem_object_release(&obj->base); 4913 i915_gem_info_remove_obj(i915, obj->base.size); 4914 4915 kfree(obj->bit_17); 4916 i915_gem_object_free(obj); 4917 4918 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 4919 atomic_dec(&i915->mm.free_count); 4920 4921 if (on) 4922 cond_resched(); 4923 } 4924 intel_runtime_pm_put(i915); 4925 } 4926 4927 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4928 { 4929 struct llist_node *freed; 4930 4931 /* Free the oldest, most stale object to keep the free_list short */ 4932 freed = NULL; 4933 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 4934 /* Only one consumer of llist_del_first() allowed */ 4935 spin_lock(&i915->mm.free_lock); 4936 freed = llist_del_first(&i915->mm.free_list); 4937 spin_unlock(&i915->mm.free_lock); 4938 } 4939 if (unlikely(freed)) { 4940 freed->next = NULL; 4941 __i915_gem_free_objects(i915, freed); 4942 } 4943 } 4944 4945 static void __i915_gem_free_work(struct work_struct *work) 4946 { 4947 struct drm_i915_private *i915 = 4948 container_of(work, struct drm_i915_private, mm.free_work); 4949 struct llist_node *freed; 4950 4951 /* 4952 * All 
file-owned VMA should have been released by this point through 4953 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4954 * However, the object may also be bound into the global GTT (e.g. 4955 * older GPUs without per-process support, or for direct access through 4956 * the GTT either for the user or for scanout). Those VMA still need to 4957 * be unbound now. 4958 */ 4959 4960 spin_lock(&i915->mm.free_lock); 4961 while ((freed = llist_del_all(&i915->mm.free_list))) { 4962 spin_unlock(&i915->mm.free_lock); 4963 4964 __i915_gem_free_objects(i915, freed); 4965 if (need_resched()) 4966 return; 4967 4968 spin_lock(&i915->mm.free_lock); 4969 } 4970 spin_unlock(&i915->mm.free_lock); 4971 } 4972 4973 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4974 { 4975 struct drm_i915_gem_object *obj = 4976 container_of(head, typeof(*obj), rcu); 4977 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4978 4979 /* 4980 * We reuse obj->rcu for the freed list, so we had better not treat 4981 * it like a rcu_head from this point forwards. And we expect all 4982 * objects to be freed via this path. 4983 */ 4984 destroy_rcu_head(&obj->rcu); 4985 4986 /* 4987 * Since we require blocking on struct_mutex to unbind the freed 4988 * object from the GPU before releasing resources back to the 4989 * system, we can not do that directly from the RCU callback (which may 4990 * be a softirq context), but must instead defer that work onto a 4991 * kthread. We use the RCU callback rather than move the freed object 4992 * directly onto the work queue so that we can mix between using the 4993 * worker and performing frees directly from subsequent allocations for 4994 * crude but effective memory throttling. 4995 */ 4996 if (llist_add(&obj->freed, &i915->mm.free_list)) 4997 queue_work(i915->wq, &i915->mm.free_work); 4998 } 4999 5000 void i915_gem_free_object(struct drm_gem_object *gem_obj) 5001 { 5002 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 5003 5004 if (obj->mm.quirked) 5005 __i915_gem_object_unpin_pages(obj); 5006 5007 if (discard_backing_storage(obj)) 5008 obj->mm.madv = I915_MADV_DONTNEED; 5009 5010 /* 5011 * Before we free the object, make sure any pure RCU-only 5012 * read-side critical sections are complete, e.g. 5013 * i915_gem_busy_ioctl(). For the corresponding synchronized 5014 * lookup see i915_gem_object_lookup_rcu(). 5015 */ 5016 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5017 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5018 } 5019 5020 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5021 { 5022 lockdep_assert_held(&obj->base.dev->struct_mutex); 5023 5024 if (!i915_gem_object_has_active_reference(obj) && 5025 i915_gem_object_is_active(obj)) 5026 i915_gem_object_set_active_reference(obj); 5027 else 5028 i915_gem_object_put(obj); 5029 } 5030 5031 void i915_gem_sanitize(struct drm_i915_private *i915) 5032 { 5033 int err; 5034 5035 GEM_TRACE("\n"); 5036 5037 mutex_lock(&i915->drm.struct_mutex); 5038 5039 intel_runtime_pm_get(i915); 5040 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5041 5042 /* 5043 * As we have just resumed the machine and woken the device up from 5044 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5045 * back to defaults, recovering from whatever wedged state we left it 5046 * in and so worth trying to use the device once more.
5047 */ 5048 if (i915_terminally_wedged(&i915->gpu_error)) 5049 i915_gem_unset_wedged(i915); 5050 5051 /* 5052 * If we inherit context state from the BIOS or earlier occupants 5053 * of the GPU, the GPU may be in an inconsistent state when we 5054 * try to take over. The only way to remove the earlier state 5055 * is by resetting. However, resetting on earlier gen is tricky as 5056 * it may impact the display and we are uncertain about the stability 5057 * of the reset, so we only attempt it on gen5 and later. 5058 */ 5059 err = -ENODEV; 5060 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5061 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5062 if (!err) 5063 intel_engines_sanitize(i915); 5064 5065 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5066 intel_runtime_pm_put(i915); 5067 5068 i915_gem_contexts_lost(i915); 5069 mutex_unlock(&i915->drm.struct_mutex); 5070 } 5071 5072 int i915_gem_suspend(struct drm_i915_private *i915) 5073 { 5074 int ret; 5075 5076 GEM_TRACE("\n"); 5077 5078 intel_runtime_pm_get(i915); 5079 intel_suspend_gt_powersave(i915); 5080 5081 mutex_lock(&i915->drm.struct_mutex); 5082 5083 /* 5084 * We have to flush all the executing contexts to main memory so 5085 * that they can be saved in the hibernation image. To ensure the last 5086 * context image is coherent, we have to switch away from it. That 5087 * leaves the i915->kernel_context still active when 5088 * we actually suspend, and its image in memory may not match the GPU 5089 * state. Fortunately, the kernel_context is disposable and we do 5090 * not rely on its state. 5091 */ 5092 if (!i915_terminally_wedged(&i915->gpu_error)) { 5093 ret = i915_gem_switch_to_kernel_context(i915); 5094 if (ret) 5095 goto err_unlock; 5096 5097 ret = i915_gem_wait_for_idle(i915, 5098 I915_WAIT_INTERRUPTIBLE | 5099 I915_WAIT_LOCKED | 5100 I915_WAIT_FOR_IDLE_BOOST, 5101 MAX_SCHEDULE_TIMEOUT); 5102 if (ret && ret != -EIO) 5103 goto err_unlock; 5104 5105 assert_kernel_context_is_current(i915); 5106 } 5107 i915_retire_requests(i915); /* ensure we flush after wedging */ 5108 5109 mutex_unlock(&i915->drm.struct_mutex); 5110 5111 intel_uc_suspend(i915); 5112 5113 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5114 cancel_delayed_work_sync(&i915->gt.retire_work); 5115 5116 /* 5117 * As the idle_work rearms itself if it detects a race, play safe and 5118 * repeat the flush until it is definitely idle. 5119 */ 5120 drain_delayed_work(&i915->gt.idle_work); 5121 5122 /* 5123 * Assert that we successfully flushed all the work and 5124 * reset the GPU back to its idle, low power state. 5125 */ 5126 WARN_ON(i915->gt.awake); 5127 if (WARN_ON(!intel_engines_are_idle(i915))) 5128 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5129 5130 intel_runtime_pm_put(i915); 5131 return 0; 5132 5133 err_unlock: 5134 mutex_unlock(&i915->drm.struct_mutex); 5135 intel_runtime_pm_put(i915); 5136 return ret; 5137 } 5138 5139 void i915_gem_suspend_late(struct drm_i915_private *i915) 5140 { 5141 struct drm_i915_gem_object *obj; 5142 struct list_head *phases[] = { 5143 &i915->mm.unbound_list, 5144 &i915->mm.bound_list, 5145 NULL 5146 }, **phase; 5147 5148 /* 5149 * Neither the BIOS, ourselves nor any other kernel 5150 * expects the system to be in execlists mode on startup, 5151 * so we need to reset the GPU back to legacy mode. And the only 5152 * known way to disable logical contexts is through a GPU reset.
5153 * 5154 * So in order to leave the system in a known default configuration, 5155 * always reset the GPU upon unload and suspend. Afterwards we 5156 * clean up the GEM state tracking, flushing off the requests and 5157 * leaving the system in a known idle state. 5158 * 5159 * Note that it is of the utmost importance that the GPU is idle and 5160 * all stray writes are flushed *before* we dismantle the backing 5161 * storage for the pinned objects. 5162 * 5163 * However, since we are uncertain that resetting the GPU on older 5164 * machines is a good idea, we don't - just in case it leaves the 5165 * machine in an unusable condition. 5166 */ 5167 5168 mutex_lock(&i915->drm.struct_mutex); 5169 for (phase = phases; *phase; phase++) { 5170 list_for_each_entry(obj, *phase, mm.link) 5171 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5172 } 5173 mutex_unlock(&i915->drm.struct_mutex); 5174 5175 intel_uc_sanitize(i915); 5176 i915_gem_sanitize(i915); 5177 } 5178 5179 void i915_gem_resume(struct drm_i915_private *i915) 5180 { 5181 GEM_TRACE("\n"); 5182 5183 WARN_ON(i915->gt.awake); 5184 5185 mutex_lock(&i915->drm.struct_mutex); 5186 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5187 5188 i915_gem_restore_gtt_mappings(i915); 5189 i915_gem_restore_fences(i915); 5190 5191 /* 5192 * As we didn't flush the kernel context before suspend, we cannot 5193 * guarantee that the context image is complete. So let's just reset 5194 * it and start again. 5195 */ 5196 i915->gt.resume(i915); 5197 5198 if (i915_gem_init_hw(i915)) 5199 goto err_wedged; 5200 5201 intel_uc_resume(i915); 5202 5203 /* Always reload a context for powersaving. */ 5204 if (i915_gem_switch_to_kernel_context(i915)) 5205 goto err_wedged; 5206 5207 out_unlock: 5208 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5209 mutex_unlock(&i915->drm.struct_mutex); 5210 return; 5211 5212 err_wedged: 5213 if (!i915_terminally_wedged(&i915->gpu_error)) { 5214 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5215 i915_gem_set_wedged(i915); 5216 } 5217 goto out_unlock; 5218 } 5219 5220 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5221 { 5222 if (INTEL_GEN(dev_priv) < 5 || 5223 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5224 return; 5225 5226 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5227 DISP_TILE_SURFACE_SWIZZLING); 5228 5229 if (IS_GEN5(dev_priv)) 5230 return; 5231 5232 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5233 if (IS_GEN6(dev_priv)) 5234 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5235 else if (IS_GEN7(dev_priv)) 5236 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5237 else if (IS_GEN8(dev_priv)) 5238 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5239 else 5240 BUG(); 5241 } 5242 5243 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5244 { 5245 I915_WRITE(RING_CTL(base), 0); 5246 I915_WRITE(RING_HEAD(base), 0); 5247 I915_WRITE(RING_TAIL(base), 0); 5248 I915_WRITE(RING_START(base), 0); 5249 } 5250 5251 static void init_unused_rings(struct drm_i915_private *dev_priv) 5252 { 5253 if (IS_I830(dev_priv)) { 5254 init_unused_ring(dev_priv, PRB1_BASE); 5255 init_unused_ring(dev_priv, SRB0_BASE); 5256 init_unused_ring(dev_priv, SRB1_BASE); 5257 init_unused_ring(dev_priv, SRB2_BASE); 5258 init_unused_ring(dev_priv, SRB3_BASE); 5259 } else if (IS_GEN2(dev_priv)) { 5260 init_unused_ring(dev_priv, SRB0_BASE); 5261 init_unused_ring(dev_priv, SRB1_BASE); 5262 } else if (IS_GEN3(dev_priv)) { 5263
init_unused_ring(dev_priv, PRB1_BASE); 5264 init_unused_ring(dev_priv, PRB2_BASE); 5265 } 5266 } 5267 5268 static int __i915_gem_restart_engines(void *data) 5269 { 5270 struct drm_i915_private *i915 = data; 5271 struct intel_engine_cs *engine; 5272 enum intel_engine_id id; 5273 int err; 5274 5275 for_each_engine(engine, i915, id) { 5276 err = engine->init_hw(engine); 5277 if (err) { 5278 DRM_ERROR("Failed to restart %s (%d)\n", 5279 engine->name, err); 5280 return err; 5281 } 5282 } 5283 5284 return 0; 5285 } 5286 5287 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5288 { 5289 int ret; 5290 5291 dev_priv->gt.last_init_time = ktime_get(); 5292 5293 /* Double layer security blanket, see i915_gem_init() */ 5294 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5295 5296 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5297 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5298 5299 if (IS_HASWELL(dev_priv)) 5300 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5301 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5302 5303 /* Apply the GT workarounds... */ 5304 intel_gt_apply_workarounds(dev_priv); 5305 /* ...and determine whether they are sticking. */ 5306 intel_gt_verify_workarounds(dev_priv, "init"); 5307 5308 i915_gem_init_swizzling(dev_priv); 5309 5310 /* 5311 * At least 830 can leave some of the unused rings 5312 * "active" (ie. head != tail) after resume which 5313 * will prevent c3 entry. Makes sure all unused rings 5314 * are totally idle. 5315 */ 5316 init_unused_rings(dev_priv); 5317 5318 BUG_ON(!dev_priv->kernel_context); 5319 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5320 ret = -EIO; 5321 goto out; 5322 } 5323 5324 ret = i915_ppgtt_init_hw(dev_priv); 5325 if (ret) { 5326 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5327 goto out; 5328 } 5329 5330 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5331 if (ret) { 5332 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5333 goto out; 5334 } 5335 5336 /* We can't enable contexts until all firmware is loaded */ 5337 ret = intel_uc_init_hw(dev_priv); 5338 if (ret) { 5339 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5340 goto out; 5341 } 5342 5343 intel_mocs_init_l3cc_table(dev_priv); 5344 5345 /* Only when the HW is re-initialised, can we replay the requests */ 5346 ret = __i915_gem_restart_engines(dev_priv); 5347 if (ret) 5348 goto cleanup_uc; 5349 5350 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5351 5352 return 0; 5353 5354 cleanup_uc: 5355 intel_uc_fini_hw(dev_priv); 5356 out: 5357 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5358 5359 return ret; 5360 } 5361 5362 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5363 { 5364 struct i915_gem_context *ctx; 5365 struct intel_engine_cs *engine; 5366 enum intel_engine_id id; 5367 int err; 5368 5369 /* 5370 * As we reset the gpu during very early sanitisation, the current 5371 * register state on the GPU should reflect its defaults values. 5372 * We load a context onto the hw (with restore-inhibit), then switch 5373 * over to a second context to save that default register state. We 5374 * can then prime every new context with that state so they all start 5375 * from the same default HW values. 
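 *
 * In outline, the code below does the following (a summary of this function,
 * not a normative description): create a throwaway kernel context, submit a
 * request on every engine (running any engine->init_context setup) so each
 * engine loads its default register state, switch back to
 * i915->kernel_context and wait for the GPU to idle, then unbind each
 * engine's context image and keep a reference to it as
 * engine->default_state for priming future contexts.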
5376 */ 5377 5378 ctx = i915_gem_context_create_kernel(i915, 0); 5379 if (IS_ERR(ctx)) 5380 return PTR_ERR(ctx); 5381 5382 for_each_engine(engine, i915, id) { 5383 struct i915_request *rq; 5384 5385 rq = i915_request_alloc(engine, ctx); 5386 if (IS_ERR(rq)) { 5387 err = PTR_ERR(rq); 5388 goto out_ctx; 5389 } 5390 5391 err = 0; 5392 if (engine->init_context) 5393 err = engine->init_context(rq); 5394 5395 i915_request_add(rq); 5396 if (err) 5397 goto err_active; 5398 } 5399 5400 err = i915_gem_switch_to_kernel_context(i915); 5401 if (err) 5402 goto err_active; 5403 5404 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5405 i915_gem_set_wedged(i915); 5406 err = -EIO; /* Caller will declare us wedged */ 5407 goto err_active; 5408 } 5409 5410 assert_kernel_context_is_current(i915); 5411 5412 /* 5413 * Immediately park the GPU so that we enable powersaving and 5414 * treat it as idle. The next time we issue a request, we will 5415 * unpark and start using the engine->pinned_default_state, otherwise 5416 * it is in limbo and an early reset may fail. 5417 */ 5418 __i915_gem_park(i915); 5419 5420 for_each_engine(engine, i915, id) { 5421 struct i915_vma *state; 5422 void *vaddr; 5423 5424 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count); 5425 5426 state = to_intel_context(ctx, engine)->state; 5427 if (!state) 5428 continue; 5429 5430 /* 5431 * As we will hold a reference to the logical state, it will 5432 * not be torn down with the context, and importantly the 5433 * object will hold onto its vma (making it possible for a 5434 * stray GTT write to corrupt our defaults). Unmap the vma 5435 * from the GTT to prevent such accidents and reclaim the 5436 * space. 5437 */ 5438 err = i915_vma_unbind(state); 5439 if (err) 5440 goto err_active; 5441 5442 err = i915_gem_object_set_to_cpu_domain(state->obj, false); 5443 if (err) 5444 goto err_active; 5445 5446 engine->default_state = i915_gem_object_get(state->obj); 5447 5448 /* Check we can acquire the image of the context state */ 5449 vaddr = i915_gem_object_pin_map(engine->default_state, 5450 I915_MAP_FORCE_WB); 5451 if (IS_ERR(vaddr)) { 5452 err = PTR_ERR(vaddr); 5453 goto err_active; 5454 } 5455 5456 i915_gem_object_unpin_map(engine->default_state); 5457 } 5458 5459 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { 5460 unsigned int found = intel_engines_has_context_isolation(i915); 5461 5462 /* 5463 * Make sure that classes with multiple engine instances all 5464 * share the same basic configuration. 5465 */ 5466 for_each_engine(engine, i915, id) { 5467 unsigned int bit = BIT(engine->uabi_class); 5468 unsigned int expected = engine->default_state ? bit : 0; 5469 5470 if ((found & bit) != expected) { 5471 DRM_ERROR("mismatching default context state for class %d on engine %s\n", 5472 engine->uabi_class, engine->name); 5473 } 5474 } 5475 } 5476 5477 out_ctx: 5478 i915_gem_context_set_closed(ctx); 5479 i915_gem_context_put(ctx); 5480 return err; 5481 5482 err_active: 5483 /* 5484 * If we have to abandon now, we expect the engines to be idle 5485 * and ready to be torn-down. First try to flush any remaining 5486 * request, ensure we are pointing at the kernel context and 5487 * then remove it. 
5488 */ 5489 if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) 5490 goto out_ctx; 5491 5492 if (WARN_ON(i915_gem_wait_for_idle(i915, 5493 I915_WAIT_LOCKED, 5494 MAX_SCHEDULE_TIMEOUT))) 5495 goto out_ctx; 5496 5497 i915_gem_contexts_lost(i915); 5498 goto out_ctx; 5499 } 5500 5501 static int 5502 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) 5503 { 5504 struct drm_i915_gem_object *obj; 5505 struct i915_vma *vma; 5506 int ret; 5507 5508 obj = i915_gem_object_create_stolen(i915, size); 5509 if (!obj) 5510 obj = i915_gem_object_create_internal(i915, size); 5511 if (IS_ERR(obj)) { 5512 DRM_ERROR("Failed to allocate scratch page\n"); 5513 return PTR_ERR(obj); 5514 } 5515 5516 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 5517 if (IS_ERR(vma)) { 5518 ret = PTR_ERR(vma); 5519 goto err_unref; 5520 } 5521 5522 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); 5523 if (ret) 5524 goto err_unref; 5525 5526 i915->gt.scratch = vma; 5527 return 0; 5528 5529 err_unref: 5530 i915_gem_object_put(obj); 5531 return ret; 5532 } 5533 5534 static void i915_gem_fini_scratch(struct drm_i915_private *i915) 5535 { 5536 i915_vma_unpin_and_release(&i915->gt.scratch, 0); 5537 } 5538 5539 int i915_gem_init(struct drm_i915_private *dev_priv) 5540 { 5541 int ret; 5542 5543 /* We need to fallback to 4K pages if host doesn't support huge gtt. */ 5544 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) 5545 mkwrite_device_info(dev_priv)->page_sizes = 5546 I915_GTT_PAGE_SIZE_4K; 5547 5548 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); 5549 5550 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { 5551 dev_priv->gt.resume = intel_lr_context_resume; 5552 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5553 } else { 5554 dev_priv->gt.resume = intel_legacy_submission_resume; 5555 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 5556 } 5557 5558 ret = i915_gem_init_userptr(dev_priv); 5559 if (ret) 5560 return ret; 5561 5562 ret = intel_uc_init_misc(dev_priv); 5563 if (ret) 5564 return ret; 5565 5566 ret = intel_wopcm_init(&dev_priv->wopcm); 5567 if (ret) 5568 goto err_uc_misc; 5569 5570 /* This is just a security blanket to placate dragons. 5571 * On some systems, we very sporadically observe that the first TLBs 5572 * used by the CS may be stale, despite us poking the TLB reset. If 5573 * we hold the forcewake during initialisation these problems 5574 * just magically go away. 5575 */ 5576 mutex_lock(&dev_priv->drm.struct_mutex); 5577 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5578 5579 ret = i915_gem_init_ggtt(dev_priv); 5580 if (ret) { 5581 GEM_BUG_ON(ret == -EIO); 5582 goto err_unlock; 5583 } 5584 5585 ret = i915_gem_init_scratch(dev_priv, 5586 IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); 5587 if (ret) { 5588 GEM_BUG_ON(ret == -EIO); 5589 goto err_ggtt; 5590 } 5591 5592 ret = i915_gem_contexts_init(dev_priv); 5593 if (ret) { 5594 GEM_BUG_ON(ret == -EIO); 5595 goto err_scratch; 5596 } 5597 5598 ret = intel_engines_init(dev_priv); 5599 if (ret) { 5600 GEM_BUG_ON(ret == -EIO); 5601 goto err_context; 5602 } 5603 5604 intel_init_gt_powersave(dev_priv); 5605 5606 ret = intel_uc_init(dev_priv); 5607 if (ret) 5608 goto err_pm; 5609 5610 ret = i915_gem_init_hw(dev_priv); 5611 if (ret) 5612 goto err_uc_init; 5613 5614 /* 5615 * Despite its name intel_init_clock_gating applies both display 5616 * clock gating workarounds; GT mmio workarounds and the occasional 5617 * GT power context workaround. 
Worse, sometimes it includes a context 5618 * register workaround which we need to apply before we record the 5619 * default HW state for all contexts. 5620 * 5621 * FIXME: break up the workarounds and apply them at the right time! 5622 */ 5623 intel_init_clock_gating(dev_priv); 5624 5625 ret = __intel_engines_record_defaults(dev_priv); 5626 if (ret) 5627 goto err_init_hw; 5628 5629 if (i915_inject_load_failure()) { 5630 ret = -ENODEV; 5631 goto err_init_hw; 5632 } 5633 5634 if (i915_inject_load_failure()) { 5635 ret = -EIO; 5636 goto err_init_hw; 5637 } 5638 5639 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5640 mutex_unlock(&dev_priv->drm.struct_mutex); 5641 5642 return 0; 5643 5644 /* 5645 * Unwinding is complicated by the fact that we want to handle -EIO to mean 5646 * disable GPU submission but keep KMS alive. We want to mark the 5647 * HW as irreversibly wedged, but keep enough state around that the 5648 * driver doesn't explode during runtime. 5649 */ 5650 err_init_hw: 5651 mutex_unlock(&dev_priv->drm.struct_mutex); 5652 5653 WARN_ON(i915_gem_suspend(dev_priv)); 5654 i915_gem_suspend_late(dev_priv); 5655 5656 i915_gem_drain_workqueue(dev_priv); 5657 5658 mutex_lock(&dev_priv->drm.struct_mutex); 5659 intel_uc_fini_hw(dev_priv); 5660 err_uc_init: 5661 intel_uc_fini(dev_priv); 5662 err_pm: 5663 if (ret != -EIO) { 5664 intel_cleanup_gt_powersave(dev_priv); 5665 i915_gem_cleanup_engines(dev_priv); 5666 } 5667 err_context: 5668 if (ret != -EIO) 5669 i915_gem_contexts_fini(dev_priv); 5670 err_scratch: 5671 i915_gem_fini_scratch(dev_priv); 5672 err_ggtt: 5673 err_unlock: 5674 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5675 mutex_unlock(&dev_priv->drm.struct_mutex); 5676 5677 err_uc_misc: 5678 intel_uc_fini_misc(dev_priv); 5679 5680 if (ret != -EIO) 5681 i915_gem_cleanup_userptr(dev_priv); 5682 5683 if (ret == -EIO) { 5684 mutex_lock(&dev_priv->drm.struct_mutex); 5685 5686 /* 5687 * Allow engine initialisation to fail by marking the GPU as 5688 * wedged. But we only want to do this where the GPU is angry, 5689 * for all other failures, such as an allocation failure, we bail. 5690 */ 5691 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 5692 i915_load_error(dev_priv, 5693 "Failed to initialize GPU, declaring it wedged!\n"); 5694 i915_gem_set_wedged(dev_priv); 5695 } 5696 5697 /* Minimal basic recovery for KMS */ 5698 ret = i915_ggtt_enable_hw(dev_priv); 5699 i915_gem_restore_gtt_mappings(dev_priv); 5700 i915_gem_restore_fences(dev_priv); 5701 intel_init_clock_gating(dev_priv); 5702 5703 mutex_unlock(&dev_priv->drm.struct_mutex); 5704 } 5705 5706 i915_gem_drain_freed_objects(dev_priv); 5707 return ret; 5708 } 5709 5710 void i915_gem_fini(struct drm_i915_private *dev_priv) 5711 { 5712 i915_gem_suspend_late(dev_priv); 5713 intel_disable_gt_powersave(dev_priv); 5714 5715 /* Flush any outstanding unpin_work.
*/ 5716 i915_gem_drain_workqueue(dev_priv); 5717 5718 mutex_lock(&dev_priv->drm.struct_mutex); 5719 intel_uc_fini_hw(dev_priv); 5720 intel_uc_fini(dev_priv); 5721 i915_gem_cleanup_engines(dev_priv); 5722 i915_gem_contexts_fini(dev_priv); 5723 i915_gem_fini_scratch(dev_priv); 5724 mutex_unlock(&dev_priv->drm.struct_mutex); 5725 5726 intel_wa_list_free(&dev_priv->gt_wa_list); 5727 5728 intel_cleanup_gt_powersave(dev_priv); 5729 5730 intel_uc_fini_misc(dev_priv); 5731 i915_gem_cleanup_userptr(dev_priv); 5732 5733 i915_gem_drain_freed_objects(dev_priv); 5734 5735 WARN_ON(!list_empty(&dev_priv->contexts.list)); 5736 } 5737 5738 void i915_gem_init_mmio(struct drm_i915_private *i915) 5739 { 5740 i915_gem_sanitize(i915); 5741 } 5742 5743 void 5744 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 5745 { 5746 struct intel_engine_cs *engine; 5747 enum intel_engine_id id; 5748 5749 for_each_engine(engine, dev_priv, id) 5750 dev_priv->gt.cleanup_engine(engine); 5751 } 5752 5753 void 5754 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5755 { 5756 int i; 5757 5758 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) && 5759 !IS_CHERRYVIEW(dev_priv)) 5760 dev_priv->num_fence_regs = 32; 5761 else if (INTEL_GEN(dev_priv) >= 4 || 5762 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 5763 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 5764 dev_priv->num_fence_regs = 16; 5765 else 5766 dev_priv->num_fence_regs = 8; 5767 5768 if (intel_vgpu_active(dev_priv)) 5769 dev_priv->num_fence_regs = 5770 I915_READ(vgtif_reg(avail_rs.fence_num)); 5771 5772 /* Initialize fence registers to zero */ 5773 for (i = 0; i < dev_priv->num_fence_regs; i++) { 5774 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 5775 5776 fence->i915 = dev_priv; 5777 fence->id = i; 5778 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 5779 } 5780 i915_gem_restore_fences(dev_priv); 5781 5782 i915_gem_detect_bit_6_swizzle(dev_priv); 5783 } 5784 5785 static void i915_gem_init__mm(struct drm_i915_private *i915) 5786 { 5787 spin_lock_init(&i915->mm.object_stat_lock); 5788 spin_lock_init(&i915->mm.obj_lock); 5789 spin_lock_init(&i915->mm.free_lock); 5790 5791 init_llist_head(&i915->mm.free_list); 5792 5793 INIT_LIST_HEAD(&i915->mm.unbound_list); 5794 INIT_LIST_HEAD(&i915->mm.bound_list); 5795 INIT_LIST_HEAD(&i915->mm.fence_list); 5796 INIT_LIST_HEAD(&i915->mm.userfault_list); 5797 5798 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); 5799 } 5800 5801 int i915_gem_init_early(struct drm_i915_private *dev_priv) 5802 { 5803 int err = -ENOMEM; 5804 5805 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 5806 if (!dev_priv->objects) 5807 goto err_out; 5808 5809 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 5810 if (!dev_priv->vmas) 5811 goto err_objects; 5812 5813 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); 5814 if (!dev_priv->luts) 5815 goto err_vmas; 5816 5817 dev_priv->requests = KMEM_CACHE(i915_request, 5818 SLAB_HWCACHE_ALIGN | 5819 SLAB_RECLAIM_ACCOUNT | 5820 SLAB_TYPESAFE_BY_RCU); 5821 if (!dev_priv->requests) 5822 goto err_luts; 5823 5824 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 5825 SLAB_HWCACHE_ALIGN | 5826 SLAB_RECLAIM_ACCOUNT); 5827 if (!dev_priv->dependencies) 5828 goto err_requests; 5829 5830 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); 5831 if (!dev_priv->priorities) 5832 goto err_dependencies; 5833 5834 INIT_LIST_HEAD(&dev_priv->gt.timelines); 5835 INIT_LIST_HEAD(&dev_priv->gt.active_rings); 5836 
static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.object_stat_lock);
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);
	INIT_LIST_HEAD(&i915->mm.userfault_list);

	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	dev_priv->requests = KMEM_CACHE(i915_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;

	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

	return 0;

err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

	/* And ensure that our SLAB_TYPESAFE_BY_RCU slabs are truly destroyed */
	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);

	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}
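/*
 * Background sketch (illustrative only, not a call site in this file):
 * the request cache created in i915_gem_init_early() uses
 * SLAB_TYPESAFE_BY_RCU, so the memory of a freed request may be reused
 * for a new request while an RCU reader still holds a stale pointer.
 * A lockless lookup therefore has to acquire a reference and then
 * recheck that it found the request it was looking for, roughly:
 *
 *	rcu_read_lock();
 *	rq = READ_ONCE(*slot);		(hypothetical RCU-protected slot)
 *	if (rq && !kref_get_unless_zero(&rq->fence.refcount))
 *		rq = NULL;		(raced with free/reuse)
 *	...recheck rq still matches *slot before trusting it...
 *	rcu_read_unlock();
 *
 * The rcu_barrier() in i915_gem_cleanup_early() is there so that the
 * RCU-deferred frees of such slabs have completed before the driver
 * goes away.
 */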
int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
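/*
 * Usage sketch (hypothetical, simplified from how the display code is
 * expected to call this): when a plane flips from old_fb to new_fb, the
 * plane's frontbuffer bit is transferred between the backing objects:
 *
 *	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(new_fb),
 *			  to_intel_plane(plane)->frontbuffer_bit);
 *
 * intel_fb_obj() and frontbuffer_bit are assumed helpers on the display
 * side; the point is merely that each caller owns a disjoint set of bits,
 * which is what makes the atomic and/or updates above safe.
 */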
/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/* As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/* We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		void *entry;
		unsigned long i;
		int ret;

		/* If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		entry = xa_mk_value(idx);
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i, entry);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/* In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/* If this index is in the middle of a multi-page sg entry,
	 * the radix tree will contain a value entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(xa_is_value(sg))) {
		unsigned long base = xa_to_value(sg);

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}
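/*
 * Worked example of the lookup cache above (illustrative): suppose the
 * object's sg table has one entry covering pages [0, 4) and a second
 * covering pages [4, 6). After the walk, the radixtree contains:
 *
 *	index 0		-> sg0
 *	index 1..3	-> xa_mk_value(0)
 *	index 4		-> sg1
 *	index 5		-> xa_mk_value(4)
 *
 * A lookup of page 2 hits the value entry for base 0, re-looks up index 0
 * to fetch sg0 and returns *offset = 2; a lookup of page 5 resolves to sg1
 * with *offset = 1.
 */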
struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
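/*
 * Usage sketch for the accessors above (illustrative only; assumes the
 * caller has already pinned the object's pages, e.g. via
 * i915_gem_object_pin_pages()): reading a few bytes at an arbitrary byte
 * offset into the object might look like
 *
 *	struct page *page;
 *	void *vaddr;
 *
 *	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 *	vaddr = kmap_atomic(page);
 *	memcpy(buf, vaddr + offset_in_page(offset), len);
 *	kunmap_atomic(vaddr);
 *
 * while i915_gem_object_get_dma_address() would give the bus address of
 * the same page for programming into hardware.
 */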
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#include "selftests/i915_gem.c"
#endif