1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_gem_clflush.h" 33 #include "i915_vgpu.h" 34 #include "i915_trace.h" 35 #include "intel_drv.h" 36 #include "intel_frontbuffer.h" 37 #include "intel_mocs.h" 38 #include <linux/dma-fence-array.h> 39 #include <linux/kthread.h> 40 #include <linux/reservation.h> 41 #include <linux/shmem_fs.h> 42 #include <linux/slab.h> 43 #include <linux/stop_machine.h> 44 #include <linux/swap.h> 45 #include <linux/pci.h> 46 #include <linux/dma-buf.h> 47 48 static void i915_gem_flush_free_objects(struct drm_i915_private *i915); 49 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 50 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 51 52 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 53 { 54 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 55 return false; 56 57 if (!i915_gem_object_is_coherent(obj)) 58 return true; 59 60 return obj->pin_display; 61 } 62 63 static int 64 insert_mappable_node(struct i915_ggtt *ggtt, 65 struct drm_mm_node *node, u32 size) 66 { 67 memset(node, 0, sizeof(*node)); 68 return drm_mm_insert_node_in_range(&ggtt->base.mm, node, 69 size, 0, I915_COLOR_UNEVICTABLE, 70 0, ggtt->mappable_end, 71 DRM_MM_INSERT_LOW); 72 } 73 74 static void 75 remove_mappable_node(struct drm_mm_node *node) 76 { 77 drm_mm_remove_node(node); 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 u64 size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 u64 size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 might_sleep(); 105 106 /* 107 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 108 * userspace. 
If it takes that long something really bad is going on and 109 * we should simply try to bail out and fail as gracefully as possible. 110 */ 111 ret = wait_event_interruptible_timeout(error->reset_queue, 112 !i915_reset_backoff(error), 113 I915_RESET_TIMEOUT); 114 if (ret == 0) { 115 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 116 return -EIO; 117 } else if (ret < 0) { 118 return ret; 119 } else { 120 return 0; 121 } 122 } 123 124 int i915_mutex_lock_interruptible(struct drm_device *dev) 125 { 126 struct drm_i915_private *dev_priv = to_i915(dev); 127 int ret; 128 129 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 130 if (ret) 131 return ret; 132 133 ret = mutex_lock_interruptible(&dev->struct_mutex); 134 if (ret) 135 return ret; 136 137 return 0; 138 } 139 140 int 141 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 142 struct drm_file *file) 143 { 144 struct drm_i915_private *dev_priv = to_i915(dev); 145 struct i915_ggtt *ggtt = &dev_priv->ggtt; 146 struct drm_i915_gem_get_aperture *args = data; 147 struct i915_vma *vma; 148 size_t pinned; 149 150 pinned = 0; 151 mutex_lock(&dev->struct_mutex); 152 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 153 if (i915_vma_is_pinned(vma)) 154 pinned += vma->node.size; 155 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 156 if (i915_vma_is_pinned(vma)) 157 pinned += vma->node.size; 158 mutex_unlock(&dev->struct_mutex); 159 160 args->aper_size = ggtt->base.total; 161 args->aper_available_size = args->aper_size - pinned; 162 163 return 0; 164 } 165 166 static struct sg_table * 167 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 168 { 169 struct address_space *mapping = obj->base.filp->f_mapping; 170 drm_dma_handle_t *phys; 171 struct sg_table *st; 172 struct scatterlist *sg; 173 char *vaddr; 174 int i; 175 176 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 177 return ERR_PTR(-EINVAL); 178 179 /* Always aligning to the object size, allows a single allocation 180 * to handle all possible callers, and given typical object sizes, 181 * the alignment of the buddy allocation will naturally match. 
182 */ 183 phys = drm_pci_alloc(obj->base.dev, 184 obj->base.size, 185 roundup_pow_of_two(obj->base.size)); 186 if (!phys) 187 return ERR_PTR(-ENOMEM); 188 189 vaddr = phys->vaddr; 190 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 191 struct page *page; 192 char *src; 193 194 page = shmem_read_mapping_page(mapping, i); 195 if (IS_ERR(page)) { 196 st = ERR_CAST(page); 197 goto err_phys; 198 } 199 200 src = kmap_atomic(page); 201 memcpy(vaddr, src, PAGE_SIZE); 202 drm_clflush_virt_range(vaddr, PAGE_SIZE); 203 kunmap_atomic(src); 204 205 put_page(page); 206 vaddr += PAGE_SIZE; 207 } 208 209 i915_gem_chipset_flush(to_i915(obj->base.dev)); 210 211 st = kmalloc(sizeof(*st), GFP_KERNEL); 212 if (!st) { 213 st = ERR_PTR(-ENOMEM); 214 goto err_phys; 215 } 216 217 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 218 kfree(st); 219 st = ERR_PTR(-ENOMEM); 220 goto err_phys; 221 } 222 223 sg = st->sgl; 224 sg->offset = 0; 225 sg->length = obj->base.size; 226 227 sg_dma_address(sg) = phys->busaddr; 228 sg_dma_len(sg) = obj->base.size; 229 230 obj->phys_handle = phys; 231 return st; 232 233 err_phys: 234 drm_pci_free(obj->base.dev, phys); 235 return st; 236 } 237 238 static void 239 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, 240 struct sg_table *pages, 241 bool needs_clflush) 242 { 243 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); 244 245 if (obj->mm.madv == I915_MADV_DONTNEED) 246 obj->mm.dirty = false; 247 248 if (needs_clflush && 249 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && 250 !i915_gem_object_is_coherent(obj)) 251 drm_clflush_sg(pages); 252 253 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 254 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 255 } 256 257 static void 258 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, 259 struct sg_table *pages) 260 { 261 __i915_gem_object_release_shmem(obj, pages, false); 262 263 if (obj->mm.dirty) { 264 struct address_space *mapping = obj->base.filp->f_mapping; 265 char *vaddr = obj->phys_handle->vaddr; 266 int i; 267 268 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 269 struct page *page; 270 char *dst; 271 272 page = shmem_read_mapping_page(mapping, i); 273 if (IS_ERR(page)) 274 continue; 275 276 dst = kmap_atomic(page); 277 drm_clflush_virt_range(vaddr, PAGE_SIZE); 278 memcpy(dst, vaddr, PAGE_SIZE); 279 kunmap_atomic(dst); 280 281 set_page_dirty(page); 282 if (obj->mm.madv == I915_MADV_WILLNEED) 283 mark_page_accessed(page); 284 put_page(page); 285 vaddr += PAGE_SIZE; 286 } 287 obj->mm.dirty = false; 288 } 289 290 sg_free_table(pages); 291 kfree(pages); 292 293 drm_pci_free(obj->base.dev, obj->phys_handle); 294 } 295 296 static void 297 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 298 { 299 i915_gem_object_unpin_pages(obj); 300 } 301 302 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 303 .get_pages = i915_gem_object_get_pages_phys, 304 .put_pages = i915_gem_object_put_pages_phys, 305 .release = i915_gem_object_release_phys, 306 }; 307 308 static const struct drm_i915_gem_object_ops i915_gem_object_ops; 309 310 int i915_gem_object_unbind(struct drm_i915_gem_object *obj) 311 { 312 struct i915_vma *vma; 313 LIST_HEAD(still_in_list); 314 int ret; 315 316 lockdep_assert_held(&obj->base.dev->struct_mutex); 317 318 /* Closed vma are removed from the obj->vma_list - but they may 319 * still have an active binding on the object. To remove those we 320 * must wait for all rendering to complete to the object (as unbinding 321 * must anyway), and retire the requests. 
322 */ 323 ret = i915_gem_object_wait(obj, 324 I915_WAIT_INTERRUPTIBLE | 325 I915_WAIT_LOCKED | 326 I915_WAIT_ALL, 327 MAX_SCHEDULE_TIMEOUT, 328 NULL); 329 if (ret) 330 return ret; 331 332 i915_gem_retire_requests(to_i915(obj->base.dev)); 333 334 while ((vma = list_first_entry_or_null(&obj->vma_list, 335 struct i915_vma, 336 obj_link))) { 337 list_move_tail(&vma->obj_link, &still_in_list); 338 ret = i915_vma_unbind(vma); 339 if (ret) 340 break; 341 } 342 list_splice(&still_in_list, &obj->vma_list); 343 344 return ret; 345 } 346 347 static long 348 i915_gem_object_wait_fence(struct dma_fence *fence, 349 unsigned int flags, 350 long timeout, 351 struct intel_rps_client *rps) 352 { 353 struct drm_i915_gem_request *rq; 354 355 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); 356 357 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) 358 return timeout; 359 360 if (!dma_fence_is_i915(fence)) 361 return dma_fence_wait_timeout(fence, 362 flags & I915_WAIT_INTERRUPTIBLE, 363 timeout); 364 365 rq = to_request(fence); 366 if (i915_gem_request_completed(rq)) 367 goto out; 368 369 /* This client is about to stall waiting for the GPU. In many cases 370 * this is undesirable and limits the throughput of the system, as 371 * many clients cannot continue processing user input/output whilst 372 * blocked. RPS autotuning may take tens of milliseconds to respond 373 * to the GPU load and thus incurs additional latency for the client. 374 * We can circumvent that by promoting the GPU frequency to maximum 375 * before we wait. This makes the GPU throttle up much more quickly 376 * (good for benchmarks and user experience, e.g. window animations), 377 * but at a cost of spending more power processing the workload 378 * (bad for battery). Not all clients even want their results 379 * immediately and for them we should just let the GPU select its own 380 * frequency to maximise efficiency. To prevent a single client from 381 * forcing the clocks too high for the whole system, we only allow 382 * each client to waitboost once in a busy period. 383 */ 384 if (rps) { 385 if (INTEL_GEN(rq->i915) >= 6) 386 gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies); 387 else 388 rps = NULL; 389 } 390 391 timeout = i915_wait_request(rq, flags, timeout); 392 393 out: 394 if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) 395 i915_gem_request_retire_upto(rq); 396 397 if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { 398 /* The GPU is now idle and this client has stalled. 399 * Since no other client has submitted a request in the 400 * meantime, assume that this client is the only one 401 * supplying work to the GPU but is unable to keep that 402 * work supplied because it is waiting. Since the GPU is 403 * then never kept fully busy, RPS autoclocking will 404 * keep the clocks relatively low, causing further delays. 405 * Compensate by giving the synchronous client credit for 406 * a waitboost next time. 
407 */ 408 spin_lock(&rq->i915->rps.client_lock); 409 list_del_init(&rps->link); 410 spin_unlock(&rq->i915->rps.client_lock); 411 } 412 413 return timeout; 414 } 415 416 static long 417 i915_gem_object_wait_reservation(struct reservation_object *resv, 418 unsigned int flags, 419 long timeout, 420 struct intel_rps_client *rps) 421 { 422 unsigned int seq = __read_seqcount_begin(&resv->seq); 423 struct dma_fence *excl; 424 bool prune_fences = false; 425 426 if (flags & I915_WAIT_ALL) { 427 struct dma_fence **shared; 428 unsigned int count, i; 429 int ret; 430 431 ret = reservation_object_get_fences_rcu(resv, 432 &excl, &count, &shared); 433 if (ret) 434 return ret; 435 436 for (i = 0; i < count; i++) { 437 timeout = i915_gem_object_wait_fence(shared[i], 438 flags, timeout, 439 rps); 440 if (timeout < 0) 441 break; 442 443 dma_fence_put(shared[i]); 444 } 445 446 for (; i < count; i++) 447 dma_fence_put(shared[i]); 448 kfree(shared); 449 450 prune_fences = count && timeout >= 0; 451 } else { 452 excl = reservation_object_get_excl_rcu(resv); 453 } 454 455 if (excl && timeout >= 0) { 456 timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); 457 prune_fences = timeout >= 0; 458 } 459 460 dma_fence_put(excl); 461 462 /* Oportunistically prune the fences iff we know they have *all* been 463 * signaled and that the reservation object has not been changed (i.e. 464 * no new fences have been added). 465 */ 466 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { 467 if (reservation_object_trylock(resv)) { 468 if (!__read_seqcount_retry(&resv->seq, seq)) 469 reservation_object_add_excl_fence(resv, NULL); 470 reservation_object_unlock(resv); 471 } 472 } 473 474 return timeout; 475 } 476 477 static void __fence_set_priority(struct dma_fence *fence, int prio) 478 { 479 struct drm_i915_gem_request *rq; 480 struct intel_engine_cs *engine; 481 482 if (!dma_fence_is_i915(fence)) 483 return; 484 485 rq = to_request(fence); 486 engine = rq->engine; 487 if (!engine->schedule) 488 return; 489 490 engine->schedule(rq, prio); 491 } 492 493 static void fence_set_priority(struct dma_fence *fence, int prio) 494 { 495 /* Recurse once into a fence-array */ 496 if (dma_fence_is_array(fence)) { 497 struct dma_fence_array *array = to_dma_fence_array(fence); 498 int i; 499 500 for (i = 0; i < array->num_fences; i++) 501 __fence_set_priority(array->fences[i], prio); 502 } else { 503 __fence_set_priority(fence, prio); 504 } 505 } 506 507 int 508 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, 509 unsigned int flags, 510 int prio) 511 { 512 struct dma_fence *excl; 513 514 if (flags & I915_WAIT_ALL) { 515 struct dma_fence **shared; 516 unsigned int count, i; 517 int ret; 518 519 ret = reservation_object_get_fences_rcu(obj->resv, 520 &excl, &count, &shared); 521 if (ret) 522 return ret; 523 524 for (i = 0; i < count; i++) { 525 fence_set_priority(shared[i], prio); 526 dma_fence_put(shared[i]); 527 } 528 529 kfree(shared); 530 } else { 531 excl = reservation_object_get_excl_rcu(obj->resv); 532 } 533 534 if (excl) { 535 fence_set_priority(excl, prio); 536 dma_fence_put(excl); 537 } 538 return 0; 539 } 540 541 /** 542 * Waits for rendering to the object to be completed 543 * @obj: i915 gem object 544 * @flags: how to wait (under a lock, for all rendering or just for writes etc) 545 * @timeout: how long to wait 546 * @rps: client (user process) to charge for any waitboosting 547 */ 548 int 549 i915_gem_object_wait(struct drm_i915_gem_object *obj, 550 unsigned int flags, 551 long timeout, 552 struct 
intel_rps_client *rps) 553 { 554 might_sleep(); 555 #if IS_ENABLED(CONFIG_LOCKDEP) 556 GEM_BUG_ON(debug_locks && 557 !!lockdep_is_held(&obj->base.dev->struct_mutex) != 558 !!(flags & I915_WAIT_LOCKED)); 559 #endif 560 GEM_BUG_ON(timeout < 0); 561 562 timeout = i915_gem_object_wait_reservation(obj->resv, 563 flags, timeout, 564 rps); 565 return timeout < 0 ? timeout : 0; 566 } 567 568 static struct intel_rps_client *to_rps_client(struct drm_file *file) 569 { 570 struct drm_i915_file_private *fpriv = file->driver_priv; 571 572 return &fpriv->rps; 573 } 574 575 int 576 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 577 int align) 578 { 579 int ret; 580 581 if (align > obj->base.size) 582 return -EINVAL; 583 584 if (obj->ops == &i915_gem_phys_ops) 585 return 0; 586 587 if (obj->mm.madv != I915_MADV_WILLNEED) 588 return -EFAULT; 589 590 if (obj->base.filp == NULL) 591 return -EINVAL; 592 593 ret = i915_gem_object_unbind(obj); 594 if (ret) 595 return ret; 596 597 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 598 if (obj->mm.pages) 599 return -EBUSY; 600 601 GEM_BUG_ON(obj->ops != &i915_gem_object_ops); 602 obj->ops = &i915_gem_phys_ops; 603 604 ret = i915_gem_object_pin_pages(obj); 605 if (ret) 606 goto err_xfer; 607 608 return 0; 609 610 err_xfer: 611 obj->ops = &i915_gem_object_ops; 612 return ret; 613 } 614 615 static int 616 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 617 struct drm_i915_gem_pwrite *args, 618 struct drm_file *file) 619 { 620 void *vaddr = obj->phys_handle->vaddr + args->offset; 621 char __user *user_data = u64_to_user_ptr(args->data_ptr); 622 623 /* We manually control the domain here and pretend that it 624 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 625 */ 626 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 627 if (copy_from_user(vaddr, user_data, args->size)) 628 return -EFAULT; 629 630 drm_clflush_virt_range(vaddr, args->size); 631 i915_gem_chipset_flush(to_i915(obj->base.dev)); 632 633 intel_fb_obj_flush(obj, ORIGIN_CPU); 634 return 0; 635 } 636 637 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv) 638 { 639 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 640 } 641 642 void i915_gem_object_free(struct drm_i915_gem_object *obj) 643 { 644 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 645 kmem_cache_free(dev_priv->objects, obj); 646 } 647 648 static int 649 i915_gem_create(struct drm_file *file, 650 struct drm_i915_private *dev_priv, 651 uint64_t size, 652 uint32_t *handle_p) 653 { 654 struct drm_i915_gem_object *obj; 655 int ret; 656 u32 handle; 657 658 size = roundup(size, PAGE_SIZE); 659 if (size == 0) 660 return -EINVAL; 661 662 /* Allocate the new object */ 663 obj = i915_gem_object_create(dev_priv, size); 664 if (IS_ERR(obj)) 665 return PTR_ERR(obj); 666 667 ret = drm_gem_handle_create(file, &obj->base, &handle); 668 /* drop reference from allocate - handle holds it now */ 669 i915_gem_object_put(obj); 670 if (ret) 671 return ret; 672 673 *handle_p = handle; 674 return 0; 675 } 676 677 int 678 i915_gem_dumb_create(struct drm_file *file, 679 struct drm_device *dev, 680 struct drm_mode_create_dumb *args) 681 { 682 /* have to work out size/pitch and return them */ 683 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 684 args->size = args->pitch * args->height; 685 return i915_gem_create(file, to_i915(dev), 686 args->size, &args->handle); 687 } 688 689 /** 690 * Creates a new mm object and returns a handle to it. 
691 * @dev: drm device pointer 692 * @data: ioctl data blob 693 * @file: drm file pointer 694 */ 695 int 696 i915_gem_create_ioctl(struct drm_device *dev, void *data, 697 struct drm_file *file) 698 { 699 struct drm_i915_private *dev_priv = to_i915(dev); 700 struct drm_i915_gem_create *args = data; 701 702 i915_gem_flush_free_objects(dev_priv); 703 704 return i915_gem_create(file, dev_priv, 705 args->size, &args->handle); 706 } 707 708 static inline int 709 __copy_to_user_swizzled(char __user *cpu_vaddr, 710 const char *gpu_vaddr, int gpu_offset, 711 int length) 712 { 713 int ret, cpu_offset = 0; 714 715 while (length > 0) { 716 int cacheline_end = ALIGN(gpu_offset + 1, 64); 717 int this_length = min(cacheline_end - gpu_offset, length); 718 int swizzled_gpu_offset = gpu_offset ^ 64; 719 720 ret = __copy_to_user(cpu_vaddr + cpu_offset, 721 gpu_vaddr + swizzled_gpu_offset, 722 this_length); 723 if (ret) 724 return ret + length; 725 726 cpu_offset += this_length; 727 gpu_offset += this_length; 728 length -= this_length; 729 } 730 731 return 0; 732 } 733 734 static inline int 735 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 736 const char __user *cpu_vaddr, 737 int length) 738 { 739 int ret, cpu_offset = 0; 740 741 while (length > 0) { 742 int cacheline_end = ALIGN(gpu_offset + 1, 64); 743 int this_length = min(cacheline_end - gpu_offset, length); 744 int swizzled_gpu_offset = gpu_offset ^ 64; 745 746 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 747 cpu_vaddr + cpu_offset, 748 this_length); 749 if (ret) 750 return ret + length; 751 752 cpu_offset += this_length; 753 gpu_offset += this_length; 754 length -= this_length; 755 } 756 757 return 0; 758 } 759 760 /* 761 * Pins the specified object's pages and synchronizes the object with 762 * GPU accesses. Sets needs_clflush to non-zero if the caller should 763 * flush the object from the CPU cache. 764 */ 765 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 766 unsigned int *needs_clflush) 767 { 768 int ret; 769 770 lockdep_assert_held(&obj->base.dev->struct_mutex); 771 772 *needs_clflush = 0; 773 if (!i915_gem_object_has_struct_page(obj)) 774 return -ENODEV; 775 776 ret = i915_gem_object_wait(obj, 777 I915_WAIT_INTERRUPTIBLE | 778 I915_WAIT_LOCKED, 779 MAX_SCHEDULE_TIMEOUT, 780 NULL); 781 if (ret) 782 return ret; 783 784 ret = i915_gem_object_pin_pages(obj); 785 if (ret) 786 return ret; 787 788 if (i915_gem_object_is_coherent(obj) || 789 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 790 ret = i915_gem_object_set_to_cpu_domain(obj, false); 791 if (ret) 792 goto err_unpin; 793 else 794 goto out; 795 } 796 797 i915_gem_object_flush_gtt_write_domain(obj); 798 799 /* If we're not in the cpu read domain, set ourself into the gtt 800 * read domain and manually flush cachelines (if required). This 801 * optimizes for the case when the gpu will dirty the data 802 * anyway again before the next pread happens. 
803 */ 804 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 805 *needs_clflush = CLFLUSH_BEFORE; 806 807 out: 808 /* return with the pages pinned */ 809 return 0; 810 811 err_unpin: 812 i915_gem_object_unpin_pages(obj); 813 return ret; 814 } 815 816 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, 817 unsigned int *needs_clflush) 818 { 819 int ret; 820 821 lockdep_assert_held(&obj->base.dev->struct_mutex); 822 823 *needs_clflush = 0; 824 if (!i915_gem_object_has_struct_page(obj)) 825 return -ENODEV; 826 827 ret = i915_gem_object_wait(obj, 828 I915_WAIT_INTERRUPTIBLE | 829 I915_WAIT_LOCKED | 830 I915_WAIT_ALL, 831 MAX_SCHEDULE_TIMEOUT, 832 NULL); 833 if (ret) 834 return ret; 835 836 ret = i915_gem_object_pin_pages(obj); 837 if (ret) 838 return ret; 839 840 if (i915_gem_object_is_coherent(obj) || 841 !static_cpu_has(X86_FEATURE_CLFLUSH)) { 842 ret = i915_gem_object_set_to_cpu_domain(obj, true); 843 if (ret) 844 goto err_unpin; 845 else 846 goto out; 847 } 848 849 i915_gem_object_flush_gtt_write_domain(obj); 850 851 /* If we're not in the cpu write domain, set ourself into the 852 * gtt write domain and manually flush cachelines (as required). 853 * This optimizes for the case when the gpu will use the data 854 * right away and we therefore have to clflush anyway. 855 */ 856 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 857 *needs_clflush |= CLFLUSH_AFTER; 858 859 /* Same trick applies to invalidate partially written cachelines read 860 * before writing. 861 */ 862 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) 863 *needs_clflush |= CLFLUSH_BEFORE; 864 865 out: 866 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 867 obj->mm.dirty = true; 868 /* return with the pages pinned */ 869 return 0; 870 871 err_unpin: 872 i915_gem_object_unpin_pages(obj); 873 return ret; 874 } 875 876 static void 877 shmem_clflush_swizzled_range(char *addr, unsigned long length, 878 bool swizzled) 879 { 880 if (unlikely(swizzled)) { 881 unsigned long start = (unsigned long) addr; 882 unsigned long end = (unsigned long) addr + length; 883 884 /* For swizzling simply ensure that we always flush both 885 * channels. Lame, but simple and it works. Swizzled 886 * pwrite/pread is far from a hotpath - current userspace 887 * doesn't use it at all. */ 888 start = round_down(start, 128); 889 end = round_up(end, 128); 890 891 drm_clflush_virt_range((void *)start, end - start); 892 } else { 893 drm_clflush_virt_range(addr, length); 894 } 895 896 } 897 898 /* Only difference to the fast-path function is that this can handle bit17 899 * and uses non-atomic copy and kmap functions. */ 900 static int 901 shmem_pread_slow(struct page *page, int offset, int length, 902 char __user *user_data, 903 bool page_do_bit17_swizzling, bool needs_clflush) 904 { 905 char *vaddr; 906 int ret; 907 908 vaddr = kmap(page); 909 if (needs_clflush) 910 shmem_clflush_swizzled_range(vaddr + offset, length, 911 page_do_bit17_swizzling); 912 913 if (page_do_bit17_swizzling) 914 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); 915 else 916 ret = __copy_to_user(user_data, vaddr + offset, length); 917 kunmap(page); 918 919 return ret ? 
- EFAULT : 0; 920 } 921 922 static int 923 shmem_pread(struct page *page, int offset, int length, char __user *user_data, 924 bool page_do_bit17_swizzling, bool needs_clflush) 925 { 926 int ret; 927 928 ret = -ENODEV; 929 if (!page_do_bit17_swizzling) { 930 char *vaddr = kmap_atomic(page); 931 932 if (needs_clflush) 933 drm_clflush_virt_range(vaddr + offset, length); 934 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); 935 kunmap_atomic(vaddr); 936 } 937 if (ret == 0) 938 return 0; 939 940 return shmem_pread_slow(page, offset, length, user_data, 941 page_do_bit17_swizzling, needs_clflush); 942 } 943 944 static int 945 i915_gem_shmem_pread(struct drm_i915_gem_object *obj, 946 struct drm_i915_gem_pread *args) 947 { 948 char __user *user_data; 949 u64 remain; 950 unsigned int obj_do_bit17_swizzling; 951 unsigned int needs_clflush; 952 unsigned int idx, offset; 953 int ret; 954 955 obj_do_bit17_swizzling = 0; 956 if (i915_gem_object_needs_bit17_swizzle(obj)) 957 obj_do_bit17_swizzling = BIT(17); 958 959 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); 960 if (ret) 961 return ret; 962 963 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 964 mutex_unlock(&obj->base.dev->struct_mutex); 965 if (ret) 966 return ret; 967 968 remain = args->size; 969 user_data = u64_to_user_ptr(args->data_ptr); 970 offset = offset_in_page(args->offset); 971 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 972 struct page *page = i915_gem_object_get_page(obj, idx); 973 int length; 974 975 length = remain; 976 if (offset + length > PAGE_SIZE) 977 length = PAGE_SIZE - offset; 978 979 ret = shmem_pread(page, offset, length, user_data, 980 page_to_phys(page) & obj_do_bit17_swizzling, 981 needs_clflush); 982 if (ret) 983 break; 984 985 remain -= length; 986 user_data += length; 987 offset = 0; 988 } 989 990 i915_gem_obj_finish_shmem_access(obj); 991 return ret; 992 } 993 994 static inline bool 995 gtt_user_read(struct io_mapping *mapping, 996 loff_t base, int offset, 997 char __user *user_data, int length) 998 { 999 void *vaddr; 1000 unsigned long unwritten; 1001 1002 /* We can use the cpu mem copy function because this is X86. 
*/ 1003 vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); 1004 unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length); 1005 io_mapping_unmap_atomic(vaddr); 1006 if (unwritten) { 1007 vaddr = (void __force *) 1008 io_mapping_map_wc(mapping, base, PAGE_SIZE); 1009 unwritten = copy_to_user(user_data, vaddr + offset, length); 1010 io_mapping_unmap(vaddr); 1011 } 1012 return unwritten; 1013 } 1014 1015 static int 1016 i915_gem_gtt_pread(struct drm_i915_gem_object *obj, 1017 const struct drm_i915_gem_pread *args) 1018 { 1019 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1020 struct i915_ggtt *ggtt = &i915->ggtt; 1021 struct drm_mm_node node; 1022 struct i915_vma *vma; 1023 void __user *user_data; 1024 u64 remain, offset; 1025 int ret; 1026 1027 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1028 if (ret) 1029 return ret; 1030 1031 intel_runtime_pm_get(i915); 1032 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1033 PIN_MAPPABLE | PIN_NONBLOCK); 1034 if (!IS_ERR(vma)) { 1035 node.start = i915_ggtt_offset(vma); 1036 node.allocated = false; 1037 ret = i915_vma_put_fence(vma); 1038 if (ret) { 1039 i915_vma_unpin(vma); 1040 vma = ERR_PTR(ret); 1041 } 1042 } 1043 if (IS_ERR(vma)) { 1044 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1045 if (ret) 1046 goto out_unlock; 1047 GEM_BUG_ON(!node.allocated); 1048 } 1049 1050 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1051 if (ret) 1052 goto out_unpin; 1053 1054 mutex_unlock(&i915->drm.struct_mutex); 1055 1056 user_data = u64_to_user_ptr(args->data_ptr); 1057 remain = args->size; 1058 offset = args->offset; 1059 1060 while (remain > 0) { 1061 /* Operation in this page 1062 * 1063 * page_base = page offset within aperture 1064 * page_offset = offset within page 1065 * page_length = bytes to copy for this page 1066 */ 1067 u32 page_base = node.start; 1068 unsigned page_offset = offset_in_page(offset); 1069 unsigned page_length = PAGE_SIZE - page_offset; 1070 page_length = remain < page_length ? remain : page_length; 1071 if (node.allocated) { 1072 wmb(); 1073 ggtt->base.insert_page(&ggtt->base, 1074 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1075 node.start, I915_CACHE_NONE, 0); 1076 wmb(); 1077 } else { 1078 page_base += offset & PAGE_MASK; 1079 } 1080 1081 if (gtt_user_read(&ggtt->mappable, page_base, page_offset, 1082 user_data, page_length)) { 1083 ret = -EFAULT; 1084 break; 1085 } 1086 1087 remain -= page_length; 1088 user_data += page_length; 1089 offset += page_length; 1090 } 1091 1092 mutex_lock(&i915->drm.struct_mutex); 1093 out_unpin: 1094 if (node.allocated) { 1095 wmb(); 1096 ggtt->base.clear_range(&ggtt->base, 1097 node.start, node.size); 1098 remove_mappable_node(&node); 1099 } else { 1100 i915_vma_unpin(vma); 1101 } 1102 out_unlock: 1103 intel_runtime_pm_put(i915); 1104 mutex_unlock(&i915->drm.struct_mutex); 1105 1106 return ret; 1107 } 1108 1109 /** 1110 * Reads data from the object referenced by handle. 1111 * @dev: drm device pointer 1112 * @data: ioctl data blob 1113 * @file: drm file pointer 1114 * 1115 * On error, the contents of *data are undefined. 
1116 */ 1117 int 1118 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 1119 struct drm_file *file) 1120 { 1121 struct drm_i915_gem_pread *args = data; 1122 struct drm_i915_gem_object *obj; 1123 int ret; 1124 1125 if (args->size == 0) 1126 return 0; 1127 1128 if (!access_ok(VERIFY_WRITE, 1129 u64_to_user_ptr(args->data_ptr), 1130 args->size)) 1131 return -EFAULT; 1132 1133 obj = i915_gem_object_lookup(file, args->handle); 1134 if (!obj) 1135 return -ENOENT; 1136 1137 /* Bounds check source. */ 1138 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1139 ret = -EINVAL; 1140 goto out; 1141 } 1142 1143 trace_i915_gem_object_pread(obj, args->offset, args->size); 1144 1145 ret = i915_gem_object_wait(obj, 1146 I915_WAIT_INTERRUPTIBLE, 1147 MAX_SCHEDULE_TIMEOUT, 1148 to_rps_client(file)); 1149 if (ret) 1150 goto out; 1151 1152 ret = i915_gem_object_pin_pages(obj); 1153 if (ret) 1154 goto out; 1155 1156 ret = i915_gem_shmem_pread(obj, args); 1157 if (ret == -EFAULT || ret == -ENODEV) 1158 ret = i915_gem_gtt_pread(obj, args); 1159 1160 i915_gem_object_unpin_pages(obj); 1161 out: 1162 i915_gem_object_put(obj); 1163 return ret; 1164 } 1165 1166 /* This is the fast write path which cannot handle 1167 * page faults in the source data 1168 */ 1169 1170 static inline bool 1171 ggtt_write(struct io_mapping *mapping, 1172 loff_t base, int offset, 1173 char __user *user_data, int length) 1174 { 1175 void *vaddr; 1176 unsigned long unwritten; 1177 1178 /* We can use the cpu mem copy function because this is X86. */ 1179 vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); 1180 unwritten = __copy_from_user_inatomic_nocache(vaddr + offset, 1181 user_data, length); 1182 io_mapping_unmap_atomic(vaddr); 1183 if (unwritten) { 1184 vaddr = (void __force *) 1185 io_mapping_map_wc(mapping, base, PAGE_SIZE); 1186 unwritten = copy_from_user(vaddr + offset, user_data, length); 1187 io_mapping_unmap(vaddr); 1188 } 1189 1190 return unwritten; 1191 } 1192 1193 /** 1194 * This is the fast pwrite path, where we copy the data directly from the 1195 * user into the GTT, uncached. 
1196 * @obj: i915 GEM object 1197 * @args: pwrite arguments structure 1198 */ 1199 static int 1200 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, 1201 const struct drm_i915_gem_pwrite *args) 1202 { 1203 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1204 struct i915_ggtt *ggtt = &i915->ggtt; 1205 struct drm_mm_node node; 1206 struct i915_vma *vma; 1207 u64 remain, offset; 1208 void __user *user_data; 1209 int ret; 1210 1211 ret = mutex_lock_interruptible(&i915->drm.struct_mutex); 1212 if (ret) 1213 return ret; 1214 1215 intel_runtime_pm_get(i915); 1216 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1217 PIN_MAPPABLE | PIN_NONBLOCK); 1218 if (!IS_ERR(vma)) { 1219 node.start = i915_ggtt_offset(vma); 1220 node.allocated = false; 1221 ret = i915_vma_put_fence(vma); 1222 if (ret) { 1223 i915_vma_unpin(vma); 1224 vma = ERR_PTR(ret); 1225 } 1226 } 1227 if (IS_ERR(vma)) { 1228 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); 1229 if (ret) 1230 goto out_unlock; 1231 GEM_BUG_ON(!node.allocated); 1232 } 1233 1234 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1235 if (ret) 1236 goto out_unpin; 1237 1238 mutex_unlock(&i915->drm.struct_mutex); 1239 1240 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1241 1242 user_data = u64_to_user_ptr(args->data_ptr); 1243 offset = args->offset; 1244 remain = args->size; 1245 while (remain) { 1246 /* Operation in this page 1247 * 1248 * page_base = page offset within aperture 1249 * page_offset = offset within page 1250 * page_length = bytes to copy for this page 1251 */ 1252 u32 page_base = node.start; 1253 unsigned int page_offset = offset_in_page(offset); 1254 unsigned int page_length = PAGE_SIZE - page_offset; 1255 page_length = remain < page_length ? remain : page_length; 1256 if (node.allocated) { 1257 wmb(); /* flush the write before we modify the GGTT */ 1258 ggtt->base.insert_page(&ggtt->base, 1259 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1260 node.start, I915_CACHE_NONE, 0); 1261 wmb(); /* flush modifications to the GGTT (insert_page) */ 1262 } else { 1263 page_base += offset & PAGE_MASK; 1264 } 1265 /* If we get a fault while copying data, then (presumably) our 1266 * source page isn't available. Return the error and we'll 1267 * retry in the slow path. 1268 * If the object is non-shmem backed, we retry again with the 1269 * path that handles page fault. 
		 */
		if (ggtt_write(&ggtt->mappable, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire page.
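	 * A write is partial when either its start offset or its length is
	 * not cacheline aligned, i.e. when (offset | length) has bits set
	 * below boot_cpu_data.x86_clflush_size.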
1389 */ 1390 partial_cacheline_write = 0; 1391 if (needs_clflush & CLFLUSH_BEFORE) 1392 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; 1393 1394 user_data = u64_to_user_ptr(args->data_ptr); 1395 remain = args->size; 1396 offset = offset_in_page(args->offset); 1397 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 1398 struct page *page = i915_gem_object_get_page(obj, idx); 1399 int length; 1400 1401 length = remain; 1402 if (offset + length > PAGE_SIZE) 1403 length = PAGE_SIZE - offset; 1404 1405 ret = shmem_pwrite(page, offset, length, user_data, 1406 page_to_phys(page) & obj_do_bit17_swizzling, 1407 (offset | length) & partial_cacheline_write, 1408 needs_clflush & CLFLUSH_AFTER); 1409 if (ret) 1410 break; 1411 1412 remain -= length; 1413 user_data += length; 1414 offset = 0; 1415 } 1416 1417 intel_fb_obj_flush(obj, ORIGIN_CPU); 1418 i915_gem_obj_finish_shmem_access(obj); 1419 return ret; 1420 } 1421 1422 /** 1423 * Writes data to the object referenced by handle. 1424 * @dev: drm device 1425 * @data: ioctl data blob 1426 * @file: drm file 1427 * 1428 * On error, the contents of the buffer that were to be modified are undefined. 1429 */ 1430 int 1431 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1432 struct drm_file *file) 1433 { 1434 struct drm_i915_gem_pwrite *args = data; 1435 struct drm_i915_gem_object *obj; 1436 int ret; 1437 1438 if (args->size == 0) 1439 return 0; 1440 1441 if (!access_ok(VERIFY_READ, 1442 u64_to_user_ptr(args->data_ptr), 1443 args->size)) 1444 return -EFAULT; 1445 1446 obj = i915_gem_object_lookup(file, args->handle); 1447 if (!obj) 1448 return -ENOENT; 1449 1450 /* Bounds check destination. */ 1451 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) { 1452 ret = -EINVAL; 1453 goto err; 1454 } 1455 1456 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1457 1458 ret = -ENODEV; 1459 if (obj->ops->pwrite) 1460 ret = obj->ops->pwrite(obj, args); 1461 if (ret != -ENODEV) 1462 goto err; 1463 1464 ret = i915_gem_object_wait(obj, 1465 I915_WAIT_INTERRUPTIBLE | 1466 I915_WAIT_ALL, 1467 MAX_SCHEDULE_TIMEOUT, 1468 to_rps_client(file)); 1469 if (ret) 1470 goto err; 1471 1472 ret = i915_gem_object_pin_pages(obj); 1473 if (ret) 1474 goto err; 1475 1476 ret = -EFAULT; 1477 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1478 * it would end up going through the fenced access, and we'll get 1479 * different detiling behavior between reading and writing. 1480 * pread/pwrite currently are reading and writing from the CPU 1481 * perspective, requiring manual detiling by the client. 1482 */ 1483 if (!i915_gem_object_has_struct_page(obj) || 1484 cpu_write_needs_clflush(obj)) 1485 /* Note that the gtt paths might fail with non-page-backed user 1486 * pointers (e.g. gtt mappings when moving data between 1487 * textures). Fallback to the shmem path in that case. 1488 */ 1489 ret = i915_gem_gtt_pwrite_fast(obj, args); 1490 1491 if (ret == -EFAULT || ret == -ENOSPC) { 1492 if (obj->phys_handle) 1493 ret = i915_gem_phys_pwrite(obj, args, file); 1494 else 1495 ret = i915_gem_shmem_pwrite(obj, args); 1496 } 1497 1498 i915_gem_object_unpin_pages(obj); 1499 err: 1500 i915_gem_object_put(obj); 1501 return ret; 1502 } 1503 1504 static inline enum fb_op_origin 1505 write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1506 { 1507 return (domain == I915_GEM_DOMAIN_GTT ? 
1508 obj->frontbuffer_ggtt_origin : ORIGIN_CPU); 1509 } 1510 1511 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) 1512 { 1513 struct drm_i915_private *i915; 1514 struct list_head *list; 1515 struct i915_vma *vma; 1516 1517 list_for_each_entry(vma, &obj->vma_list, obj_link) { 1518 if (!i915_vma_is_ggtt(vma)) 1519 break; 1520 1521 if (i915_vma_is_active(vma)) 1522 continue; 1523 1524 if (!drm_mm_node_allocated(&vma->node)) 1525 continue; 1526 1527 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 1528 } 1529 1530 i915 = to_i915(obj->base.dev); 1531 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; 1532 list_move_tail(&obj->global_link, list); 1533 } 1534 1535 /** 1536 * Called when user space prepares to use an object with the CPU, either 1537 * through the mmap ioctl's mapping or a GTT mapping. 1538 * @dev: drm device 1539 * @data: ioctl data blob 1540 * @file: drm file 1541 */ 1542 int 1543 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1544 struct drm_file *file) 1545 { 1546 struct drm_i915_gem_set_domain *args = data; 1547 struct drm_i915_gem_object *obj; 1548 uint32_t read_domains = args->read_domains; 1549 uint32_t write_domain = args->write_domain; 1550 int err; 1551 1552 /* Only handle setting domains to types used by the CPU. */ 1553 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1554 return -EINVAL; 1555 1556 /* Having something in the write domain implies it's in the read 1557 * domain, and only that read domain. Enforce that in the request. 1558 */ 1559 if (write_domain != 0 && read_domains != write_domain) 1560 return -EINVAL; 1561 1562 obj = i915_gem_object_lookup(file, args->handle); 1563 if (!obj) 1564 return -ENOENT; 1565 1566 /* Try to flush the object off the GPU without holding the lock. 1567 * We will repeat the flush holding the lock in the normal manner 1568 * to catch cases where we are gazumped. 1569 */ 1570 err = i915_gem_object_wait(obj, 1571 I915_WAIT_INTERRUPTIBLE | 1572 (write_domain ? I915_WAIT_ALL : 0), 1573 MAX_SCHEDULE_TIMEOUT, 1574 to_rps_client(file)); 1575 if (err) 1576 goto out; 1577 1578 /* Flush and acquire obj->pages so that we are coherent through 1579 * direct access in memory with previous cached writes through 1580 * shmemfs and that our cache domain tracking remains valid. 1581 * For example, if the obj->filp was moved to swap without us 1582 * being notified and releasing the pages, we would mistakenly 1583 * continue to assume that the obj remained out of the CPU cached 1584 * domain. 
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
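	 * (Imported dma-buf objects are instead mapped through the dma-buf
	 * file descriptor itself, where the exporter supports mmap.)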
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -EINVAL;
	}

	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
 *    hangs on some architectures, corruption on others. An attempt to service
 *    a GTT page fault from a snoopable object will generate a SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines require a fence register for display and blitter access
 *    as well). Contention of the fence registers will cause the previous users
 *    to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
1764 */ 1765 int i915_gem_mmap_gtt_version(void) 1766 { 1767 return 1; 1768 } 1769 1770 static inline struct i915_ggtt_view 1771 compute_partial_view(struct drm_i915_gem_object *obj, 1772 pgoff_t page_offset, 1773 unsigned int chunk) 1774 { 1775 struct i915_ggtt_view view; 1776 1777 if (i915_gem_object_is_tiled(obj)) 1778 chunk = roundup(chunk, tile_row_pages(obj)); 1779 1780 view.type = I915_GGTT_VIEW_PARTIAL; 1781 view.partial.offset = rounddown(page_offset, chunk); 1782 view.partial.size = 1783 min_t(unsigned int, chunk, 1784 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 1785 1786 /* If the partial covers the entire object, just create a normal VMA. */ 1787 if (chunk >= obj->base.size >> PAGE_SHIFT) 1788 view.type = I915_GGTT_VIEW_NORMAL; 1789 1790 return view; 1791 } 1792 1793 /** 1794 * i915_gem_fault - fault a page into the GTT 1795 * @vmf: fault info 1796 * 1797 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1798 * from userspace. The fault handler takes care of binding the object to 1799 * the GTT (if needed), allocating and programming a fence register (again, 1800 * only if needed based on whether the old reg is still valid or the object 1801 * is tiled) and inserting a new PTE into the faulting process. 1802 * 1803 * Note that the faulting process may involve evicting existing objects 1804 * from the GTT and/or fence registers to make room. So performance may 1805 * suffer if the GTT working set is large or there are few fence registers 1806 * left. 1807 * 1808 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 1809 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). 1810 */ 1811 int i915_gem_fault(struct vm_fault *vmf) 1812 { 1813 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ 1814 struct vm_area_struct *area = vmf->vma; 1815 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 1816 struct drm_device *dev = obj->base.dev; 1817 struct drm_i915_private *dev_priv = to_i915(dev); 1818 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1819 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1820 struct i915_vma *vma; 1821 pgoff_t page_offset; 1822 unsigned int flags; 1823 int ret; 1824 1825 /* We don't use vmf->pgoff since that has the fake offset */ 1826 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 1827 1828 trace_i915_gem_object_fault(obj, page_offset, true, write); 1829 1830 /* Try to flush the object off the GPU first without holding the lock. 1831 * Upon acquiring the lock, we will perform our sanity checks and then 1832 * repeat the flush holding the lock in the normal manner to catch cases 1833 * where we are gazumped. 1834 */ 1835 ret = i915_gem_object_wait(obj, 1836 I915_WAIT_INTERRUPTIBLE, 1837 MAX_SCHEDULE_TIMEOUT, 1838 NULL); 1839 if (ret) 1840 goto err; 1841 1842 ret = i915_gem_object_pin_pages(obj); 1843 if (ret) 1844 goto err; 1845 1846 intel_runtime_pm_get(dev_priv); 1847 1848 ret = i915_mutex_lock_interruptible(dev); 1849 if (ret) 1850 goto err_rpm; 1851 1852 /* Access to snoopable pages through the GTT is incoherent. */ 1853 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 1854 ret = -EFAULT; 1855 goto err_unlock; 1856 } 1857 1858 /* If the object is smaller than a couple of partial vma, it is 1859 * not worth only creating a single partial vma - we may as well 1860 * clear enough space for the full object. 
	 */
	flags = PIN_MAPPABLE;
	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
		flags |= PIN_NONBLOCK | PIN_NONFAULT;

	/* Now pin it into the GTT as needed */
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
	if (IS_ERR(vma)) {
		/* Use a partial view if it is bigger than available space */
		struct i915_ggtt_view view =
			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);

		/* Userspace is now writing through an untracked VMA, abandon
		 * all hope that the hardware is able to track future writes.
		 */
		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
	}
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto err_unpin;

	ret = i915_vma_get_fence(vma);
	if (ret)
		goto err_unpin;

	/* Mark as being mmapped into userspace for later revocation */
	assert_rpm_wakelock_held(dev_priv);
	if (list_empty(&obj->userfault_link))
		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);

	/* Finally, remap it using the new GTT offset */
	ret = remap_io_mapping(area,
			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
			       min_t(u64, vma->size, area->vm_end - area->vm_start),
			       &ggtt->mappable);

err_unpin:
	__i915_vma_unpin(vma);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_rpm:
	intel_runtime_pm_put(dev_priv);
	i915_gem_object_unpin_pages(obj);
err:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			ret = VM_FAULT_SIGBUS;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		ret = VM_FAULT_NOPAGE;
		break;
	case -ENOMEM:
		ret = VM_FAULT_OOM;
		break;
	case -ENOSPC:
	case -EFAULT:
		ret = VM_FAULT_SIGBUS;
		break;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_FAULT_SIGBUS;
		break;
	}
	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
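 *
 * Only GTT mmaps set up through the fake mmap offset are revoked here; CPU
 * mmaps of the shmem backing store created by i915_gem_mmap_ioctl() are not
 * affected.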
1969 */ 1970 void 1971 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1972 { 1973 struct drm_i915_private *i915 = to_i915(obj->base.dev); 1974 1975 /* Serialisation between user GTT access and our code depends upon 1976 * revoking the CPU's PTE whilst the mutex is held. The next user 1977 * pagefault then has to wait until we release the mutex. 1978 * 1979 * Note that RPM complicates somewhat by adding an additional 1980 * requirement that operations to the GGTT be made holding the RPM 1981 * wakeref. 1982 */ 1983 lockdep_assert_held(&i915->drm.struct_mutex); 1984 intel_runtime_pm_get(i915); 1985 1986 if (list_empty(&obj->userfault_link)) 1987 goto out; 1988 1989 list_del_init(&obj->userfault_link); 1990 drm_vma_node_unmap(&obj->base.vma_node, 1991 obj->base.dev->anon_inode->i_mapping); 1992 1993 /* Ensure that the CPU's PTE are revoked and there are not outstanding 1994 * memory transactions from userspace before we return. The TLB 1995 * flushing implied above by changing the PTE above *should* be 1996 * sufficient, an extra barrier here just provides us with a bit 1997 * of paranoid documentation about our requirement to serialise 1998 * memory writes before touching registers / GSM. 1999 */ 2000 wmb(); 2001 2002 out: 2003 intel_runtime_pm_put(i915); 2004 } 2005 2006 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2007 { 2008 struct drm_i915_gem_object *obj, *on; 2009 int i; 2010 2011 /* 2012 * Only called during RPM suspend. All users of the userfault_list 2013 * must be holding an RPM wakeref to ensure that this can not 2014 * run concurrently with themselves (and use the struct_mutex for 2015 * protection between themselves). 2016 */ 2017 2018 list_for_each_entry_safe(obj, on, 2019 &dev_priv->mm.userfault_list, userfault_link) { 2020 list_del_init(&obj->userfault_link); 2021 drm_vma_node_unmap(&obj->base.vma_node, 2022 obj->base.dev->anon_inode->i_mapping); 2023 } 2024 2025 /* The fence will be lost when the device powers down. If any were 2026 * in use by hardware (i.e. they are pinned), we should not be powering 2027 * down! All other fences will be reacquired by the user upon waking. 2028 */ 2029 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2030 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2031 2032 /* Ideally we want to assert that the fence register is not 2033 * live at this point (i.e. that no piece of code will be 2034 * trying to write through fence + GTT, as that both violates 2035 * our tracking of activity and associated locking/barriers, 2036 * but also is illegal given that the hw is powered down). 2037 * 2038 * Previously we used reg->pin_count as a "liveness" indicator. 2039 * That is not sufficient, and we need a more fine-grained 2040 * tool if we want to have a sanity check here. 
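 * For now we settle for asserting that no object backing a live fence is still on the userfault_list (i.e. still mmapped through the GTT), and we mark every remaining fence dirty so that it is rewritten when the device wakes again.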
2041 */ 2042 2043 if (!reg->vma) 2044 continue; 2045 2046 GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link)); 2047 reg->dirty = true; 2048 } 2049 } 2050 2051 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2052 { 2053 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2054 int err; 2055 2056 err = drm_gem_create_mmap_offset(&obj->base); 2057 if (likely(!err)) 2058 return 0; 2059 2060 /* Attempt to reap some mmap space from dead objects */ 2061 do { 2062 err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE); 2063 if (err) 2064 break; 2065 2066 i915_gem_drain_freed_objects(dev_priv); 2067 err = drm_gem_create_mmap_offset(&obj->base); 2068 if (!err) 2069 break; 2070 2071 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2072 2073 return err; 2074 } 2075 2076 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2077 { 2078 drm_gem_free_mmap_offset(&obj->base); 2079 } 2080 2081 int 2082 i915_gem_mmap_gtt(struct drm_file *file, 2083 struct drm_device *dev, 2084 uint32_t handle, 2085 uint64_t *offset) 2086 { 2087 struct drm_i915_gem_object *obj; 2088 int ret; 2089 2090 obj = i915_gem_object_lookup(file, handle); 2091 if (!obj) 2092 return -ENOENT; 2093 2094 ret = i915_gem_object_create_mmap_offset(obj); 2095 if (ret == 0) 2096 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2097 2098 i915_gem_object_put(obj); 2099 return ret; 2100 } 2101 2102 /** 2103 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2104 * @dev: DRM device 2105 * @data: GTT mapping ioctl data 2106 * @file: GEM object info 2107 * 2108 * Simply returns the fake offset to userspace so it can mmap it. 2109 * The mmap call will end up in drm_gem_mmap(), which will set things 2110 * up so we can get faults in the handler above. 2111 * 2112 * The fault handler will take care of binding the object into the GTT 2113 * (since it may have been evicted to make room for something), allocating 2114 * a fence register, and mapping the appropriate aperture address into 2115 * userspace. 2116 */ 2117 int 2118 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2119 struct drm_file *file) 2120 { 2121 struct drm_i915_gem_mmap_gtt *args = data; 2122 2123 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2124 } 2125 2126 /* Immediately discard the backing storage */ 2127 static void 2128 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2129 { 2130 i915_gem_object_free_mmap_offset(obj); 2131 2132 if (obj->base.filp == NULL) 2133 return; 2134 2135 /* Our goal here is to return as much of the memory as 2136 * is possible back to the system as we are called from OOM. 2137 * To do this we must instruct the shmfs to drop all of its 2138 * backing pages, *now*.
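 * Once truncated, the object is marked __I915_MADV_PURGED and obj->mm.pages is poisoned with ERR_PTR(-EFAULT), so later users see an error rather than silently reusing the discarded backing store.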
2139 */ 2140 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2141 obj->mm.madv = __I915_MADV_PURGED; 2142 obj->mm.pages = ERR_PTR(-EFAULT); 2143 } 2144 2145 /* Try to discard unwanted pages */ 2146 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2147 { 2148 struct address_space *mapping; 2149 2150 lockdep_assert_held(&obj->mm.lock); 2151 GEM_BUG_ON(obj->mm.pages); 2152 2153 switch (obj->mm.madv) { 2154 case I915_MADV_DONTNEED: 2155 i915_gem_object_truncate(obj); 2156 case __I915_MADV_PURGED: 2157 return; 2158 } 2159 2160 if (obj->base.filp == NULL) 2161 return; 2162 2163 mapping = obj->base.filp->f_mapping, 2164 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2165 } 2166 2167 static void 2168 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2169 struct sg_table *pages) 2170 { 2171 struct sgt_iter sgt_iter; 2172 struct page *page; 2173 2174 __i915_gem_object_release_shmem(obj, pages, true); 2175 2176 i915_gem_gtt_finish_pages(obj, pages); 2177 2178 if (i915_gem_object_needs_bit17_swizzle(obj)) 2179 i915_gem_object_save_bit_17_swizzle(obj, pages); 2180 2181 for_each_sgt_page(page, sgt_iter, pages) { 2182 if (obj->mm.dirty) 2183 set_page_dirty(page); 2184 2185 if (obj->mm.madv == I915_MADV_WILLNEED) 2186 mark_page_accessed(page); 2187 2188 put_page(page); 2189 } 2190 obj->mm.dirty = false; 2191 2192 sg_free_table(pages); 2193 kfree(pages); 2194 } 2195 2196 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2197 { 2198 struct radix_tree_iter iter; 2199 void **slot; 2200 2201 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2202 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2203 } 2204 2205 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2206 enum i915_mm_subclass subclass) 2207 { 2208 struct sg_table *pages; 2209 2210 if (i915_gem_object_has_pinned_pages(obj)) 2211 return; 2212 2213 GEM_BUG_ON(obj->bind_count); 2214 if (!READ_ONCE(obj->mm.pages)) 2215 return; 2216 2217 /* May be called by shrinker from within get_pages() (on another bo) */ 2218 mutex_lock_nested(&obj->mm.lock, subclass); 2219 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2220 goto unlock; 2221 2222 /* ->put_pages might need to allocate memory for the bit17 swizzle 2223 * array, hence protect them from being reaped by removing them from gtt 2224 * lists early. 
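 * obj->mm.pages is fetched and zeroed under mm.lock first, so concurrent lookups observe the object as having no pages, and any kernel vmap/kmap of the object is torn down before obj->ops->put_pages() is called on the detached page set.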
*/ 2225 pages = fetch_and_zero(&obj->mm.pages); 2226 GEM_BUG_ON(!pages); 2227 2228 if (obj->mm.mapping) { 2229 void *ptr; 2230 2231 ptr = ptr_mask_bits(obj->mm.mapping); 2232 if (is_vmalloc_addr(ptr)) 2233 vunmap(ptr); 2234 else 2235 kunmap(kmap_to_page(ptr)); 2236 2237 obj->mm.mapping = NULL; 2238 } 2239 2240 __i915_gem_object_reset_page_iter(obj); 2241 2242 if (!IS_ERR(pages)) 2243 obj->ops->put_pages(obj, pages); 2244 2245 unlock: 2246 mutex_unlock(&obj->mm.lock); 2247 } 2248 2249 static bool i915_sg_trim(struct sg_table *orig_st) 2250 { 2251 struct sg_table new_st; 2252 struct scatterlist *sg, *new_sg; 2253 unsigned int i; 2254 2255 if (orig_st->nents == orig_st->orig_nents) 2256 return false; 2257 2258 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2259 return false; 2260 2261 new_sg = new_st.sgl; 2262 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2263 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2264 /* called before being DMA mapped, no need to copy sg->dma_* */ 2265 new_sg = sg_next(new_sg); 2266 } 2267 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2268 2269 sg_free_table(orig_st); 2270 2271 *orig_st = new_st; 2272 return true; 2273 } 2274 2275 static struct sg_table * 2276 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2277 { 2278 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2279 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2280 unsigned long i; 2281 struct address_space *mapping; 2282 struct sg_table *st; 2283 struct scatterlist *sg; 2284 struct sgt_iter sgt_iter; 2285 struct page *page; 2286 unsigned long last_pfn = 0; /* suppress gcc warning */ 2287 unsigned int max_segment; 2288 gfp_t noreclaim; 2289 int ret; 2290 2291 /* Assert that the object is not currently in any GPU domain. As it 2292 * wasn't in the GTT, there shouldn't be any way it could have been in 2293 * a GPU cache 2294 */ 2295 GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2296 GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2297 2298 max_segment = swiotlb_max_segment(); 2299 if (!max_segment) 2300 max_segment = rounddown(UINT_MAX, PAGE_SIZE); 2301 2302 st = kmalloc(sizeof(*st), GFP_KERNEL); 2303 if (st == NULL) 2304 return ERR_PTR(-ENOMEM); 2305 2306 rebuild_st: 2307 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2308 kfree(st); 2309 return ERR_PTR(-ENOMEM); 2310 } 2311 2312 /* Get the list of pages out of our struct file. They'll be pinned 2313 * at this point until we release them. 2314 * 2315 * Fail silently without starting the shrinker 2316 */ 2317 mapping = obj->base.filp->f_mapping; 2318 noreclaim = mapping_gfp_constraint(mapping, 2319 ~(__GFP_IO | __GFP_RECLAIM)); 2320 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2321 2322 sg = st->sgl; 2323 st->nents = 0; 2324 for (i = 0; i < page_count; i++) { 2325 const unsigned int shrink[] = { 2326 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2327 0, 2328 }, *s = shrink; 2329 gfp_t gfp = noreclaim; 2330 2331 do { 2332 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2333 if (likely(!IS_ERR(page))) 2334 break; 2335 2336 if (!*s) { 2337 ret = PTR_ERR(page); 2338 goto err_sg; 2339 } 2340 2341 i915_gem_shrink(dev_priv, 2 * page_count, *s++); 2342 cond_resched(); 2343 2344 /* We've tried hard to allocate the memory by reaping 2345 * our own buffer, now let the real VM do its job and 2346 * go down in flames if truly OOM. 
2347 * 2348 * However, since graphics tend to be disposable, 2349 * defer the oom here by reporting the ENOMEM back 2350 * to userspace. 2351 */ 2352 if (!*s) { 2353 /* reclaim and warn, but no oom */ 2354 gfp = mapping_gfp_mask(mapping); 2355 2356 /* Our bo are always dirty and so we require 2357 * kswapd to reclaim our pages (direct reclaim 2358 * does not effectively begin pageout of our 2359 * buffers on its own). However, direct reclaim 2360 * only waits for kswapd when under allocation 2361 * congestion. So as a result __GFP_RECLAIM is 2362 * unreliable and fails to actually reclaim our 2363 * dirty pages -- unless you try over and over 2364 * again with !__GFP_NORETRY. However, we still 2365 * want to fail this allocation rather than 2366 * trigger the out-of-memory killer and for 2367 * this we want the future __GFP_MAYFAIL. 2368 */ 2369 } 2370 } while (1); 2371 2372 if (!i || 2373 sg->length >= max_segment || 2374 page_to_pfn(page) != last_pfn + 1) { 2375 if (i) 2376 sg = sg_next(sg); 2377 st->nents++; 2378 sg_set_page(sg, page, PAGE_SIZE, 0); 2379 } else { 2380 sg->length += PAGE_SIZE; 2381 } 2382 last_pfn = page_to_pfn(page); 2383 2384 /* Check that the i965g/gm workaround works. */ 2385 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2386 } 2387 if (sg) /* loop terminated early; short sg table */ 2388 sg_mark_end(sg); 2389 2390 /* Trim unused sg entries to avoid wasting memory. */ 2391 i915_sg_trim(st); 2392 2393 ret = i915_gem_gtt_prepare_pages(obj, st); 2394 if (ret) { 2395 /* DMA remapping failed? One possible cause is that 2396 * it could not reserve enough large entries, asking 2397 * for PAGE_SIZE chunks instead may be helpful. 2398 */ 2399 if (max_segment > PAGE_SIZE) { 2400 for_each_sgt_page(page, sgt_iter, st) 2401 put_page(page); 2402 sg_free_table(st); 2403 2404 max_segment = PAGE_SIZE; 2405 goto rebuild_st; 2406 } else { 2407 dev_warn(&dev_priv->drm.pdev->dev, 2408 "Failed to DMA remap %lu pages\n", 2409 page_count); 2410 goto err_pages; 2411 } 2412 } 2413 2414 if (i915_gem_object_needs_bit17_swizzle(obj)) 2415 i915_gem_object_do_bit_17_swizzle(obj, st); 2416 2417 return st; 2418 2419 err_sg: 2420 sg_mark_end(sg); 2421 err_pages: 2422 for_each_sgt_page(page, sgt_iter, st) 2423 put_page(page); 2424 sg_free_table(st); 2425 kfree(st); 2426 2427 /* shmemfs first checks if there is enough memory to allocate the page 2428 * and reports ENOSPC should there be insufficient, along with the usual 2429 * ENOMEM for a genuine allocation failure. 2430 * 2431 * We use ENOSPC in our driver to mean that we have run out of aperture 2432 * space and so want to translate the error from shmemfs back to our 2433 * usual understanding of ENOMEM. 
2434 */ 2435 if (ret == -ENOSPC) 2436 ret = -ENOMEM; 2437 2438 return ERR_PTR(ret); 2439 } 2440 2441 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 2442 struct sg_table *pages) 2443 { 2444 lockdep_assert_held(&obj->mm.lock); 2445 2446 obj->mm.get_page.sg_pos = pages->sgl; 2447 obj->mm.get_page.sg_idx = 0; 2448 2449 obj->mm.pages = pages; 2450 2451 if (i915_gem_object_is_tiled(obj) && 2452 to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 2453 GEM_BUG_ON(obj->mm.quirked); 2454 __i915_gem_object_pin_pages(obj); 2455 obj->mm.quirked = true; 2456 } 2457 } 2458 2459 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2460 { 2461 struct sg_table *pages; 2462 2463 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 2464 2465 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 2466 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2467 return -EFAULT; 2468 } 2469 2470 pages = obj->ops->get_pages(obj); 2471 if (unlikely(IS_ERR(pages))) 2472 return PTR_ERR(pages); 2473 2474 __i915_gem_object_set_pages(obj, pages); 2475 return 0; 2476 } 2477 2478 /* Ensure that the associated pages are gathered from the backing storage 2479 * and pinned into our object. i915_gem_object_pin_pages() may be called 2480 * multiple times before they are released by a single call to 2481 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 2482 * either as a result of memory pressure (reaping pages under the shrinker) 2483 * or as the object is itself released. 2484 */ 2485 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2486 { 2487 int err; 2488 2489 err = mutex_lock_interruptible(&obj->mm.lock); 2490 if (err) 2491 return err; 2492 2493 if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { 2494 err = ____i915_gem_object_get_pages(obj); 2495 if (err) 2496 goto unlock; 2497 2498 smp_mb__before_atomic(); 2499 } 2500 atomic_inc(&obj->mm.pages_pin_count); 2501 2502 unlock: 2503 mutex_unlock(&obj->mm.lock); 2504 return err; 2505 } 2506 2507 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2508 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 2509 enum i915_map_type type) 2510 { 2511 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2512 struct sg_table *sgt = obj->mm.pages; 2513 struct sgt_iter sgt_iter; 2514 struct page *page; 2515 struct page *stack_pages[32]; 2516 struct page **pages = stack_pages; 2517 unsigned long i = 0; 2518 pgprot_t pgprot; 2519 void *addr; 2520 2521 /* A single page can always be kmapped */ 2522 if (n_pages == 1 && type == I915_MAP_WB) 2523 return kmap(sg_page(sgt->sgl)); 2524 2525 if (n_pages > ARRAY_SIZE(stack_pages)) { 2526 /* Too big for stack -- allocate temporary array instead */ 2527 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); 2528 if (!pages) 2529 return NULL; 2530 } 2531 2532 for_each_sgt_page(page, sgt_iter, sgt) 2533 pages[i++] = page; 2534 2535 /* Check that we have the expected number of pages */ 2536 GEM_BUG_ON(i != n_pages); 2537 2538 switch (type) { 2539 case I915_MAP_WB: 2540 pgprot = PAGE_KERNEL; 2541 break; 2542 case I915_MAP_WC: 2543 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 2544 break; 2545 } 2546 addr = vmap(pages, n_pages, 0, pgprot); 2547 2548 if (pages != stack_pages) 2549 drm_free_large(pages); 2550 2551 return addr; 2552 } 2553 2554 /* get, pin, and map the pages of the object into kernel space */ 2555 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 2556 enum i915_map_type type) 2557 { 2558 enum i915_map_type 
has_type; 2559 bool pinned; 2560 void *ptr; 2561 int ret; 2562 2563 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 2564 2565 ret = mutex_lock_interruptible(&obj->mm.lock); 2566 if (ret) 2567 return ERR_PTR(ret); 2568 2569 pinned = true; 2570 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 2571 if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { 2572 ret = ____i915_gem_object_get_pages(obj); 2573 if (ret) 2574 goto err_unlock; 2575 2576 smp_mb__before_atomic(); 2577 } 2578 atomic_inc(&obj->mm.pages_pin_count); 2579 pinned = false; 2580 } 2581 GEM_BUG_ON(!obj->mm.pages); 2582 2583 ptr = ptr_unpack_bits(obj->mm.mapping, has_type); 2584 if (ptr && has_type != type) { 2585 if (pinned) { 2586 ret = -EBUSY; 2587 goto err_unpin; 2588 } 2589 2590 if (is_vmalloc_addr(ptr)) 2591 vunmap(ptr); 2592 else 2593 kunmap(kmap_to_page(ptr)); 2594 2595 ptr = obj->mm.mapping = NULL; 2596 } 2597 2598 if (!ptr) { 2599 ptr = i915_gem_object_map(obj, type); 2600 if (!ptr) { 2601 ret = -ENOMEM; 2602 goto err_unpin; 2603 } 2604 2605 obj->mm.mapping = ptr_pack_bits(ptr, type); 2606 } 2607 2608 out_unlock: 2609 mutex_unlock(&obj->mm.lock); 2610 return ptr; 2611 2612 err_unpin: 2613 atomic_dec(&obj->mm.pages_pin_count); 2614 err_unlock: 2615 ptr = ERR_PTR(ret); 2616 goto out_unlock; 2617 } 2618 2619 static int 2620 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 2621 const struct drm_i915_gem_pwrite *arg) 2622 { 2623 struct address_space *mapping = obj->base.filp->f_mapping; 2624 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 2625 u64 remain, offset; 2626 unsigned int pg; 2627 2628 /* Before we instantiate/pin the backing store for our use, we 2629 * can prepopulate the shmemfs filp efficiently using a write into 2630 * the pagecache. We avoid the penalty of instantiating all the 2631 * pages, important if the user is just writing to a few and never 2632 * uses the object on the GPU, and using a direct write into shmemfs 2633 * allows it to avoid the cost of retrieving a page (either swapin 2634 * or clearing-before-use) before it is overwritten. 2635 */ 2636 if (READ_ONCE(obj->mm.pages)) 2637 return -ENODEV; 2638 2639 /* Before the pages are instantiated the object is treated as being 2640 * in the CPU domain. The pages will be clflushed as required before 2641 * use, and we can freely write into the pages directly. If userspace 2642 * races pwrite with any other operation; corruption will ensue - 2643 * that is userspace's prerogative! 
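 * The copy below therefore simply walks the user buffer a page at a time, using pagecache_write_begin()/pagecache_write_end() around a kmap'd copy_from_user() into the shmemfs pagecache.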
2644 */ 2645 2646 remain = arg->size; 2647 offset = arg->offset; 2648 pg = offset_in_page(offset); 2649 2650 do { 2651 unsigned int len, unwritten; 2652 struct page *page; 2653 void *data, *vaddr; 2654 int err; 2655 2656 len = PAGE_SIZE - pg; 2657 if (len > remain) 2658 len = remain; 2659 2660 err = pagecache_write_begin(obj->base.filp, mapping, 2661 offset, len, 0, 2662 &page, &data); 2663 if (err < 0) 2664 return err; 2665 2666 vaddr = kmap(page); 2667 unwritten = copy_from_user(vaddr + pg, user_data, len); 2668 kunmap(page); 2669 2670 err = pagecache_write_end(obj->base.filp, mapping, 2671 offset, len, len - unwritten, 2672 page, data); 2673 if (err < 0) 2674 return err; 2675 2676 if (unwritten) 2677 return -EFAULT; 2678 2679 remain -= len; 2680 user_data += len; 2681 offset += len; 2682 pg = 0; 2683 } while (remain); 2684 2685 return 0; 2686 } 2687 2688 static bool ban_context(const struct i915_gem_context *ctx) 2689 { 2690 return (i915_gem_context_is_bannable(ctx) && 2691 ctx->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD); 2692 } 2693 2694 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 2695 { 2696 ctx->guilty_count++; 2697 ctx->ban_score += CONTEXT_SCORE_GUILTY; 2698 if (ban_context(ctx)) 2699 i915_gem_context_set_banned(ctx); 2700 2701 DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", 2702 ctx->name, ctx->ban_score, 2703 yesno(i915_gem_context_is_banned(ctx))); 2704 2705 if (!i915_gem_context_is_banned(ctx) || IS_ERR_OR_NULL(ctx->file_priv)) 2706 return; 2707 2708 ctx->file_priv->context_bans++; 2709 DRM_DEBUG_DRIVER("client %s has had %d context banned\n", 2710 ctx->name, ctx->file_priv->context_bans); 2711 } 2712 2713 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 2714 { 2715 ctx->active_count++; 2716 } 2717 2718 struct drm_i915_gem_request * 2719 i915_gem_find_active_request(struct intel_engine_cs *engine) 2720 { 2721 struct drm_i915_gem_request *request, *active = NULL; 2722 unsigned long flags; 2723 2724 /* We are called by the error capture and reset at a random 2725 * point in time. In particular, note that neither is crucially 2726 * ordered with an interrupt. After a hang, the GPU is dead and we 2727 * assume that no more writes can happen (we waited long enough for 2728 * all writes that were in transaction to be flushed) - adding an 2729 * extra delay for a recent interrupt is pointless. Hence, we do 2730 * not need an engine->irq_seqno_barrier() before the seqno reads. 
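 * We simply walk the engine timeline under its lock and return the first request whose global seqno the hardware has not yet reported as completed; that is our best guess at the request that hung.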
2731 */ 2732 spin_lock_irqsave(&engine->timeline->lock, flags); 2733 list_for_each_entry(request, &engine->timeline->requests, link) { 2734 if (__i915_gem_request_completed(request, 2735 request->global_seqno)) 2736 continue; 2737 2738 GEM_BUG_ON(request->engine != engine); 2739 GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 2740 &request->fence.flags)); 2741 2742 active = request; 2743 break; 2744 } 2745 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2746 2747 return active; 2748 } 2749 2750 static bool engine_stalled(struct intel_engine_cs *engine) 2751 { 2752 if (!engine->hangcheck.stalled) 2753 return false; 2754 2755 /* Check for possible seqno movement after hang declaration */ 2756 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { 2757 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 2764 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 2765 { 2766 struct intel_engine_cs *engine; 2767 enum intel_engine_id id; 2768 int err = 0; 2769 2770 /* Ensure irq handler finishes, and not run again. */ 2771 for_each_engine(engine, dev_priv, id) { 2772 struct drm_i915_gem_request *request; 2773 2774 /* Prevent the signaler thread from updating the request 2775 * state (by calling dma_fence_signal) as we are processing 2776 * the reset. The write from the GPU of the seqno is 2777 * asynchronous and the signaler thread may see a different 2778 * value to us and declare the request complete, even though 2779 * the reset routine have picked that request as the active 2780 * (incomplete) request. This conflict is not handled 2781 * gracefully! 2782 */ 2783 kthread_park(engine->breadcrumbs.signaler); 2784 2785 /* Prevent request submission to the hardware until we have 2786 * completed the reset in i915_gem_reset_finish(). If a request 2787 * is completed by one engine, it may then queue a request 2788 * to a second via its engine->irq_tasklet *just* as we are 2789 * calling engine->init_hw() and also writing the ELSP. 2790 * Turning off the engine->irq_tasklet until the reset is over 2791 * prevents the race. 2792 */ 2793 tasklet_kill(&engine->irq_tasklet); 2794 tasklet_disable(&engine->irq_tasklet); 2795 2796 if (engine->irq_seqno_barrier) 2797 engine->irq_seqno_barrier(engine); 2798 2799 if (engine_stalled(engine)) { 2800 request = i915_gem_find_active_request(engine); 2801 if (request && request->fence.error == -EIO) 2802 err = -EIO; /* Previous reset failed! */ 2803 } 2804 } 2805 2806 i915_gem_revoke_fences(dev_priv); 2807 2808 return err; 2809 } 2810 2811 static void skip_request(struct drm_i915_gem_request *request) 2812 { 2813 void *vaddr = request->ring->vaddr; 2814 u32 head; 2815 2816 /* As this request likely depends on state from the lost 2817 * context, clear out all the user operations leaving the 2818 * breadcrumb at the end (so we get the fence notifications). 
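 * Concretely, the ring contents from request->head up to request->postfix are zeroed (handling the wrap at the end of the ring) and the request's fence is flagged with -EIO.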
2819 */ 2820 head = request->head; 2821 if (request->postfix < head) { 2822 memset(vaddr + head, 0, request->ring->size - head); 2823 head = 0; 2824 } 2825 memset(vaddr + head, 0, request->postfix - head); 2826 2827 dma_fence_set_error(&request->fence, -EIO); 2828 } 2829 2830 static void engine_skip_context(struct drm_i915_gem_request *request) 2831 { 2832 struct intel_engine_cs *engine = request->engine; 2833 struct i915_gem_context *hung_ctx = request->ctx; 2834 struct intel_timeline *timeline; 2835 unsigned long flags; 2836 2837 timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); 2838 2839 spin_lock_irqsave(&engine->timeline->lock, flags); 2840 spin_lock(&timeline->lock); 2841 2842 list_for_each_entry_continue(request, &engine->timeline->requests, link) 2843 if (request->ctx == hung_ctx) 2844 skip_request(request); 2845 2846 list_for_each_entry(request, &timeline->requests, link) 2847 skip_request(request); 2848 2849 spin_unlock(&timeline->lock); 2850 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2851 } 2852 2853 /* Returns true if the request was guilty of hang */ 2854 static bool i915_gem_reset_request(struct drm_i915_gem_request *request) 2855 { 2856 /* Read once and return the resolution */ 2857 const bool guilty = engine_stalled(request->engine); 2858 2859 /* The guilty request will get skipped on a hung engine. 2860 * 2861 * Users of client default contexts do not rely on logical 2862 * state preserved between batches so it is safe to execute 2863 * queued requests following the hang. Non default contexts 2864 * rely on preserved state, so skipping a batch loses the 2865 * evolution of the state and it needs to be considered corrupted. 2866 * Executing more queued batches on top of corrupted state is 2867 * risky. But we take the risk by trying to advance through 2868 * the queued requests in order to make the client behaviour 2869 * more predictable around resets, by not throwing away random 2870 * amount of batches it has prepared for execution. Sophisticated 2871 * clients can use gem_reset_stats_ioctl and dma fence status 2872 * (exported via sync_file info ioctl on explicit fences) to observe 2873 * when it loses the context state and should rebuild accordingly. 2874 * 2875 * The context ban, and ultimately the client ban, mechanism are safety 2876 * valves if client submission ends up resulting in nothing more than 2877 * subsequent hangs. 2878 */ 2879 2880 if (guilty) { 2881 i915_gem_context_mark_guilty(request->ctx); 2882 skip_request(request); 2883 } else { 2884 i915_gem_context_mark_innocent(request->ctx); 2885 dma_fence_set_error(&request->fence, -EAGAIN); 2886 } 2887 2888 return guilty; 2889 } 2890 2891 static void i915_gem_reset_engine(struct intel_engine_cs *engine) 2892 { 2893 struct drm_i915_gem_request *request; 2894 2895 request = i915_gem_find_active_request(engine); 2896 if (request && i915_gem_reset_request(request)) { 2897 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 2898 engine->name, request->global_seqno); 2899 2900 /* If this context is now banned, skip all pending requests. 
*/ 2901 if (i915_gem_context_is_banned(request->ctx)) 2902 engine_skip_context(request); 2903 } 2904 2905 /* Setup the CS to resume from the breadcrumb of the hung request */ 2906 engine->reset_hw(engine, request); 2907 } 2908 2909 void i915_gem_reset(struct drm_i915_private *dev_priv) 2910 { 2911 struct intel_engine_cs *engine; 2912 enum intel_engine_id id; 2913 2914 lockdep_assert_held(&dev_priv->drm.struct_mutex); 2915 2916 i915_gem_retire_requests(dev_priv); 2917 2918 for_each_engine(engine, dev_priv, id) { 2919 struct i915_gem_context *ctx; 2920 2921 i915_gem_reset_engine(engine); 2922 ctx = fetch_and_zero(&engine->last_retired_context); 2923 if (ctx) 2924 engine->context_unpin(engine, ctx); 2925 } 2926 2927 i915_gem_restore_fences(dev_priv); 2928 2929 if (dev_priv->gt.awake) { 2930 intel_sanitize_gt_powersave(dev_priv); 2931 intel_enable_gt_powersave(dev_priv); 2932 if (INTEL_GEN(dev_priv) >= 6) 2933 gen6_rps_busy(dev_priv); 2934 } 2935 } 2936 2937 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 2938 { 2939 struct intel_engine_cs *engine; 2940 enum intel_engine_id id; 2941 2942 lockdep_assert_held(&dev_priv->drm.struct_mutex); 2943 2944 for_each_engine(engine, dev_priv, id) { 2945 tasklet_enable(&engine->irq_tasklet); 2946 kthread_unpark(engine->breadcrumbs.signaler); 2947 } 2948 } 2949 2950 static void nop_submit_request(struct drm_i915_gem_request *request) 2951 { 2952 dma_fence_set_error(&request->fence, -EIO); 2953 i915_gem_request_submit(request); 2954 intel_engine_init_global_seqno(request->engine, request->global_seqno); 2955 } 2956 2957 static void engine_set_wedged(struct intel_engine_cs *engine) 2958 { 2959 struct drm_i915_gem_request *request; 2960 unsigned long flags; 2961 2962 /* We need to be sure that no thread is running the old callback as 2963 * we install the nop handler (otherwise we would submit a request 2964 * to hardware that will never complete). In order to prevent this 2965 * race, we wait until the machine is idle before making the swap 2966 * (using stop_machine()). 2967 */ 2968 engine->submit_request = nop_submit_request; 2969 2970 /* Mark all executing requests as skipped */ 2971 spin_lock_irqsave(&engine->timeline->lock, flags); 2972 list_for_each_entry(request, &engine->timeline->requests, link) 2973 dma_fence_set_error(&request->fence, -EIO); 2974 spin_unlock_irqrestore(&engine->timeline->lock, flags); 2975 2976 /* Mark all pending requests as complete so that any concurrent 2977 * (lockless) lookup doesn't try and wait upon the request as we 2978 * reset it. 2979 */ 2980 intel_engine_init_global_seqno(engine, 2981 intel_engine_last_submit(engine)); 2982 2983 /* 2984 * Clear the execlists queue up before freeing the requests, as those 2985 * are the ones that keep the context and ringbuffer backing objects 2986 * pinned in place. 
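 * This is only required when execlists are enabled: both ELSP ports are released and the queue of not-yet-submitted requests is reset, all under the engine timeline lock.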
2987 */ 2988 2989 if (i915.enable_execlists) { 2990 unsigned long flags; 2991 2992 spin_lock_irqsave(&engine->timeline->lock, flags); 2993 2994 i915_gem_request_put(engine->execlist_port[0].request); 2995 i915_gem_request_put(engine->execlist_port[1].request); 2996 memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); 2997 engine->execlist_queue = RB_ROOT; 2998 engine->execlist_first = NULL; 2999 3000 spin_unlock_irqrestore(&engine->timeline->lock, flags); 3001 } 3002 } 3003 3004 static int __i915_gem_set_wedged_BKL(void *data) 3005 { 3006 struct drm_i915_private *i915 = data; 3007 struct intel_engine_cs *engine; 3008 enum intel_engine_id id; 3009 3010 for_each_engine(engine, i915, id) 3011 engine_set_wedged(engine); 3012 3013 return 0; 3014 } 3015 3016 void i915_gem_set_wedged(struct drm_i915_private *dev_priv) 3017 { 3018 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3019 set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); 3020 3021 /* Retire completed requests first so the list of inflight/incomplete 3022 * requests is accurate and we don't try and mark successful requests 3023 * as in error during __i915_gem_set_wedged_BKL(). 3024 */ 3025 i915_gem_retire_requests(dev_priv); 3026 3027 stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); 3028 3029 i915_gem_context_lost(dev_priv); 3030 3031 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); 3032 } 3033 3034 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3035 { 3036 struct i915_gem_timeline *tl; 3037 int i; 3038 3039 lockdep_assert_held(&i915->drm.struct_mutex); 3040 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3041 return true; 3042 3043 /* Before unwedging, make sure that all pending operations 3044 * are flushed and errored out - we may have requests waiting upon 3045 * third party fences. We marked all inflight requests as EIO, and 3046 * every execbuf since returned EIO, for consistency we want all 3047 * the currently pending requests to also be marked as EIO, which 3048 * is done inside our nop_submit_request - and so we must wait. 3049 * 3050 * No more can be submitted until we reset the wedged bit. 3051 */ 3052 list_for_each_entry(tl, &i915->gt.timelines, link) { 3053 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3054 struct drm_i915_gem_request *rq; 3055 3056 rq = i915_gem_active_peek(&tl->engine[i].last_request, 3057 &i915->drm.struct_mutex); 3058 if (!rq) 3059 continue; 3060 3061 /* We can't use our normal waiter as we want to 3062 * avoid recursively trying to handle the current 3063 * reset. The basic dma_fence_default_wait() installs 3064 * a callback for dma_fence_signal(), which is 3065 * triggered by our nop handler (indirectly, the 3066 * callback enables the signaler thread which is 3067 * woken by the nop_submit_request() advancing the seqno 3068 * and when the seqno passes the fence, the signaler 3069 * then signals the fence waking us up). 3070 */ 3071 if (dma_fence_default_wait(&rq->fence, true, 3072 MAX_SCHEDULE_TIMEOUT) < 0) 3073 return false; 3074 } 3075 } 3076 3077 /* Undo nop_submit_request. We prevent all new i915 requests from 3078 * being queued (by disallowing execbuf whilst wedged) so having 3079 * waited for all active requests above, we know the system is idle 3080 * and do not have to worry about a thread being inside 3081 * engine->submit_request() as we swap over. So unlike installing 3082 * the nop_submit_request on reset, we can do this from normal 3083 * context and do not require stop_machine(). 
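 * Hence we restore the engines' default submission backends first and only then clear I915_WEDGED, with a memory barrier in between so that no new execbuf can slip through before the real submit_request is back in place.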
3084 */ 3085 intel_engines_reset_default_submission(i915); 3086 3087 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3088 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3089 3090 return true; 3091 } 3092 3093 static void 3094 i915_gem_retire_work_handler(struct work_struct *work) 3095 { 3096 struct drm_i915_private *dev_priv = 3097 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3098 struct drm_device *dev = &dev_priv->drm; 3099 3100 /* Come back later if the device is busy... */ 3101 if (mutex_trylock(&dev->struct_mutex)) { 3102 i915_gem_retire_requests(dev_priv); 3103 mutex_unlock(&dev->struct_mutex); 3104 } 3105 3106 /* Keep the retire handler running until we are finally idle. 3107 * We do not need to do this test under locking as in the worst-case 3108 * we queue the retire worker once too often. 3109 */ 3110 if (READ_ONCE(dev_priv->gt.awake)) { 3111 i915_queue_hangcheck(dev_priv); 3112 queue_delayed_work(dev_priv->wq, 3113 &dev_priv->gt.retire_work, 3114 round_jiffies_up_relative(HZ)); 3115 } 3116 } 3117 3118 static void 3119 i915_gem_idle_work_handler(struct work_struct *work) 3120 { 3121 struct drm_i915_private *dev_priv = 3122 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3123 struct drm_device *dev = &dev_priv->drm; 3124 struct intel_engine_cs *engine; 3125 enum intel_engine_id id; 3126 bool rearm_hangcheck; 3127 3128 if (!READ_ONCE(dev_priv->gt.awake)) 3129 return; 3130 3131 /* 3132 * Wait for last execlists context complete, but bail out in case a 3133 * new request is submitted. 3134 */ 3135 wait_for(intel_engines_are_idle(dev_priv), 10); 3136 if (READ_ONCE(dev_priv->gt.active_requests)) 3137 return; 3138 3139 rearm_hangcheck = 3140 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3141 3142 if (!mutex_trylock(&dev->struct_mutex)) { 3143 /* Currently busy, come back later */ 3144 mod_delayed_work(dev_priv->wq, 3145 &dev_priv->gt.idle_work, 3146 msecs_to_jiffies(50)); 3147 goto out_rearm; 3148 } 3149 3150 /* 3151 * New request retired after this work handler started, extend active 3152 * period until next instance of the work. 
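 * work_pending() tells us the handler has been re-queued since this instance started, so we simply drop out here and let the next instance re-evaluate whether the GPU is really idle.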
3153 */ 3154 if (work_pending(work)) 3155 goto out_unlock; 3156 3157 if (dev_priv->gt.active_requests) 3158 goto out_unlock; 3159 3160 if (wait_for(intel_engines_are_idle(dev_priv), 10)) 3161 DRM_ERROR("Timeout waiting for engines to idle\n"); 3162 3163 for_each_engine(engine, dev_priv, id) { 3164 intel_engine_disarm_breadcrumbs(engine); 3165 i915_gem_batch_pool_fini(&engine->batch_pool); 3166 } 3167 3168 GEM_BUG_ON(!dev_priv->gt.awake); 3169 dev_priv->gt.awake = false; 3170 rearm_hangcheck = false; 3171 3172 if (INTEL_GEN(dev_priv) >= 6) 3173 gen6_rps_idle(dev_priv); 3174 intel_runtime_pm_put(dev_priv); 3175 out_unlock: 3176 mutex_unlock(&dev->struct_mutex); 3177 3178 out_rearm: 3179 if (rearm_hangcheck) { 3180 GEM_BUG_ON(!dev_priv->gt.awake); 3181 i915_queue_hangcheck(dev_priv); 3182 } 3183 } 3184 3185 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 3186 { 3187 struct drm_i915_gem_object *obj = to_intel_bo(gem); 3188 struct drm_i915_file_private *fpriv = file->driver_priv; 3189 struct i915_vma *vma, *vn; 3190 3191 mutex_lock(&obj->base.dev->struct_mutex); 3192 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) 3193 if (vma->vm->file == fpriv) 3194 i915_vma_close(vma); 3195 3196 if (i915_gem_object_is_active(obj) && 3197 !i915_gem_object_has_active_reference(obj)) { 3198 i915_gem_object_set_active_reference(obj); 3199 i915_gem_object_get(obj); 3200 } 3201 mutex_unlock(&obj->base.dev->struct_mutex); 3202 } 3203 3204 static unsigned long to_wait_timeout(s64 timeout_ns) 3205 { 3206 if (timeout_ns < 0) 3207 return MAX_SCHEDULE_TIMEOUT; 3208 3209 if (timeout_ns == 0) 3210 return 0; 3211 3212 return nsecs_to_jiffies_timeout(timeout_ns); 3213 } 3214 3215 /** 3216 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3217 * @dev: drm device pointer 3218 * @data: ioctl data blob 3219 * @file: drm file pointer 3220 * 3221 * Returns 0 if successful, else an error is returned with the remaining time in 3222 * the timeout parameter. 3223 * -ETIME: object is still busy after timeout 3224 * -ERESTARTSYS: signal interrupted the wait 3225 * -ENOENT: object doesn't exist 3226 * Also possible, but rare: 3227 * -EAGAIN: GPU wedged 3228 * -ENOMEM: damn 3229 * -ENODEV: Internal IRQ fail 3230 * -E?: The add request failed 3231 * 3232 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3233 * non-zero timeout parameter the wait ioctl will wait for the given number of 3234 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3235 * without holding struct_mutex the object may become re-busied before this 3236 * function completes.
A similar but shorter * race condition exists in the busy 3237 * ioctl 3238 */ 3239 int 3240 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3241 { 3242 struct drm_i915_gem_wait *args = data; 3243 struct drm_i915_gem_object *obj; 3244 ktime_t start; 3245 long ret; 3246 3247 if (args->flags != 0) 3248 return -EINVAL; 3249 3250 obj = i915_gem_object_lookup(file, args->bo_handle); 3251 if (!obj) 3252 return -ENOENT; 3253 3254 start = ktime_get(); 3255 3256 ret = i915_gem_object_wait(obj, 3257 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 3258 to_wait_timeout(args->timeout_ns), 3259 to_rps_client(file)); 3260 3261 if (args->timeout_ns > 0) { 3262 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 3263 if (args->timeout_ns < 0) 3264 args->timeout_ns = 0; 3265 3266 /* 3267 * Apparently ktime isn't accurate enough and occasionally has a 3268 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 3269 * things up to make the test happy. We allow up to 1 jiffy. 3270 * 3271 * This is a regression from the timespec->ktime conversion. 3272 */ 3273 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 3274 args->timeout_ns = 0; 3275 } 3276 3277 i915_gem_object_put(obj); 3278 return ret; 3279 } 3280 3281 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) 3282 { 3283 int ret, i; 3284 3285 for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { 3286 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); 3287 if (ret) 3288 return ret; 3289 } 3290 3291 return 0; 3292 } 3293 3294 static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) 3295 { 3296 return wait_for(intel_engine_is_idle(engine), timeout_ms); 3297 } 3298 3299 static int wait_for_engines(struct drm_i915_private *i915) 3300 { 3301 struct intel_engine_cs *engine; 3302 enum intel_engine_id id; 3303 3304 for_each_engine(engine, i915, id) { 3305 if (GEM_WARN_ON(wait_for_engine(engine, 50))) { 3306 i915_gem_set_wedged(i915); 3307 return -EIO; 3308 } 3309 3310 GEM_BUG_ON(intel_engine_get_seqno(engine) != 3311 intel_engine_last_submit(engine)); 3312 } 3313 3314 return 0; 3315 } 3316 3317 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) 3318 { 3319 int ret; 3320 3321 /* If the device is asleep, we have no requests outstanding */ 3322 if (!READ_ONCE(i915->gt.awake)) 3323 return 0; 3324 3325 if (flags & I915_WAIT_LOCKED) { 3326 struct i915_gem_timeline *tl; 3327 3328 lockdep_assert_held(&i915->drm.struct_mutex); 3329 3330 list_for_each_entry(tl, &i915->gt.timelines, link) { 3331 ret = wait_for_timeline(tl, flags); 3332 if (ret) 3333 return ret; 3334 } 3335 3336 i915_gem_retire_requests(i915); 3337 GEM_BUG_ON(i915->gt.active_requests); 3338 3339 ret = wait_for_engines(i915); 3340 } else { 3341 ret = wait_for_timeline(&i915->gt.global_timeline, flags); 3342 } 3343 3344 return ret; 3345 } 3346 3347 /** Flushes the GTT write domain for the object if it's dirty. */ 3348 static void 3349 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3350 { 3351 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3352 3353 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3354 return; 3355 3356 /* No actual flushing is required for the GTT write domain. Writes 3357 * to it "immediately" go to main memory as far as we know, so there's 3358 * no chipset flush. It also doesn't land in render cache. 
3359 * 3360 * However, we do have to enforce the order so that all writes through 3361 * the GTT land before any writes to the device, such as updates to 3362 * the GATT itself. 3363 * 3364 * We also have to wait a bit for the writes to land from the GTT. 3365 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 3366 * timing. This issue has only been observed when switching quickly 3367 * between GTT writes and CPU reads from inside the kernel on recent hw, 3368 * and it appears to only affect discrete GTT blocks (i.e. on LLC 3369 * system agents we cannot reproduce this behaviour). 3370 */ 3371 wmb(); 3372 if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { 3373 if (intel_runtime_pm_get_if_in_use(dev_priv)) { 3374 spin_lock_irq(&dev_priv->uncore.lock); 3375 POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); 3376 spin_unlock_irq(&dev_priv->uncore.lock); 3377 intel_runtime_pm_put(dev_priv); 3378 } 3379 } 3380 3381 intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); 3382 3383 obj->base.write_domain = 0; 3384 } 3385 3386 /** Flushes the CPU write domain for the object if it's dirty. */ 3387 static void 3388 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3389 { 3390 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3391 return; 3392 3393 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 3394 obj->base.write_domain = 0; 3395 } 3396 3397 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 3398 { 3399 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) 3400 return; 3401 3402 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 3403 obj->base.write_domain = 0; 3404 } 3405 3406 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 3407 { 3408 if (!READ_ONCE(obj->pin_display)) 3409 return; 3410 3411 mutex_lock(&obj->base.dev->struct_mutex); 3412 __i915_gem_object_flush_for_display(obj); 3413 mutex_unlock(&obj->base.dev->struct_mutex); 3414 } 3415 3416 /** 3417 * Moves a single object to the GTT read, and possibly write domain. 3418 * @obj: object to act on 3419 * @write: ask for write access or read only 3420 * 3421 * This function returns when the move is complete, including waiting on 3422 * flushes to occur. 3423 */ 3424 int 3425 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3426 { 3427 int ret; 3428 3429 lockdep_assert_held(&obj->base.dev->struct_mutex); 3430 3431 ret = i915_gem_object_wait(obj, 3432 I915_WAIT_INTERRUPTIBLE | 3433 I915_WAIT_LOCKED | 3434 (write ? I915_WAIT_ALL : 0), 3435 MAX_SCHEDULE_TIMEOUT, 3436 NULL); 3437 if (ret) 3438 return ret; 3439 3440 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3441 return 0; 3442 3443 /* Flush and acquire obj->pages so that we are coherent through 3444 * direct access in memory with previous cached writes through 3445 * shmemfs and that our cache domain tracking remains valid. 3446 * For example, if the obj->filp was moved to swap without us 3447 * being notified and releasing the pages, we would mistakenly 3448 * continue to assume that the obj remained out of the CPU cached 3449 * domain. 3450 */ 3451 ret = i915_gem_object_pin_pages(obj); 3452 if (ret) 3453 return ret; 3454 3455 i915_gem_object_flush_cpu_write_domain(obj); 3456 3457 /* Serialise direct access to this object with the barriers for 3458 * coherent writes from the GPU, by effectively invalidating the 3459 * GTT domain upon first access. 
3460 */ 3461 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3462 mb(); 3463 3464 /* It should now be out of any other write domains, and we can update 3465 * the domain values for our changes. 3466 */ 3467 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3468 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3469 if (write) { 3470 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3471 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3472 obj->mm.dirty = true; 3473 } 3474 3475 i915_gem_object_unpin_pages(obj); 3476 return 0; 3477 } 3478 3479 /** 3480 * Changes the cache-level of an object across all VMA. 3481 * @obj: object to act on 3482 * @cache_level: new cache level to set for the object 3483 * 3484 * After this function returns, the object will be in the new cache-level 3485 * across all GTT and the contents of the backing storage will be coherent, 3486 * with respect to the new cache-level. In order to keep the backing storage 3487 * coherent for all users, we only allow a single cache level to be set 3488 * globally on the object and prevent it from being changed whilst the 3489 * hardware is reading from the object. That is if the object is currently 3490 * on the scanout it will be set to uncached (or equivalent display 3491 * cache coherency) and all non-MOCS GPU access will also be uncached so 3492 * that all direct access to the scanout remains coherent. 3493 */ 3494 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3495 enum i915_cache_level cache_level) 3496 { 3497 struct i915_vma *vma; 3498 int ret; 3499 3500 lockdep_assert_held(&obj->base.dev->struct_mutex); 3501 3502 if (obj->cache_level == cache_level) 3503 return 0; 3504 3505 /* Inspect the list of currently bound VMA and unbind any that would 3506 * be invalid given the new cache-level. This is principally to 3507 * catch the issue of the CS prefetch crossing page boundaries and 3508 * reading an invalid PTE on older architectures. 3509 */ 3510 restart: 3511 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3512 if (!drm_mm_node_allocated(&vma->node)) 3513 continue; 3514 3515 if (i915_vma_is_pinned(vma)) { 3516 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3517 return -EBUSY; 3518 } 3519 3520 if (i915_gem_valid_gtt_space(vma, cache_level)) 3521 continue; 3522 3523 ret = i915_vma_unbind(vma); 3524 if (ret) 3525 return ret; 3526 3527 /* As unbinding may affect other elements in the 3528 * obj->vma_list (due to side-effects from retiring 3529 * an active vma), play safe and restart the iterator. 3530 */ 3531 goto restart; 3532 } 3533 3534 /* We can reuse the existing drm_mm nodes but need to change the 3535 * cache-level on the PTE. We could simply unbind them all and 3536 * rebind with the correct cache-level on next use. However since 3537 * we already have a valid slot, dma mapping, pages etc, we may as 3538 * rewrite the PTE in the belief that doing so tramples upon less 3539 * state and so involves less work. 3540 */ 3541 if (obj->bind_count) { 3542 /* Before we change the PTE, the GPU must not be accessing it. 3543 * If we wait upon the object, we know that all the bound 3544 * VMA are no longer active. 
3545 */ 3546 ret = i915_gem_object_wait(obj, 3547 I915_WAIT_INTERRUPTIBLE | 3548 I915_WAIT_LOCKED | 3549 I915_WAIT_ALL, 3550 MAX_SCHEDULE_TIMEOUT, 3551 NULL); 3552 if (ret) 3553 return ret; 3554 3555 if (!HAS_LLC(to_i915(obj->base.dev)) && 3556 cache_level != I915_CACHE_NONE) { 3557 /* Access to snoopable pages through the GTT is 3558 * incoherent and on some machines causes a hard 3559 * lockup. Relinquish the CPU mmaping to force 3560 * userspace to refault in the pages and we can 3561 * then double check if the GTT mapping is still 3562 * valid for that pointer access. 3563 */ 3564 i915_gem_release_mmap(obj); 3565 3566 /* As we no longer need a fence for GTT access, 3567 * we can relinquish it now (and so prevent having 3568 * to steal a fence from someone else on the next 3569 * fence request). Note GPU activity would have 3570 * dropped the fence as all snoopable access is 3571 * supposed to be linear. 3572 */ 3573 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3574 ret = i915_vma_put_fence(vma); 3575 if (ret) 3576 return ret; 3577 } 3578 } else { 3579 /* We either have incoherent backing store and 3580 * so no GTT access or the architecture is fully 3581 * coherent. In such cases, existing GTT mmaps 3582 * ignore the cache bit in the PTE and we can 3583 * rewrite it without confusing the GPU or having 3584 * to force userspace to fault back in its mmaps. 3585 */ 3586 } 3587 3588 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3589 if (!drm_mm_node_allocated(&vma->node)) 3590 continue; 3591 3592 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3593 if (ret) 3594 return ret; 3595 } 3596 } 3597 3598 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && 3599 i915_gem_object_is_coherent(obj)) 3600 obj->cache_dirty = true; 3601 3602 list_for_each_entry(vma, &obj->vma_list, obj_link) 3603 vma->node.color = cache_level; 3604 obj->cache_level = cache_level; 3605 3606 return 0; 3607 } 3608 3609 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3610 struct drm_file *file) 3611 { 3612 struct drm_i915_gem_caching *args = data; 3613 struct drm_i915_gem_object *obj; 3614 int err = 0; 3615 3616 rcu_read_lock(); 3617 obj = i915_gem_object_lookup_rcu(file, args->handle); 3618 if (!obj) { 3619 err = -ENOENT; 3620 goto out; 3621 } 3622 3623 switch (obj->cache_level) { 3624 case I915_CACHE_LLC: 3625 case I915_CACHE_L3_LLC: 3626 args->caching = I915_CACHING_CACHED; 3627 break; 3628 3629 case I915_CACHE_WT: 3630 args->caching = I915_CACHING_DISPLAY; 3631 break; 3632 3633 default: 3634 args->caching = I915_CACHING_NONE; 3635 break; 3636 } 3637 out: 3638 rcu_read_unlock(); 3639 return err; 3640 } 3641 3642 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3643 struct drm_file *file) 3644 { 3645 struct drm_i915_private *i915 = to_i915(dev); 3646 struct drm_i915_gem_caching *args = data; 3647 struct drm_i915_gem_object *obj; 3648 enum i915_cache_level level; 3649 int ret = 0; 3650 3651 switch (args->caching) { 3652 case I915_CACHING_NONE: 3653 level = I915_CACHE_NONE; 3654 break; 3655 case I915_CACHING_CACHED: 3656 /* 3657 * Due to a HW issue on BXT A stepping, GPU stores via a 3658 * snooped mapping may leave stale data in a corresponding CPU 3659 * cacheline, whereas normally such cachelines would get 3660 * invalidated. 3661 */ 3662 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 3663 return -ENODEV; 3664 3665 level = I915_CACHE_LLC; 3666 break; 3667 case I915_CACHING_DISPLAY: 3668 level = HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE; 3669 break; 3670 default: 3671 return -EINVAL; 3672 } 3673 3674 obj = i915_gem_object_lookup(file, args->handle); 3675 if (!obj) 3676 return -ENOENT; 3677 3678 if (obj->cache_level == level) 3679 goto out; 3680 3681 ret = i915_gem_object_wait(obj, 3682 I915_WAIT_INTERRUPTIBLE, 3683 MAX_SCHEDULE_TIMEOUT, 3684 to_rps_client(file)); 3685 if (ret) 3686 goto out; 3687 3688 ret = i915_mutex_lock_interruptible(dev); 3689 if (ret) 3690 goto out; 3691 3692 ret = i915_gem_object_set_cache_level(obj, level); 3693 mutex_unlock(&dev->struct_mutex); 3694 3695 out: 3696 i915_gem_object_put(obj); 3697 return ret; 3698 } 3699 3700 /* 3701 * Prepare buffer for display plane (scanout, cursors, etc). 3702 * Can be called from an uninterruptible phase (modesetting) and allows 3703 * any flushes to be pipelined (for pageflips). 3704 */ 3705 struct i915_vma * 3706 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3707 u32 alignment, 3708 const struct i915_ggtt_view *view) 3709 { 3710 struct i915_vma *vma; 3711 int ret; 3712 3713 lockdep_assert_held(&obj->base.dev->struct_mutex); 3714 3715 /* Mark the pin_display early so that we account for the 3716 * display coherency whilst setting up the cache domains. 3717 */ 3718 obj->pin_display++; 3719 3720 /* The display engine is not coherent with the LLC cache on gen6. As 3721 * a result, we make sure that the pinning that is about to occur is 3722 * done with uncached PTEs. This is lowest common denominator for all 3723 * chipsets. 3724 * 3725 * However for gen6+, we could do better by using the GFDT bit instead 3726 * of uncaching, which would allow us to flush all the LLC-cached data 3727 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3728 */ 3729 ret = i915_gem_object_set_cache_level(obj, 3730 HAS_WT(to_i915(obj->base.dev)) ? 3731 I915_CACHE_WT : I915_CACHE_NONE); 3732 if (ret) { 3733 vma = ERR_PTR(ret); 3734 goto err_unpin_display; 3735 } 3736 3737 /* As the user may map the buffer once pinned in the display plane 3738 * (e.g. libkms for the bootup splash), we have to ensure that we 3739 * always use map_and_fenceable for all scanout buffers. However, 3740 * it may simply be too big to fit into mappable, in which case 3741 * put it anyway and hope that userspace can cope (but always first 3742 * try to preserve the existing ABI). 3743 */ 3744 vma = ERR_PTR(-ENOSPC); 3745 if (!view || view->type == I915_GGTT_VIEW_NORMAL) 3746 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3747 PIN_MAPPABLE | PIN_NONBLOCK); 3748 if (IS_ERR(vma)) { 3749 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3750 unsigned int flags; 3751 3752 /* Valleyview is definitely limited to scanning out the first 3753 * 512MiB. Lets presume this behaviour was inherited from the 3754 * g4x display engine and that all earlier gen are similarly 3755 * limited. Testing suggests that it is a little more 3756 * complicated than this. For example, Cherryview appears quite 3757 * happy to scanout from anywhere within its global aperture. 
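 * So, to stay on the safe side, we only insist on PIN_MAPPABLE for the fallback pin on GMCH-style display hardware and let everything else use any GGTT offset.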
3758 */ 3759 flags = 0; 3760 if (HAS_GMCH_DISPLAY(i915)) 3761 flags = PIN_MAPPABLE; 3762 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 3763 } 3764 if (IS_ERR(vma)) 3765 goto err_unpin_display; 3766 3767 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 3768 3769 /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ 3770 __i915_gem_object_flush_for_display(obj); 3771 intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); 3772 3773 /* It should now be out of any other write domains, and we can update 3774 * the domain values for our changes. 3775 */ 3776 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3777 3778 return vma; 3779 3780 err_unpin_display: 3781 obj->pin_display--; 3782 return vma; 3783 } 3784 3785 void 3786 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 3787 { 3788 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 3789 3790 if (WARN_ON(vma->obj->pin_display == 0)) 3791 return; 3792 3793 if (--vma->obj->pin_display == 0) 3794 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 3795 3796 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 3797 i915_gem_object_bump_inactive_ggtt(vma->obj); 3798 3799 i915_vma_unpin(vma); 3800 } 3801 3802 /** 3803 * Moves a single object to the CPU read, and possibly write domain. 3804 * @obj: object to act on 3805 * @write: requesting write or read-only access 3806 * 3807 * This function returns when the move is complete, including waiting on 3808 * flushes to occur. 3809 */ 3810 int 3811 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3812 { 3813 int ret; 3814 3815 lockdep_assert_held(&obj->base.dev->struct_mutex); 3816 3817 ret = i915_gem_object_wait(obj, 3818 I915_WAIT_INTERRUPTIBLE | 3819 I915_WAIT_LOCKED | 3820 (write ? I915_WAIT_ALL : 0), 3821 MAX_SCHEDULE_TIMEOUT, 3822 NULL); 3823 if (ret) 3824 return ret; 3825 3826 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3827 return 0; 3828 3829 i915_gem_object_flush_gtt_write_domain(obj); 3830 3831 /* Flush the CPU cache if it's still invalid. */ 3832 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3833 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 3834 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3835 } 3836 3837 /* It should now be out of any other write domains, and we can update 3838 * the domain values for our changes. 3839 */ 3840 GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3841 3842 /* If we're writing through the CPU, then the GPU read domains will 3843 * need to be invalidated at next use. 3844 */ 3845 if (write) { 3846 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3847 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3848 } 3849 3850 return 0; 3851 } 3852 3853 /* Throttle our rendering by waiting until the ring has completed our requests 3854 * emitted over 20 msec ago. 3855 * 3856 * Note that if we were to use the current jiffies each time around the loop, 3857 * we wouldn't escape the function with any frames outstanding if the time to 3858 * render a frame was over 20ms. 3859 * 3860 * This should get us reasonable parallelism between CPU and GPU but also 3861 * relatively low latency when blocking on a particular request to finish. 
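 * In practice we pick the most recent request from this client that was emitted more than DRM_I915_THROTTLE_JIFFIES (20 ms) ago and, if one exists, wait for it to complete before allowing more rendering to be queued.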
3862 */ 3863 static int 3864 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3865 { 3866 struct drm_i915_private *dev_priv = to_i915(dev); 3867 struct drm_i915_file_private *file_priv = file->driver_priv; 3868 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 3869 struct drm_i915_gem_request *request, *target = NULL; 3870 long ret; 3871 3872 /* ABI: return -EIO if already wedged */ 3873 if (i915_terminally_wedged(&dev_priv->gpu_error)) 3874 return -EIO; 3875 3876 spin_lock(&file_priv->mm.lock); 3877 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 3878 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3879 break; 3880 3881 if (target) { 3882 list_del(&target->client_link); 3883 target->file_priv = NULL; 3884 } 3885 3886 target = request; 3887 } 3888 if (target) 3889 i915_gem_request_get(target); 3890 spin_unlock(&file_priv->mm.lock); 3891 3892 if (target == NULL) 3893 return 0; 3894 3895 ret = i915_wait_request(target, 3896 I915_WAIT_INTERRUPTIBLE, 3897 MAX_SCHEDULE_TIMEOUT); 3898 i915_gem_request_put(target); 3899 3900 return ret < 0 ? ret : 0; 3901 } 3902 3903 struct i915_vma * 3904 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3905 const struct i915_ggtt_view *view, 3906 u64 size, 3907 u64 alignment, 3908 u64 flags) 3909 { 3910 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3911 struct i915_address_space *vm = &dev_priv->ggtt.base; 3912 struct i915_vma *vma; 3913 int ret; 3914 3915 lockdep_assert_held(&obj->base.dev->struct_mutex); 3916 3917 vma = i915_vma_instance(obj, vm, view); 3918 if (unlikely(IS_ERR(vma))) 3919 return vma; 3920 3921 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3922 if (flags & PIN_NONBLOCK && 3923 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) 3924 return ERR_PTR(-ENOSPC); 3925 3926 if (flags & PIN_MAPPABLE) { 3927 /* If the required space is larger than the available 3928 * aperture, we will not be able to find a slot for the 3929 * object and unbinding the object now will be in 3930 * vain. Worse, doing so may cause us to ping-pong 3931 * the object in and out of the Global GTT and 3932 * waste a lot of cycles under the mutex. 3933 */ 3934 if (vma->fence_size > dev_priv->ggtt.mappable_end) 3935 return ERR_PTR(-E2BIG); 3936 3937 /* If NONBLOCK is set the caller is optimistically 3938 * trying to cache the full object within the mappable 3939 * aperture, and *must* have a fallback in place for 3940 * situations where we cannot bind the object. We 3941 * can be a little more lax here and use the fallback 3942 * more often to avoid costly migrations of ourselves 3943 * and other objects within the aperture. 3944 * 3945 * Half-the-aperture is used as a simple heuristic. 3946 * More interesting would be to search for a free 3947 * block prior to making the commitment to unbind. 3948 * That caters for the self-harm case, and with a 3949 * little more heuristics (e.g. NOFAULT, NOEVICT) 3950 * we could try to minimise harm to others.
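 * (Concretely: with PIN_NONBLOCK, any object whose fence_size exceeds
 * half of mappable_end is immediately reported as -ENOSPC, pushing the
 * caller onto its unmappable fallback path.)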
3951 */ 3952 if (flags & PIN_NONBLOCK && 3953 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 3954 return ERR_PTR(-ENOSPC); 3955 } 3956 3957 WARN(i915_vma_is_pinned(vma), 3958 "bo is already pinned in ggtt with incorrect alignment:" 3959 " offset=%08x, req.alignment=%llx," 3960 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 3961 i915_ggtt_offset(vma), alignment, 3962 !!(flags & PIN_MAPPABLE), 3963 i915_vma_is_map_and_fenceable(vma)); 3964 ret = i915_vma_unbind(vma); 3965 if (ret) 3966 return ERR_PTR(ret); 3967 } 3968 3969 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 3970 if (ret) 3971 return ERR_PTR(ret); 3972 3973 return vma; 3974 } 3975 3976 static __always_inline unsigned int __busy_read_flag(unsigned int id) 3977 { 3978 /* Note that we could alias engines in the execbuf API, but 3979 * that would be very unwise as it prevents userspace from 3980 * fine control over engine selection. Ahem. 3981 * 3982 * This should be something like EXEC_MAX_ENGINE instead of 3983 * I915_NUM_ENGINES. 3984 */ 3985 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 3986 return 0x10000 << id; 3987 } 3988 3989 static __always_inline unsigned int __busy_write_id(unsigned int id) 3990 { 3991 /* The uABI guarantees an active writer is also amongst the read 3992 * engines. This would be true if we accessed the activity tracking 3993 * under the lock, but as we perform the lookup of the object and 3994 * its activity locklessly we can not guarantee that the last_write 3995 * being active implies that we have set the same engine flag from 3996 * last_read - hence we always set both read and write busy for 3997 * last_write. 3998 */ 3999 return id | __busy_read_flag(id); 4000 } 4001 4002 static __always_inline unsigned int 4003 __busy_set_if_active(const struct dma_fence *fence, 4004 unsigned int (*flag)(unsigned int id)) 4005 { 4006 struct drm_i915_gem_request *rq; 4007 4008 /* We have to check the current hw status of the fence as the uABI 4009 * guarantees forward progress. We could rely on the idle worker 4010 * to eventually flush us, but to minimise latency just ask the 4011 * hardware. 4012 * 4013 * Note we only report on the status of native fences. 4014 */ 4015 if (!dma_fence_is_i915(fence)) 4016 return 0; 4017 4018 /* opencode to_request() in order to avoid const warnings */ 4019 rq = container_of(fence, struct drm_i915_gem_request, fence); 4020 if (i915_gem_request_completed(rq)) 4021 return 0; 4022 4023 return flag(rq->engine->exec_id); 4024 } 4025 4026 static __always_inline unsigned int 4027 busy_check_reader(const struct dma_fence *fence) 4028 { 4029 return __busy_set_if_active(fence, __busy_read_flag); 4030 } 4031 4032 static __always_inline unsigned int 4033 busy_check_writer(const struct dma_fence *fence) 4034 { 4035 if (!fence) 4036 return 0; 4037 4038 return __busy_set_if_active(fence, __busy_write_id); 4039 } 4040 4041 int 4042 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4043 struct drm_file *file) 4044 { 4045 struct drm_i915_gem_busy *args = data; 4046 struct drm_i915_gem_object *obj; 4047 struct reservation_object_list *list; 4048 unsigned int seq; 4049 int err; 4050 4051 err = -ENOENT; 4052 rcu_read_lock(); 4053 obj = i915_gem_object_lookup_rcu(file, args->handle); 4054 if (!obj) 4055 goto out; 4056 4057 /* A discrepancy here is that we do not report the status of 4058 * non-i915 fences, i.e. even though we may report the object as idle, 4059 * a call to set-domain may still stall waiting for foreign rendering. 
4060 * This also means that wait-ioctl may report an object as busy, 4061 * where busy-ioctl considers it idle. 4062 * 4063 * We trade the ability to warn of foreign fences to report on which 4064 * i915 engines are active for the object. 4065 * 4066 * Alternatively, we can trade that extra information on read/write 4067 * activity with 4068 * args->busy = 4069 * !reservation_object_test_signaled_rcu(obj->resv, true); 4070 * to report the overall busyness. This is what the wait-ioctl does. 4071 * 4072 */ 4073 retry: 4074 seq = raw_read_seqcount(&obj->resv->seq); 4075 4076 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4077 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4078 4079 /* Translate shared fences to READ set of engines */ 4080 list = rcu_dereference(obj->resv->fence); 4081 if (list) { 4082 unsigned int shared_count = list->shared_count, i; 4083 4084 for (i = 0; i < shared_count; ++i) { 4085 struct dma_fence *fence = 4086 rcu_dereference(list->shared[i]); 4087 4088 args->busy |= busy_check_reader(fence); 4089 } 4090 } 4091 4092 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 4093 goto retry; 4094 4095 err = 0; 4096 out: 4097 rcu_read_unlock(); 4098 return err; 4099 } 4100 4101 int 4102 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4103 struct drm_file *file_priv) 4104 { 4105 return i915_gem_ring_throttle(dev, file_priv); 4106 } 4107 4108 int 4109 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4110 struct drm_file *file_priv) 4111 { 4112 struct drm_i915_private *dev_priv = to_i915(dev); 4113 struct drm_i915_gem_madvise *args = data; 4114 struct drm_i915_gem_object *obj; 4115 int err; 4116 4117 switch (args->madv) { 4118 case I915_MADV_DONTNEED: 4119 case I915_MADV_WILLNEED: 4120 break; 4121 default: 4122 return -EINVAL; 4123 } 4124 4125 obj = i915_gem_object_lookup(file_priv, args->handle); 4126 if (!obj) 4127 return -ENOENT; 4128 4129 err = mutex_lock_interruptible(&obj->mm.lock); 4130 if (err) 4131 goto out; 4132 4133 if (obj->mm.pages && 4134 i915_gem_object_is_tiled(obj) && 4135 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4136 if (obj->mm.madv == I915_MADV_WILLNEED) { 4137 GEM_BUG_ON(!obj->mm.quirked); 4138 __i915_gem_object_unpin_pages(obj); 4139 obj->mm.quirked = false; 4140 } 4141 if (args->madv == I915_MADV_WILLNEED) { 4142 GEM_BUG_ON(obj->mm.quirked); 4143 __i915_gem_object_pin_pages(obj); 4144 obj->mm.quirked = true; 4145 } 4146 } 4147 4148 if (obj->mm.madv != __I915_MADV_PURGED) 4149 obj->mm.madv = args->madv; 4150 4151 /* if the object is no longer attached, discard its backing storage */ 4152 if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages) 4153 i915_gem_object_truncate(obj); 4154 4155 args->retained = obj->mm.madv != __I915_MADV_PURGED; 4156 mutex_unlock(&obj->mm.lock); 4157 4158 out: 4159 i915_gem_object_put(obj); 4160 return err; 4161 } 4162 4163 static void 4164 frontbuffer_retire(struct i915_gem_active *active, 4165 struct drm_i915_gem_request *request) 4166 { 4167 struct drm_i915_gem_object *obj = 4168 container_of(active, typeof(*obj), frontbuffer_write); 4169 4170 intel_fb_obj_flush(obj, ORIGIN_CS); 4171 } 4172 4173 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4174 const struct drm_i915_gem_object_ops *ops) 4175 { 4176 mutex_init(&obj->mm.lock); 4177 4178 INIT_LIST_HEAD(&obj->global_link); 4179 INIT_LIST_HEAD(&obj->userfault_link); 4180 INIT_LIST_HEAD(&obj->obj_exec_link); 4181 INIT_LIST_HEAD(&obj->vma_list); 4182 INIT_LIST_HEAD(&obj->batch_pool_link); 
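	/* The ops vtable supplies the backing-storage hooks for this object
	 * type (get_pages/put_pages and, optionally, a pwrite fast path);
	 * see i915_gem_object_ops below for the default shmemfs-backed
	 * implementation.
	 */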
4183 4184 obj->ops = ops; 4185 4186 reservation_object_init(&obj->__builtin_resv); 4187 obj->resv = &obj->__builtin_resv; 4188 4189 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 4190 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 4191 4192 obj->mm.madv = I915_MADV_WILLNEED; 4193 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 4194 mutex_init(&obj->mm.get_page.lock); 4195 4196 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4197 } 4198 4199 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4200 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 4201 I915_GEM_OBJECT_IS_SHRINKABLE, 4202 4203 .get_pages = i915_gem_object_get_pages_gtt, 4204 .put_pages = i915_gem_object_put_pages_gtt, 4205 4206 .pwrite = i915_gem_object_pwrite_gtt, 4207 }; 4208 4209 struct drm_i915_gem_object * 4210 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 4211 { 4212 struct drm_i915_gem_object *obj; 4213 struct address_space *mapping; 4214 gfp_t mask; 4215 int ret; 4216 4217 /* There is a prevalence of the assumption that we fit the object's 4218 * page count inside a 32bit _signed_ variable. Let's document this and 4219 * catch if we ever need to fix it. In the meantime, if you do spot 4220 * such a local variable, please consider fixing! 4221 */ 4222 if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) 4223 return ERR_PTR(-E2BIG); 4224 4225 if (overflows_type(size, obj->base.size)) 4226 return ERR_PTR(-E2BIG); 4227 4228 obj = i915_gem_object_alloc(dev_priv); 4229 if (obj == NULL) 4230 return ERR_PTR(-ENOMEM); 4231 4232 ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); 4233 if (ret) 4234 goto fail; 4235 4236 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4237 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 4238 /* 965gm cannot relocate objects above 4GiB. */ 4239 mask &= ~__GFP_HIGHMEM; 4240 mask |= __GFP_DMA32; 4241 } 4242 4243 mapping = obj->base.filp->f_mapping; 4244 mapping_set_gfp_mask(mapping, mask); 4245 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 4246 4247 i915_gem_object_init(obj, &i915_gem_object_ops); 4248 4249 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4250 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4251 4252 if (HAS_LLC(dev_priv)) { 4253 /* On some devices, we can have the GPU use the LLC (the CPU 4254 * cache) for about a 10% performance improvement 4255 * compared to uncached. Graphics requests other than 4256 * display scanout are coherent with the CPU in 4257 * accessing this cache. This means in this mode we 4258 * don't need to clflush on the CPU side, and on the 4259 * GPU side we only need to flush internal caches to 4260 * get data visible to the CPU. 4261 * 4262 * However, we maintain the display planes as UC, and so 4263 * need to rebind when first used as such. 4264 */ 4265 obj->cache_level = I915_CACHE_LLC; 4266 } else 4267 obj->cache_level = I915_CACHE_NONE; 4268 4269 trace_i915_gem_object_create(obj); 4270 4271 return obj; 4272 4273 fail: 4274 i915_gem_object_free(obj); 4275 return ERR_PTR(ret); 4276 } 4277 4278 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4279 { 4280 /* If we are the last user of the backing storage (be it shmemfs 4281 * pages or stolen etc), we know that the pages are going to be 4282 * immediately released. In this case, we can then skip copying 4283 * back the contents from the GPU. 
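 * (For a shmemfs-backed object this reduces to checking that we hold
 * the only reference to the backing filp - see the f_count test below.)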
4284 */ 4285 4286 if (obj->mm.madv != I915_MADV_WILLNEED) 4287 return false; 4288 4289 if (obj->base.filp == NULL) 4290 return true; 4291 4292 /* At first glance, this looks racy, but then again so would be 4293 * userspace racing mmap against close. However, the first external 4294 * reference to the filp can only be obtained through the 4295 * i915_gem_mmap_ioctl() which safeguards us against the user 4296 * acquiring such a reference whilst we are in the middle of 4297 * freeing the object. 4298 */ 4299 return atomic_long_read(&obj->base.filp->f_count) == 1; 4300 } 4301 4302 static void __i915_gem_free_objects(struct drm_i915_private *i915, 4303 struct llist_node *freed) 4304 { 4305 struct drm_i915_gem_object *obj, *on; 4306 4307 mutex_lock(&i915->drm.struct_mutex); 4308 intel_runtime_pm_get(i915); 4309 llist_for_each_entry(obj, freed, freed) { 4310 struct i915_vma *vma, *vn; 4311 4312 trace_i915_gem_object_destroy(obj); 4313 4314 GEM_BUG_ON(i915_gem_object_is_active(obj)); 4315 list_for_each_entry_safe(vma, vn, 4316 &obj->vma_list, obj_link) { 4317 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 4318 GEM_BUG_ON(i915_vma_is_active(vma)); 4319 vma->flags &= ~I915_VMA_PIN_MASK; 4320 i915_vma_close(vma); 4321 } 4322 GEM_BUG_ON(!list_empty(&obj->vma_list)); 4323 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 4324 4325 list_del(&obj->global_link); 4326 } 4327 intel_runtime_pm_put(i915); 4328 mutex_unlock(&i915->drm.struct_mutex); 4329 4330 llist_for_each_entry_safe(obj, on, freed, freed) { 4331 GEM_BUG_ON(obj->bind_count); 4332 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 4333 4334 if (obj->ops->release) 4335 obj->ops->release(obj); 4336 4337 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 4338 atomic_set(&obj->mm.pages_pin_count, 0); 4339 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 4340 GEM_BUG_ON(obj->mm.pages); 4341 4342 if (obj->base.import_attach) 4343 drm_prime_gem_destroy(&obj->base, NULL); 4344 4345 reservation_object_fini(&obj->__builtin_resv); 4346 drm_gem_object_release(&obj->base); 4347 i915_gem_info_remove_obj(i915, obj->base.size); 4348 4349 kfree(obj->bit_17); 4350 i915_gem_object_free(obj); 4351 } 4352 } 4353 4354 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 4355 { 4356 struct llist_node *freed; 4357 4358 freed = llist_del_all(&i915->mm.free_list); 4359 if (unlikely(freed)) 4360 __i915_gem_free_objects(i915, freed); 4361 } 4362 4363 static void __i915_gem_free_work(struct work_struct *work) 4364 { 4365 struct drm_i915_private *i915 = 4366 container_of(work, struct drm_i915_private, mm.free_work); 4367 struct llist_node *freed; 4368 4369 /* All file-owned VMA should have been released by this point through 4370 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 4371 * However, the object may also be bound into the global GTT (e.g. 4372 * older GPUs without per-process support, or for direct access through 4373 * the GTT either for the user or for scanout). Those VMA still need to 4374 * be unbound now. 4375 */ 4376 4377 while ((freed = llist_del_all(&i915->mm.free_list))) 4378 __i915_gem_free_objects(i915, freed); 4379 } 4380 4381 static void __i915_gem_free_object_rcu(struct rcu_head *head) 4382 { 4383 struct drm_i915_gem_object *obj = 4384 container_of(head, typeof(*obj), rcu); 4385 struct drm_i915_private *i915 = to_i915(obj->base.dev); 4386 4387 /* We can't simply use call_rcu() from i915_gem_free_object() 4388 * as we need to block whilst unbinding, and the call_rcu 4389 * task may be called from softirq context.
So we take a 4390 * detour through a worker. 4391 */ 4392 if (llist_add(&obj->freed, &i915->mm.free_list)) 4393 schedule_work(&i915->mm.free_work); 4394 } 4395 4396 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4397 { 4398 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4399 4400 if (obj->mm.quirked) 4401 __i915_gem_object_unpin_pages(obj); 4402 4403 if (discard_backing_storage(obj)) 4404 obj->mm.madv = I915_MADV_DONTNEED; 4405 4406 /* Before we free the object, make sure any pure RCU-only 4407 * read-side critical sections are complete, e.g. 4408 * i915_gem_busy_ioctl(). For the corresponding synchronized 4409 * lookup see i915_gem_object_lookup_rcu(). 4410 */ 4411 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 4412 } 4413 4414 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 4415 { 4416 lockdep_assert_held(&obj->base.dev->struct_mutex); 4417 4418 GEM_BUG_ON(i915_gem_object_has_active_reference(obj)); 4419 if (i915_gem_object_is_active(obj)) 4420 i915_gem_object_set_active_reference(obj); 4421 else 4422 i915_gem_object_put(obj); 4423 } 4424 4425 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) 4426 { 4427 struct intel_engine_cs *engine; 4428 enum intel_engine_id id; 4429 4430 for_each_engine(engine, dev_priv, id) 4431 GEM_BUG_ON(engine->last_retired_context && 4432 !i915_gem_context_is_kernel(engine->last_retired_context)); 4433 } 4434 4435 void i915_gem_sanitize(struct drm_i915_private *i915) 4436 { 4437 /* 4438 * If we inherit context state from the BIOS or earlier occupants 4439 * of the GPU, the GPU may be in an inconsistent state when we 4440 * try to take over. The only way to remove the earlier state 4441 * is by resetting. However, resetting on earlier gen is tricky as 4442 * it may impact the display and we are uncertain about the stability 4443 * of the reset, so we only reset recent machines with logical 4444 * context support (that must be reset to remove any stray contexts). 4445 */ 4446 if (HAS_HW_CONTEXTS(i915)) { 4447 int reset = intel_gpu_reset(i915, ALL_ENGINES); 4448 WARN_ON(reset && reset != -ENODEV); 4449 } 4450 } 4451 4452 int i915_gem_suspend(struct drm_i915_private *dev_priv) 4453 { 4454 struct drm_device *dev = &dev_priv->drm; 4455 int ret; 4456 4457 intel_runtime_pm_get(dev_priv); 4458 intel_suspend_gt_powersave(dev_priv); 4459 4460 mutex_lock(&dev->struct_mutex); 4461 4462 /* We have to flush all the executing contexts to main memory so 4463 * that they can be saved in the hibernation image. To ensure the last 4464 * context image is coherent, we have to switch away from it. That 4465 * leaves the dev_priv->kernel_context still active when 4466 * we actually suspend, and its image in memory may not match the GPU 4467 * state. Fortunately, the kernel_context is disposable and we do 4468 * not rely on its state. 4469 */ 4470 ret = i915_gem_switch_to_kernel_context(dev_priv); 4471 if (ret) 4472 goto err_unlock; 4473 4474 ret = i915_gem_wait_for_idle(dev_priv, 4475 I915_WAIT_INTERRUPTIBLE | 4476 I915_WAIT_LOCKED); 4477 if (ret) 4478 goto err_unlock; 4479 4480 assert_kernel_context_is_current(dev_priv); 4481 i915_gem_context_lost(dev_priv); 4482 mutex_unlock(&dev->struct_mutex); 4483 4484 intel_guc_suspend(dev_priv); 4485 4486 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4487 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 4488 4489 /* As the idle_work is rearming if it detects a race, play safe and 4490 * repeat the flush until it is definitely idle.
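 * (flush_delayed_work() returns true if it had to wait for the work to
 * run, so the loop below only terminates once a flush finds the work
 * idle.)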
4491 */ 4492 while (flush_delayed_work(&dev_priv->gt.idle_work)) 4493 ; 4494 4495 i915_gem_drain_freed_objects(dev_priv); 4496 4497 /* Assert that we successfully flushed all the work and 4498 * reset the GPU back to its idle, low power state. 4499 */ 4500 WARN_ON(dev_priv->gt.awake); 4501 WARN_ON(!intel_engines_are_idle(dev_priv)); 4502 4503 /* 4504 * Neither the BIOS, ourselves nor any other kernel 4505 * expects the system to be in execlists mode on startup, 4506 * so we need to reset the GPU back to legacy mode. And the only 4507 * known way to disable logical contexts is through a GPU reset. 4508 * 4509 * So in order to leave the system in a known default configuration, 4510 * always reset the GPU upon unload and suspend. Afterwards we 4511 * clean up the GEM state tracking, flushing off the requests and 4512 * leaving the system in a known idle state. 4513 * 4514 * Note that it is of the utmost importance that the GPU is idle and 4515 * all stray writes are flushed *before* we dismantle the backing 4516 * storage for the pinned objects. 4517 * 4518 * However, since we are uncertain that resetting the GPU on older 4519 * machines is a good idea, we don't - just in case it leaves the 4520 * machine in an unusable condition. 4521 */ 4522 i915_gem_sanitize(dev_priv); 4523 goto out_rpm_put; 4524 4525 err_unlock: 4526 mutex_unlock(&dev->struct_mutex); 4527 out_rpm_put: 4528 intel_runtime_pm_put(dev_priv); 4529 return ret; 4530 } 4531 4532 void i915_gem_resume(struct drm_i915_private *dev_priv) 4533 { 4534 struct drm_device *dev = &dev_priv->drm; 4535 4536 WARN_ON(dev_priv->gt.awake); 4537 4538 mutex_lock(&dev->struct_mutex); 4539 i915_gem_restore_gtt_mappings(dev_priv); 4540 4541 /* As we didn't flush the kernel context before suspend, we cannot 4542 * guarantee that the context image is complete. So let's just reset 4543 * it and start again.
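 * (gt.resume is selected in i915_gem_init(): intel_lr_context_resume
 * when execlists are enabled, intel_legacy_submission_resume
 * otherwise.)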
4544 */ 4545 dev_priv->gt.resume(dev_priv); 4546 4547 mutex_unlock(&dev->struct_mutex); 4548 } 4549 4550 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 4551 { 4552 if (INTEL_GEN(dev_priv) < 5 || 4553 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4554 return; 4555 4556 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4557 DISP_TILE_SURFACE_SWIZZLING); 4558 4559 if (IS_GEN5(dev_priv)) 4560 return; 4561 4562 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4563 if (IS_GEN6(dev_priv)) 4564 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4565 else if (IS_GEN7(dev_priv)) 4566 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4567 else if (IS_GEN8(dev_priv)) 4568 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4569 else 4570 BUG(); 4571 } 4572 4573 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 4574 { 4575 I915_WRITE(RING_CTL(base), 0); 4576 I915_WRITE(RING_HEAD(base), 0); 4577 I915_WRITE(RING_TAIL(base), 0); 4578 I915_WRITE(RING_START(base), 0); 4579 } 4580 4581 static void init_unused_rings(struct drm_i915_private *dev_priv) 4582 { 4583 if (IS_I830(dev_priv)) { 4584 init_unused_ring(dev_priv, PRB1_BASE); 4585 init_unused_ring(dev_priv, SRB0_BASE); 4586 init_unused_ring(dev_priv, SRB1_BASE); 4587 init_unused_ring(dev_priv, SRB2_BASE); 4588 init_unused_ring(dev_priv, SRB3_BASE); 4589 } else if (IS_GEN2(dev_priv)) { 4590 init_unused_ring(dev_priv, SRB0_BASE); 4591 init_unused_ring(dev_priv, SRB1_BASE); 4592 } else if (IS_GEN3(dev_priv)) { 4593 init_unused_ring(dev_priv, PRB1_BASE); 4594 init_unused_ring(dev_priv, PRB2_BASE); 4595 } 4596 } 4597 4598 static int __i915_gem_restart_engines(void *data) 4599 { 4600 struct drm_i915_private *i915 = data; 4601 struct intel_engine_cs *engine; 4602 enum intel_engine_id id; 4603 int err; 4604 4605 for_each_engine(engine, i915, id) { 4606 err = engine->init_hw(engine); 4607 if (err) 4608 return err; 4609 } 4610 4611 return 0; 4612 } 4613 4614 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 4615 { 4616 int ret; 4617 4618 dev_priv->gt.last_init_time = ktime_get(); 4619 4620 /* Double layer security blanket, see i915_gem_init() */ 4621 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4622 4623 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 4624 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4625 4626 if (IS_HASWELL(dev_priv)) 4627 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 4628 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4629 4630 if (HAS_PCH_NOP(dev_priv)) { 4631 if (IS_IVYBRIDGE(dev_priv)) { 4632 u32 temp = I915_READ(GEN7_MSG_CTL); 4633 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4634 I915_WRITE(GEN7_MSG_CTL, temp); 4635 } else if (INTEL_GEN(dev_priv) >= 7) { 4636 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4637 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4638 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4639 } 4640 } 4641 4642 i915_gem_init_swizzling(dev_priv); 4643 4644 /* 4645 * At least 830 can leave some of the unused rings 4646 * "active" (ie. head != tail) after resume which 4647 * will prevent c3 entry. Makes sure all unused rings 4648 * are totally idle. 
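 * (init_unused_rings() below just zeroes RING_CTL/HEAD/TAIL/START for
 * each phantom ring base on the affected gen2/gen3 parts.)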
4649 */ 4650 init_unused_rings(dev_priv); 4651 4652 BUG_ON(!dev_priv->kernel_context); 4653 4654 ret = i915_ppgtt_init_hw(dev_priv); 4655 if (ret) { 4656 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4657 goto out; 4658 } 4659 4660 /* Need to do basic initialisation of all rings first: */ 4661 ret = __i915_gem_restart_engines(dev_priv); 4662 if (ret) 4663 goto out; 4664 4665 intel_mocs_init_l3cc_table(dev_priv); 4666 4667 /* We can't enable contexts until all firmware is loaded */ 4668 ret = intel_uc_init_hw(dev_priv); 4669 if (ret) 4670 goto out; 4671 4672 out: 4673 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4674 return ret; 4675 } 4676 4677 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) 4678 { 4679 if (INTEL_INFO(dev_priv)->gen < 6) 4680 return false; 4681 4682 /* TODO: make semaphores and Execlists play nicely together */ 4683 if (i915.enable_execlists) 4684 return false; 4685 4686 if (value >= 0) 4687 return value; 4688 4689 #ifdef CONFIG_INTEL_IOMMU 4690 /* Enable semaphores on SNB when IO remapping is off */ 4691 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) 4692 return false; 4693 #endif 4694 4695 return true; 4696 } 4697 4698 int i915_gem_init(struct drm_i915_private *dev_priv) 4699 { 4700 int ret; 4701 4702 mutex_lock(&dev_priv->drm.struct_mutex); 4703 4704 i915_gem_clflush_init(dev_priv); 4705 4706 if (!i915.enable_execlists) { 4707 dev_priv->gt.resume = intel_legacy_submission_resume; 4708 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 4709 } else { 4710 dev_priv->gt.resume = intel_lr_context_resume; 4711 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 4712 } 4713 4714 /* This is just a security blanket to placate dragons. 4715 * On some systems, we very sporadically observe that the first TLBs 4716 * used by the CS may be stale, despite us poking the TLB reset. If 4717 * we hold the forcewake during initialisation these problems 4718 * just magically go away. 4719 */ 4720 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4721 4722 i915_gem_init_userptr(dev_priv); 4723 4724 ret = i915_gem_init_ggtt(dev_priv); 4725 if (ret) 4726 goto out_unlock; 4727 4728 ret = i915_gem_context_init(dev_priv); 4729 if (ret) 4730 goto out_unlock; 4731 4732 ret = intel_engines_init(dev_priv); 4733 if (ret) 4734 goto out_unlock; 4735 4736 ret = i915_gem_init_hw(dev_priv); 4737 if (ret == -EIO) { 4738 /* Allow engine initialisation to fail by marking the GPU as 4739 * wedged. But we only want to do this where the GPU is angry, 4740 * for all other failure, such as an allocation failure, bail. 
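 * (Only -EIO is swallowed here by wedging the GPU; any other error,
 * such as -ENOMEM, is propagated to the caller.)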
4741 */ 4742 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4743 i915_gem_set_wedged(dev_priv); 4744 ret = 0; 4745 } 4746 4747 out_unlock: 4748 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4749 mutex_unlock(&dev_priv->drm.struct_mutex); 4750 4751 return ret; 4752 } 4753 4754 void i915_gem_init_mmio(struct drm_i915_private *i915) 4755 { 4756 i915_gem_sanitize(i915); 4757 } 4758 4759 void 4760 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) 4761 { 4762 struct intel_engine_cs *engine; 4763 enum intel_engine_id id; 4764 4765 for_each_engine(engine, dev_priv, id) 4766 dev_priv->gt.cleanup_engine(engine); 4767 } 4768 4769 void 4770 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4771 { 4772 int i; 4773 4774 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 4775 !IS_CHERRYVIEW(dev_priv)) 4776 dev_priv->num_fence_regs = 32; 4777 else if (INTEL_INFO(dev_priv)->gen >= 4 || 4778 IS_I945G(dev_priv) || IS_I945GM(dev_priv) || 4779 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv)) 4780 dev_priv->num_fence_regs = 16; 4781 else 4782 dev_priv->num_fence_regs = 8; 4783 4784 if (intel_vgpu_active(dev_priv)) 4785 dev_priv->num_fence_regs = 4786 I915_READ(vgtif_reg(avail_rs.fence_num)); 4787 4788 /* Initialize fence registers to zero */ 4789 for (i = 0; i < dev_priv->num_fence_regs; i++) { 4790 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; 4791 4792 fence->i915 = dev_priv; 4793 fence->id = i; 4794 list_add_tail(&fence->link, &dev_priv->mm.fence_list); 4795 } 4796 i915_gem_restore_fences(dev_priv); 4797 4798 i915_gem_detect_bit_6_swizzle(dev_priv); 4799 } 4800 4801 int 4802 i915_gem_load_init(struct drm_i915_private *dev_priv) 4803 { 4804 int err = -ENOMEM; 4805 4806 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); 4807 if (!dev_priv->objects) 4808 goto err_out; 4809 4810 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); 4811 if (!dev_priv->vmas) 4812 goto err_objects; 4813 4814 dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, 4815 SLAB_HWCACHE_ALIGN | 4816 SLAB_RECLAIM_ACCOUNT | 4817 SLAB_TYPESAFE_BY_RCU); 4818 if (!dev_priv->requests) 4819 goto err_vmas; 4820 4821 dev_priv->dependencies = KMEM_CACHE(i915_dependency, 4822 SLAB_HWCACHE_ALIGN | 4823 SLAB_RECLAIM_ACCOUNT); 4824 if (!dev_priv->dependencies) 4825 goto err_requests; 4826 4827 mutex_lock(&dev_priv->drm.struct_mutex); 4828 INIT_LIST_HEAD(&dev_priv->gt.timelines); 4829 err = i915_gem_timeline_init__global(dev_priv); 4830 mutex_unlock(&dev_priv->drm.struct_mutex); 4831 if (err) 4832 goto err_dependencies; 4833 4834 INIT_LIST_HEAD(&dev_priv->context_list); 4835 INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); 4836 init_llist_head(&dev_priv->mm.free_list); 4837 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4838 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4839 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4840 INIT_LIST_HEAD(&dev_priv->mm.userfault_list); 4841 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 4842 i915_gem_retire_work_handler); 4843 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 4844 i915_gem_idle_work_handler); 4845 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 4846 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4847 4848 init_waitqueue_head(&dev_priv->pending_flip_queue); 4849 4850 dev_priv->mm.interruptible = true; 4851 4852 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); 4853 4854 spin_lock_init(&dev_priv->fb_tracking.lock); 4855 4856 return 0; 4857 4858 err_dependencies: 4859 
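	/* Unwind the slab caches in the reverse order of their creation. */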
kmem_cache_destroy(dev_priv->dependencies); 4860 err_requests: 4861 kmem_cache_destroy(dev_priv->requests); 4862 err_vmas: 4863 kmem_cache_destroy(dev_priv->vmas); 4864 err_objects: 4865 kmem_cache_destroy(dev_priv->objects); 4866 err_out: 4867 return err; 4868 } 4869 4870 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) 4871 { 4872 i915_gem_drain_freed_objects(dev_priv); 4873 WARN_ON(!llist_empty(&dev_priv->mm.free_list)); 4874 WARN_ON(dev_priv->mm.object_count); 4875 4876 mutex_lock(&dev_priv->drm.struct_mutex); 4877 i915_gem_timeline_fini(&dev_priv->gt.global_timeline); 4878 WARN_ON(!list_empty(&dev_priv->gt.timelines)); 4879 mutex_unlock(&dev_priv->drm.struct_mutex); 4880 4881 kmem_cache_destroy(dev_priv->dependencies); 4882 kmem_cache_destroy(dev_priv->requests); 4883 kmem_cache_destroy(dev_priv->vmas); 4884 kmem_cache_destroy(dev_priv->objects); 4885 4886 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 4887 rcu_barrier(); 4888 } 4889 4890 int i915_gem_freeze(struct drm_i915_private *dev_priv) 4891 { 4892 mutex_lock(&dev_priv->drm.struct_mutex); 4893 i915_gem_shrink_all(dev_priv); 4894 mutex_unlock(&dev_priv->drm.struct_mutex); 4895 4896 return 0; 4897 } 4898 4899 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 4900 { 4901 struct drm_i915_gem_object *obj; 4902 struct list_head *phases[] = { 4903 &dev_priv->mm.unbound_list, 4904 &dev_priv->mm.bound_list, 4905 NULL 4906 }, **p; 4907 4908 /* Called just before we write the hibernation image. 4909 * 4910 * We need to update the domain tracking to reflect that the CPU 4911 * will be accessing all the pages to create and restore from the 4912 * hibernation, and so upon restoration those pages will be in the 4913 * CPU domain. 4914 * 4915 * To make sure the hibernation image contains the latest state, 4916 * we update that state just before writing out the image. 4917 * 4918 * To try and reduce the hibernation image, we manually shrink 4919 * the objects as well. 4920 */ 4921 4922 mutex_lock(&dev_priv->drm.struct_mutex); 4923 i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); 4924 4925 for (p = phases; *p; p++) { 4926 list_for_each_entry(obj, *p, global_link) { 4927 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4928 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4929 } 4930 } 4931 mutex_unlock(&dev_priv->drm.struct_mutex); 4932 4933 return 0; 4934 } 4935 4936 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4937 { 4938 struct drm_i915_file_private *file_priv = file->driver_priv; 4939 struct drm_i915_gem_request *request; 4940 4941 /* Clean up our request list when the client is going away, so that 4942 * later retire_requests won't dereference our soon-to-be-gone 4943 * file_priv. 
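 * (The requests themselves live on until they are retired; here we
 * only sever their file_priv back-pointers.)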
4944 */ 4945 spin_lock(&file_priv->mm.lock); 4946 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 4947 request->file_priv = NULL; 4948 spin_unlock(&file_priv->mm.lock); 4949 4950 if (!list_empty(&file_priv->rps.link)) { 4951 spin_lock(&to_i915(dev)->rps.client_lock); 4952 list_del(&file_priv->rps.link); 4953 spin_unlock(&to_i915(dev)->rps.client_lock); 4954 } 4955 } 4956 4957 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 4958 { 4959 struct drm_i915_file_private *file_priv; 4960 int ret; 4961 4962 DRM_DEBUG("\n"); 4963 4964 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 4965 if (!file_priv) 4966 return -ENOMEM; 4967 4968 file->driver_priv = file_priv; 4969 file_priv->dev_priv = to_i915(dev); 4970 file_priv->file = file; 4971 INIT_LIST_HEAD(&file_priv->rps.link); 4972 4973 spin_lock_init(&file_priv->mm.lock); 4974 INIT_LIST_HEAD(&file_priv->mm.request_list); 4975 4976 file_priv->bsd_engine = -1; 4977 4978 ret = i915_gem_context_open(dev, file); 4979 if (ret) 4980 kfree(file_priv); 4981 4982 return ret; 4983 } 4984 4985 /** 4986 * i915_gem_track_fb - update frontbuffer tracking 4987 * @old: current GEM buffer for the frontbuffer slots 4988 * @new: new GEM buffer for the frontbuffer slots 4989 * @frontbuffer_bits: bitmask of frontbuffer slots 4990 * 4991 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 4992 * from @old and setting them in @new. Both @old and @new can be NULL. 4993 */ 4994 void i915_gem_track_fb(struct drm_i915_gem_object *old, 4995 struct drm_i915_gem_object *new, 4996 unsigned frontbuffer_bits) 4997 { 4998 /* Control of individual bits within the mask are guarded by 4999 * the owning plane->mutex, i.e. we can never see concurrent 5000 * manipulation of individual bits. But since the bitfield as a whole 5001 * is updated using RMW, we need to use atomics in order to update 5002 * the bits. 
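 * (The BUILD_BUG_ON below verifies that the frontbuffer bits for all
 * pipes fit within a single atomic_t.)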
5003 */ 5004 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 5005 sizeof(atomic_t) * BITS_PER_BYTE); 5006 5007 if (old) { 5008 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); 5009 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); 5010 } 5011 5012 if (new) { 5013 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); 5014 atomic_or(frontbuffer_bits, &new->frontbuffer_bits); 5015 } 5016 } 5017 5018 /* Allocate a new GEM object and fill it with the supplied data */ 5019 struct drm_i915_gem_object * 5020 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv, 5021 const void *data, size_t size) 5022 { 5023 struct drm_i915_gem_object *obj; 5024 struct file *file; 5025 size_t offset; 5026 int err; 5027 5028 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE)); 5029 if (IS_ERR(obj)) 5030 return obj; 5031 5032 GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); 5033 5034 file = obj->base.filp; 5035 offset = 0; 5036 do { 5037 unsigned int len = min_t(typeof(size), size, PAGE_SIZE); 5038 struct page *page; 5039 void *pgdata, *vaddr; 5040 5041 err = pagecache_write_begin(file, file->f_mapping, 5042 offset, len, 0, 5043 &page, &pgdata); 5044 if (err < 0) 5045 goto fail; 5046 5047 vaddr = kmap(page); 5048 memcpy(vaddr, data, len); 5049 kunmap(page); 5050 5051 err = pagecache_write_end(file, file->f_mapping, 5052 offset, len, len, 5053 page, pgdata); 5054 if (err < 0) 5055 goto fail; 5056 5057 size -= len; 5058 data += len; 5059 offset += len; 5060 } while (size); 5061 5062 return obj; 5063 5064 fail: 5065 i915_gem_object_put(obj); 5066 return ERR_PTR(err); 5067 } 5068 5069 struct scatterlist * 5070 i915_gem_object_get_sg(struct drm_i915_gem_object *obj, 5071 unsigned int n, 5072 unsigned int *offset) 5073 { 5074 struct i915_gem_object_page_iter *iter = &obj->mm.get_page; 5075 struct scatterlist *sg; 5076 unsigned int idx, count; 5077 5078 might_sleep(); 5079 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); 5080 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 5081 5082 /* As we iterate forward through the sg, we record each entry in a 5083 * radixtree for quick repeated (backwards) lookups. If we have seen 5084 * this index previously, we will have an entry for it. 5085 * 5086 * Initial lookup is O(N), but this is amortized to O(1) for 5087 * sequential page access (where each new request is consecutive 5088 * to the previous one). Repeated lookups are O(lg(obj->base.size)), 5089 * i.e. O(1) with a large constant! 5090 */ 5091 if (n < READ_ONCE(iter->sg_idx)) 5092 goto lookup; 5093 5094 mutex_lock(&iter->lock); 5095 5096 /* We prefer to reuse the last sg so that repeated lookup of this 5097 * (or the subsequent) sg are fast - comparing against the last 5098 * sg is faster than going through the radixtree. 5099 */ 5100 5101 sg = iter->sg_pos; 5102 idx = iter->sg_idx; 5103 count = __sg_page_count(sg); 5104 5105 while (idx + count <= n) { 5106 unsigned long exception, i; 5107 int ret; 5108 5109 /* If we cannot allocate and insert this entry, or the 5110 * individual pages from this range, cancel updating the 5111 * sg_idx so that on this lookup we are forced to linearly 5112 * scan onwards, but on future lookups we will try the 5113 * insertion again (in which case we need to be careful of 5114 * the error return reporting that we have already inserted 5115 * this index). 
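 * (Interior pages of a multi-page sg entry are stored as exceptional
 * entries encoding the index of the first page of that entry; the
 * radix_tree_lookup() path below decodes them back to the base sg.)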
5116 */ 5117 ret = radix_tree_insert(&iter->radix, idx, sg); 5118 if (ret && ret != -EEXIST) 5119 goto scan; 5120 5121 exception = 5122 RADIX_TREE_EXCEPTIONAL_ENTRY | 5123 idx << RADIX_TREE_EXCEPTIONAL_SHIFT; 5124 for (i = 1; i < count; i++) { 5125 ret = radix_tree_insert(&iter->radix, idx + i, 5126 (void *)exception); 5127 if (ret && ret != -EEXIST) 5128 goto scan; 5129 } 5130 5131 idx += count; 5132 sg = ____sg_next(sg); 5133 count = __sg_page_count(sg); 5134 } 5135 5136 scan: 5137 iter->sg_pos = sg; 5138 iter->sg_idx = idx; 5139 5140 mutex_unlock(&iter->lock); 5141 5142 if (unlikely(n < idx)) /* insertion completed by another thread */ 5143 goto lookup; 5144 5145 /* In case we failed to insert the entry into the radixtree, we need 5146 * to look beyond the current sg. 5147 */ 5148 while (idx + count <= n) { 5149 idx += count; 5150 sg = ____sg_next(sg); 5151 count = __sg_page_count(sg); 5152 } 5153 5154 *offset = n - idx; 5155 return sg; 5156 5157 lookup: 5158 rcu_read_lock(); 5159 5160 sg = radix_tree_lookup(&iter->radix, n); 5161 GEM_BUG_ON(!sg); 5162 5163 /* If this index is in the middle of multi-page sg entry, 5164 * the radixtree will contain an exceptional entry that points 5165 * to the start of that range. We will return the pointer to 5166 * the base page and the offset of this page within the 5167 * sg entry's range. 5168 */ 5169 *offset = 0; 5170 if (unlikely(radix_tree_exception(sg))) { 5171 unsigned long base = 5172 (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; 5173 5174 sg = radix_tree_lookup(&iter->radix, base); 5175 GEM_BUG_ON(!sg); 5176 5177 *offset = n - base; 5178 } 5179 5180 rcu_read_unlock(); 5181 5182 return sg; 5183 } 5184 5185 struct page * 5186 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) 5187 { 5188 struct scatterlist *sg; 5189 unsigned int offset; 5190 5191 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); 5192 5193 sg = i915_gem_object_get_sg(obj, n, &offset); 5194 return nth_page(sg_page(sg), offset); 5195 } 5196 5197 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5198 struct page * 5199 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, 5200 unsigned int n) 5201 { 5202 struct page *page; 5203 5204 page = i915_gem_object_get_page(obj, n); 5205 if (!obj->mm.dirty) 5206 set_page_dirty(page); 5207 5208 return page; 5209 } 5210 5211 dma_addr_t 5212 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, 5213 unsigned long n) 5214 { 5215 struct scatterlist *sg; 5216 unsigned int offset; 5217 5218 sg = i915_gem_object_get_sg(obj, n, &offset); 5219 return sg_dma_address(sg) + (offset << PAGE_SHIFT); 5220 } 5221 5222 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5223 #include "selftests/scatterlist.c" 5224 #include "selftests/mock_gem_device.c" 5225 #include "selftests/huge_gem_object.c" 5226 #include "selftests/i915_gem_object.c" 5227 #include "selftests/i915_gem_coherency.c" 5228 #endif 5229