1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include "intel_mocs.h" 36 #include <linux/shmem_fs.h> 37 #include <linux/slab.h> 38 #include <linux/swap.h> 39 #include <linux/pci.h> 40 #include <linux/dma-buf.h> 41 42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 44 static void 45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 46 static void 47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 48 49 static bool cpu_cache_is_coherent(struct drm_device *dev, 50 enum i915_cache_level level) 51 { 52 return HAS_LLC(dev) || level != I915_CACHE_NONE; 53 } 54 55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 56 { 57 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 58 return false; 59 60 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 61 return true; 62 63 return obj->pin_display; 64 } 65 66 static int 67 insert_mappable_node(struct drm_i915_private *i915, 68 struct drm_mm_node *node, u32 size) 69 { 70 memset(node, 0, sizeof(*node)); 71 return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, 72 size, 0, 0, 0, 73 i915->ggtt.mappable_end, 74 DRM_MM_SEARCH_DEFAULT, 75 DRM_MM_CREATE_DEFAULT); 76 } 77 78 static void 79 remove_mappable_node(struct drm_mm_node *node) 80 { 81 drm_mm_remove_node(node); 82 } 83 84 /* some bookkeeping */ 85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 86 size_t size) 87 { 88 spin_lock(&dev_priv->mm.object_stat_lock); 89 dev_priv->mm.object_count++; 90 dev_priv->mm.object_memory += size; 91 spin_unlock(&dev_priv->mm.object_stat_lock); 92 } 93 94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 95 size_t size) 96 { 97 spin_lock(&dev_priv->mm.object_stat_lock); 98 dev_priv->mm.object_count--; 99 dev_priv->mm.object_memory -= size; 100 spin_unlock(&dev_priv->mm.object_stat_lock); 101 } 102 103 static int 104 i915_gem_wait_for_error(struct i915_gpu_error *error) 105 { 106 int ret; 107 108 if (!i915_reset_in_progress(error)) 109 return 0; 110 
111 /* 112 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 113 * userspace. If it takes that long something really bad is going on and 114 * we should simply try to bail out and fail as gracefully as possible. 115 */ 116 ret = wait_event_interruptible_timeout(error->reset_queue, 117 !i915_reset_in_progress(error), 118 10*HZ); 119 if (ret == 0) { 120 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 121 return -EIO; 122 } else if (ret < 0) { 123 return ret; 124 } else { 125 return 0; 126 } 127 } 128 129 int i915_mutex_lock_interruptible(struct drm_device *dev) 130 { 131 struct drm_i915_private *dev_priv = to_i915(dev); 132 int ret; 133 134 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 135 if (ret) 136 return ret; 137 138 ret = mutex_lock_interruptible(&dev->struct_mutex); 139 if (ret) 140 return ret; 141 142 WARN_ON(i915_verify_lists(dev)); 143 return 0; 144 } 145 146 int 147 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 148 struct drm_file *file) 149 { 150 struct drm_i915_private *dev_priv = to_i915(dev); 151 struct i915_ggtt *ggtt = &dev_priv->ggtt; 152 struct drm_i915_gem_get_aperture *args = data; 153 struct i915_vma *vma; 154 size_t pinned; 155 156 pinned = 0; 157 mutex_lock(&dev->struct_mutex); 158 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 159 if (vma->pin_count) 160 pinned += vma->node.size; 161 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 162 if (vma->pin_count) 163 pinned += vma->node.size; 164 mutex_unlock(&dev->struct_mutex); 165 166 args->aper_size = ggtt->base.total; 167 args->aper_available_size = args->aper_size - pinned; 168 169 return 0; 170 } 171 172 static int 173 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 174 { 175 struct address_space *mapping = obj->base.filp->f_mapping; 176 char *vaddr = obj->phys_handle->vaddr; 177 struct sg_table *st; 178 struct scatterlist *sg; 179 int i; 180 181 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 182 return -EINVAL; 183 184 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 185 struct page *page; 186 char *src; 187 188 page = shmem_read_mapping_page(mapping, i); 189 if (IS_ERR(page)) 190 return PTR_ERR(page); 191 192 src = kmap_atomic(page); 193 memcpy(vaddr, src, PAGE_SIZE); 194 drm_clflush_virt_range(vaddr, PAGE_SIZE); 195 kunmap_atomic(src); 196 197 put_page(page); 198 vaddr += PAGE_SIZE; 199 } 200 201 i915_gem_chipset_flush(to_i915(obj->base.dev)); 202 203 st = kmalloc(sizeof(*st), GFP_KERNEL); 204 if (st == NULL) 205 return -ENOMEM; 206 207 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 208 kfree(st); 209 return -ENOMEM; 210 } 211 212 sg = st->sgl; 213 sg->offset = 0; 214 sg->length = obj->base.size; 215 216 sg_dma_address(sg) = obj->phys_handle->busaddr; 217 sg_dma_len(sg) = obj->base.size; 218 219 obj->pages = st; 220 return 0; 221 } 222 223 static void 224 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 225 { 226 int ret; 227 228 BUG_ON(obj->madv == __I915_MADV_PURGED); 229 230 ret = i915_gem_object_set_to_cpu_domain(obj, true); 231 if (WARN_ON(ret)) { 232 /* In the event of a disaster, abandon all caches and 233 * hope for the best. 
234 */ 235 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 236 } 237 238 if (obj->madv == I915_MADV_DONTNEED) 239 obj->dirty = 0; 240 241 if (obj->dirty) { 242 struct address_space *mapping = obj->base.filp->f_mapping; 243 char *vaddr = obj->phys_handle->vaddr; 244 int i; 245 246 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 247 struct page *page; 248 char *dst; 249 250 page = shmem_read_mapping_page(mapping, i); 251 if (IS_ERR(page)) 252 continue; 253 254 dst = kmap_atomic(page); 255 drm_clflush_virt_range(vaddr, PAGE_SIZE); 256 memcpy(dst, vaddr, PAGE_SIZE); 257 kunmap_atomic(dst); 258 259 set_page_dirty(page); 260 if (obj->madv == I915_MADV_WILLNEED) 261 mark_page_accessed(page); 262 put_page(page); 263 vaddr += PAGE_SIZE; 264 } 265 obj->dirty = 0; 266 } 267 268 sg_free_table(obj->pages); 269 kfree(obj->pages); 270 } 271 272 static void 273 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 274 { 275 drm_pci_free(obj->base.dev, obj->phys_handle); 276 } 277 278 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 279 .get_pages = i915_gem_object_get_pages_phys, 280 .put_pages = i915_gem_object_put_pages_phys, 281 .release = i915_gem_object_release_phys, 282 }; 283 284 static int 285 drop_pages(struct drm_i915_gem_object *obj) 286 { 287 struct i915_vma *vma, *next; 288 int ret; 289 290 drm_gem_object_reference(&obj->base); 291 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 292 if (i915_vma_unbind(vma)) 293 break; 294 295 ret = i915_gem_object_put_pages(obj); 296 drm_gem_object_unreference(&obj->base); 297 298 return ret; 299 } 300 301 int 302 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 303 int align) 304 { 305 drm_dma_handle_t *phys; 306 int ret; 307 308 if (obj->phys_handle) { 309 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 310 return -EBUSY; 311 312 return 0; 313 } 314 315 if (obj->madv != I915_MADV_WILLNEED) 316 return -EFAULT; 317 318 if (obj->base.filp == NULL) 319 return -EINVAL; 320 321 ret = drop_pages(obj); 322 if (ret) 323 return ret; 324 325 /* create a new object */ 326 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 327 if (!phys) 328 return -ENOMEM; 329 330 obj->phys_handle = phys; 331 obj->ops = &i915_gem_phys_ops; 332 333 return i915_gem_object_get_pages(obj); 334 } 335 336 static int 337 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 338 struct drm_i915_gem_pwrite *args, 339 struct drm_file *file_priv) 340 { 341 struct drm_device *dev = obj->base.dev; 342 void *vaddr = obj->phys_handle->vaddr + args->offset; 343 char __user *user_data = u64_to_user_ptr(args->data_ptr); 344 int ret = 0; 345 346 /* We manually control the domain here and pretend that it 347 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 348 */ 349 ret = i915_gem_object_wait_rendering(obj, false); 350 if (ret) 351 return ret; 352 353 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 354 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 355 unsigned long unwritten; 356 357 /* The physical object once assigned is fixed for the lifetime 358 * of the obj, so we can safely drop the lock and continue 359 * to access vaddr. 
360 */ 361 mutex_unlock(&dev->struct_mutex); 362 unwritten = copy_from_user(vaddr, user_data, args->size); 363 mutex_lock(&dev->struct_mutex); 364 if (unwritten) { 365 ret = -EFAULT; 366 goto out; 367 } 368 } 369 370 drm_clflush_virt_range(vaddr, args->size); 371 i915_gem_chipset_flush(to_i915(dev)); 372 373 out: 374 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 375 return ret; 376 } 377 378 void *i915_gem_object_alloc(struct drm_device *dev) 379 { 380 struct drm_i915_private *dev_priv = to_i915(dev); 381 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 382 } 383 384 void i915_gem_object_free(struct drm_i915_gem_object *obj) 385 { 386 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 387 kmem_cache_free(dev_priv->objects, obj); 388 } 389 390 static int 391 i915_gem_create(struct drm_file *file, 392 struct drm_device *dev, 393 uint64_t size, 394 uint32_t *handle_p) 395 { 396 struct drm_i915_gem_object *obj; 397 int ret; 398 u32 handle; 399 400 size = roundup(size, PAGE_SIZE); 401 if (size == 0) 402 return -EINVAL; 403 404 /* Allocate the new object */ 405 obj = i915_gem_object_create(dev, size); 406 if (IS_ERR(obj)) 407 return PTR_ERR(obj); 408 409 ret = drm_gem_handle_create(file, &obj->base, &handle); 410 /* drop reference from allocate - handle holds it now */ 411 drm_gem_object_unreference_unlocked(&obj->base); 412 if (ret) 413 return ret; 414 415 *handle_p = handle; 416 return 0; 417 } 418 419 int 420 i915_gem_dumb_create(struct drm_file *file, 421 struct drm_device *dev, 422 struct drm_mode_create_dumb *args) 423 { 424 /* have to work out size/pitch and return them */ 425 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 426 args->size = args->pitch * args->height; 427 return i915_gem_create(file, dev, 428 args->size, &args->handle); 429 } 430 431 /** 432 * Creates a new mm object and returns a handle to it. 
433 * @dev: drm device pointer 434 * @data: ioctl data blob 435 * @file: drm file pointer 436 */ 437 int 438 i915_gem_create_ioctl(struct drm_device *dev, void *data, 439 struct drm_file *file) 440 { 441 struct drm_i915_gem_create *args = data; 442 443 return i915_gem_create(file, dev, 444 args->size, &args->handle); 445 } 446 447 static inline int 448 __copy_to_user_swizzled(char __user *cpu_vaddr, 449 const char *gpu_vaddr, int gpu_offset, 450 int length) 451 { 452 int ret, cpu_offset = 0; 453 454 while (length > 0) { 455 int cacheline_end = ALIGN(gpu_offset + 1, 64); 456 int this_length = min(cacheline_end - gpu_offset, length); 457 int swizzled_gpu_offset = gpu_offset ^ 64; 458 459 ret = __copy_to_user(cpu_vaddr + cpu_offset, 460 gpu_vaddr + swizzled_gpu_offset, 461 this_length); 462 if (ret) 463 return ret + length; 464 465 cpu_offset += this_length; 466 gpu_offset += this_length; 467 length -= this_length; 468 } 469 470 return 0; 471 } 472 473 static inline int 474 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 475 const char __user *cpu_vaddr, 476 int length) 477 { 478 int ret, cpu_offset = 0; 479 480 while (length > 0) { 481 int cacheline_end = ALIGN(gpu_offset + 1, 64); 482 int this_length = min(cacheline_end - gpu_offset, length); 483 int swizzled_gpu_offset = gpu_offset ^ 64; 484 485 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 486 cpu_vaddr + cpu_offset, 487 this_length); 488 if (ret) 489 return ret + length; 490 491 cpu_offset += this_length; 492 gpu_offset += this_length; 493 length -= this_length; 494 } 495 496 return 0; 497 } 498 499 /* 500 * Pins the specified object's pages and synchronizes the object with 501 * GPU accesses. Sets needs_clflush to non-zero if the caller should 502 * flush the object from the CPU cache. 503 */ 504 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 505 int *needs_clflush) 506 { 507 int ret; 508 509 *needs_clflush = 0; 510 511 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 512 return -EINVAL; 513 514 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 515 /* If we're not in the cpu read domain, set ourself into the gtt 516 * read domain and manually flush cachelines (if required). This 517 * optimizes for the case when the gpu will dirty the data 518 * anyway again before the next pread happens. */ 519 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 520 obj->cache_level); 521 ret = i915_gem_object_wait_rendering(obj, true); 522 if (ret) 523 return ret; 524 } 525 526 ret = i915_gem_object_get_pages(obj); 527 if (ret) 528 return ret; 529 530 i915_gem_object_pin_pages(obj); 531 532 return ret; 533 } 534 535 /* Per-page copy function for the shmem pread fastpath. 536 * Flushes invalid cachelines before reading the target if 537 * needs_clflush is set. */ 538 static int 539 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 540 char __user *user_data, 541 bool page_do_bit17_swizzling, bool needs_clflush) 542 { 543 char *vaddr; 544 int ret; 545 546 if (unlikely(page_do_bit17_swizzling)) 547 return -EINVAL; 548 549 vaddr = kmap_atomic(page); 550 if (needs_clflush) 551 drm_clflush_virt_range(vaddr + shmem_page_offset, 552 page_length); 553 ret = __copy_to_user_inatomic(user_data, 554 vaddr + shmem_page_offset, 555 page_length); 556 kunmap_atomic(vaddr); 557 558 return ret ? 
-EFAULT : 0; 559 } 560 561 static void 562 shmem_clflush_swizzled_range(char *addr, unsigned long length, 563 bool swizzled) 564 { 565 if (unlikely(swizzled)) { 566 unsigned long start = (unsigned long) addr; 567 unsigned long end = (unsigned long) addr + length; 568 569 /* For swizzling simply ensure that we always flush both 570 * channels. Lame, but simple and it works. Swizzled 571 * pwrite/pread is far from a hotpath - current userspace 572 * doesn't use it at all. */ 573 start = round_down(start, 128); 574 end = round_up(end, 128); 575 576 drm_clflush_virt_range((void *)start, end - start); 577 } else { 578 drm_clflush_virt_range(addr, length); 579 } 580 581 } 582 583 /* Only difference to the fast-path function is that this can handle bit17 584 * and uses non-atomic copy and kmap functions. */ 585 static int 586 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 587 char __user *user_data, 588 bool page_do_bit17_swizzling, bool needs_clflush) 589 { 590 char *vaddr; 591 int ret; 592 593 vaddr = kmap(page); 594 if (needs_clflush) 595 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 596 page_length, 597 page_do_bit17_swizzling); 598 599 if (page_do_bit17_swizzling) 600 ret = __copy_to_user_swizzled(user_data, 601 vaddr, shmem_page_offset, 602 page_length); 603 else 604 ret = __copy_to_user(user_data, 605 vaddr + shmem_page_offset, 606 page_length); 607 kunmap(page); 608 609 return ret ? - EFAULT : 0; 610 } 611 612 static inline unsigned long 613 slow_user_access(struct io_mapping *mapping, 614 uint64_t page_base, int page_offset, 615 char __user *user_data, 616 unsigned long length, bool pwrite) 617 { 618 void __iomem *ioaddr; 619 void *vaddr; 620 uint64_t unwritten; 621 622 ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); 623 /* We can use the cpu mem copy function because this is X86. 
*/ 624 vaddr = (void __force *)ioaddr + page_offset; 625 if (pwrite) 626 unwritten = __copy_from_user(vaddr, user_data, length); 627 else 628 unwritten = __copy_to_user(user_data, vaddr, length); 629 630 io_mapping_unmap(ioaddr); 631 return unwritten; 632 } 633 634 static int 635 i915_gem_gtt_pread(struct drm_device *dev, 636 struct drm_i915_gem_object *obj, uint64_t size, 637 uint64_t data_offset, uint64_t data_ptr) 638 { 639 struct drm_i915_private *dev_priv = to_i915(dev); 640 struct i915_ggtt *ggtt = &dev_priv->ggtt; 641 struct drm_mm_node node; 642 char __user *user_data; 643 uint64_t remain; 644 uint64_t offset; 645 int ret; 646 647 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); 648 if (ret) { 649 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); 650 if (ret) 651 goto out; 652 653 ret = i915_gem_object_get_pages(obj); 654 if (ret) { 655 remove_mappable_node(&node); 656 goto out; 657 } 658 659 i915_gem_object_pin_pages(obj); 660 } else { 661 node.start = i915_gem_obj_ggtt_offset(obj); 662 node.allocated = false; 663 ret = i915_gem_object_put_fence(obj); 664 if (ret) 665 goto out_unpin; 666 } 667 668 ret = i915_gem_object_set_to_gtt_domain(obj, false); 669 if (ret) 670 goto out_unpin; 671 672 user_data = u64_to_user_ptr(data_ptr); 673 remain = size; 674 offset = data_offset; 675 676 mutex_unlock(&dev->struct_mutex); 677 if (likely(!i915.prefault_disable)) { 678 ret = fault_in_multipages_writeable(user_data, remain); 679 if (ret) { 680 mutex_lock(&dev->struct_mutex); 681 goto out_unpin; 682 } 683 } 684 685 while (remain > 0) { 686 /* Operation in this page 687 * 688 * page_base = page offset within aperture 689 * page_offset = offset within page 690 * page_length = bytes to copy for this page 691 */ 692 u32 page_base = node.start; 693 unsigned page_offset = offset_in_page(offset); 694 unsigned page_length = PAGE_SIZE - page_offset; 695 page_length = remain < page_length ? remain : page_length; 696 if (node.allocated) { 697 wmb(); 698 ggtt->base.insert_page(&ggtt->base, 699 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 700 node.start, 701 I915_CACHE_NONE, 0); 702 wmb(); 703 } else { 704 page_base += offset & PAGE_MASK; 705 } 706 /* This is a slow read/write as it tries to read from 707 * and write to user memory which may result into page 708 * faults, and so we cannot perform this under struct_mutex. 709 */ 710 if (slow_user_access(ggtt->mappable, page_base, 711 page_offset, user_data, 712 page_length, false)) { 713 ret = -EFAULT; 714 break; 715 } 716 717 remain -= page_length; 718 user_data += page_length; 719 offset += page_length; 720 } 721 722 mutex_lock(&dev->struct_mutex); 723 if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 724 /* The user has modified the object whilst we tried 725 * reading from it, and we now have no idea what domain 726 * the pages should be in. As we have just been touching 727 * them directly, flush everything back to the GTT 728 * domain. 
729 */ 730 ret = i915_gem_object_set_to_gtt_domain(obj, false); 731 } 732 733 out_unpin: 734 if (node.allocated) { 735 wmb(); 736 ggtt->base.clear_range(&ggtt->base, 737 node.start, node.size, 738 true); 739 i915_gem_object_unpin_pages(obj); 740 remove_mappable_node(&node); 741 } else { 742 i915_gem_object_ggtt_unpin(obj); 743 } 744 out: 745 return ret; 746 } 747 748 static int 749 i915_gem_shmem_pread(struct drm_device *dev, 750 struct drm_i915_gem_object *obj, 751 struct drm_i915_gem_pread *args, 752 struct drm_file *file) 753 { 754 char __user *user_data; 755 ssize_t remain; 756 loff_t offset; 757 int shmem_page_offset, page_length, ret = 0; 758 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 759 int prefaulted = 0; 760 int needs_clflush = 0; 761 struct sg_page_iter sg_iter; 762 763 if (!i915_gem_object_has_struct_page(obj)) 764 return -ENODEV; 765 766 user_data = u64_to_user_ptr(args->data_ptr); 767 remain = args->size; 768 769 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 770 771 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 772 if (ret) 773 return ret; 774 775 offset = args->offset; 776 777 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 778 offset >> PAGE_SHIFT) { 779 struct page *page = sg_page_iter_page(&sg_iter); 780 781 if (remain <= 0) 782 break; 783 784 /* Operation in this page 785 * 786 * shmem_page_offset = offset within page in shmem file 787 * page_length = bytes to copy for this page 788 */ 789 shmem_page_offset = offset_in_page(offset); 790 page_length = remain; 791 if ((shmem_page_offset + page_length) > PAGE_SIZE) 792 page_length = PAGE_SIZE - shmem_page_offset; 793 794 page_do_bit17_swizzling = obj_do_bit17_swizzling && 795 (page_to_phys(page) & (1 << 17)) != 0; 796 797 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 798 user_data, page_do_bit17_swizzling, 799 needs_clflush); 800 if (ret == 0) 801 goto next_page; 802 803 mutex_unlock(&dev->struct_mutex); 804 805 if (likely(!i915.prefault_disable) && !prefaulted) { 806 ret = fault_in_multipages_writeable(user_data, remain); 807 /* Userspace is tricking us, but we've already clobbered 808 * its pages with the prefault and promised to write the 809 * data up to the first fault. Hence ignore any errors 810 * and just continue. */ 811 (void)ret; 812 prefaulted = 1; 813 } 814 815 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 816 user_data, page_do_bit17_swizzling, 817 needs_clflush); 818 819 mutex_lock(&dev->struct_mutex); 820 821 if (ret) 822 goto out; 823 824 next_page: 825 remain -= page_length; 826 user_data += page_length; 827 offset += page_length; 828 } 829 830 out: 831 i915_gem_object_unpin_pages(obj); 832 833 return ret; 834 } 835 836 /** 837 * Reads data from the object referenced by handle. 838 * @dev: drm device pointer 839 * @data: ioctl data blob 840 * @file: drm file pointer 841 * 842 * On error, the contents of *data are undefined. 
843 */ 844 int 845 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 846 struct drm_file *file) 847 { 848 struct drm_i915_gem_pread *args = data; 849 struct drm_i915_gem_object *obj; 850 int ret = 0; 851 852 if (args->size == 0) 853 return 0; 854 855 if (!access_ok(VERIFY_WRITE, 856 u64_to_user_ptr(args->data_ptr), 857 args->size)) 858 return -EFAULT; 859 860 ret = i915_mutex_lock_interruptible(dev); 861 if (ret) 862 return ret; 863 864 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 865 if (&obj->base == NULL) { 866 ret = -ENOENT; 867 goto unlock; 868 } 869 870 /* Bounds check source. */ 871 if (args->offset > obj->base.size || 872 args->size > obj->base.size - args->offset) { 873 ret = -EINVAL; 874 goto out; 875 } 876 877 trace_i915_gem_object_pread(obj, args->offset, args->size); 878 879 ret = i915_gem_shmem_pread(dev, obj, args, file); 880 881 /* pread for non shmem backed objects */ 882 if (ret == -EFAULT || ret == -ENODEV) { 883 intel_runtime_pm_get(to_i915(dev)); 884 ret = i915_gem_gtt_pread(dev, obj, args->size, 885 args->offset, args->data_ptr); 886 intel_runtime_pm_put(to_i915(dev)); 887 } 888 889 out: 890 drm_gem_object_unreference(&obj->base); 891 unlock: 892 mutex_unlock(&dev->struct_mutex); 893 return ret; 894 } 895 896 /* This is the fast write path which cannot handle 897 * page faults in the source data 898 */ 899 900 static inline int 901 fast_user_write(struct io_mapping *mapping, 902 loff_t page_base, int page_offset, 903 char __user *user_data, 904 int length) 905 { 906 void __iomem *vaddr_atomic; 907 void *vaddr; 908 unsigned long unwritten; 909 910 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 911 /* We can use the cpu mem copy function because this is X86. */ 912 vaddr = (void __force*)vaddr_atomic + page_offset; 913 unwritten = __copy_from_user_inatomic_nocache(vaddr, 914 user_data, length); 915 io_mapping_unmap_atomic(vaddr_atomic); 916 return unwritten; 917 } 918 919 /** 920 * This is the fast pwrite path, where we copy the data directly from the 921 * user into the GTT, uncached. 
922 * @dev: drm device pointer 923 * @obj: i915 gem object 924 * @args: pwrite arguments structure 925 * @file: drm file pointer 926 */ 927 static int 928 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, 929 struct drm_i915_gem_object *obj, 930 struct drm_i915_gem_pwrite *args, 931 struct drm_file *file) 932 { 933 struct i915_ggtt *ggtt = &i915->ggtt; 934 struct drm_device *dev = obj->base.dev; 935 struct drm_mm_node node; 936 uint64_t remain, offset; 937 char __user *user_data; 938 int ret; 939 bool hit_slow_path = false; 940 941 if (obj->tiling_mode != I915_TILING_NONE) 942 return -EFAULT; 943 944 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 945 if (ret) { 946 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 947 if (ret) 948 goto out; 949 950 ret = i915_gem_object_get_pages(obj); 951 if (ret) { 952 remove_mappable_node(&node); 953 goto out; 954 } 955 956 i915_gem_object_pin_pages(obj); 957 } else { 958 node.start = i915_gem_obj_ggtt_offset(obj); 959 node.allocated = false; 960 ret = i915_gem_object_put_fence(obj); 961 if (ret) 962 goto out_unpin; 963 } 964 965 ret = i915_gem_object_set_to_gtt_domain(obj, true); 966 if (ret) 967 goto out_unpin; 968 969 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 970 obj->dirty = true; 971 972 user_data = u64_to_user_ptr(args->data_ptr); 973 offset = args->offset; 974 remain = args->size; 975 while (remain) { 976 /* Operation in this page 977 * 978 * page_base = page offset within aperture 979 * page_offset = offset within page 980 * page_length = bytes to copy for this page 981 */ 982 u32 page_base = node.start; 983 unsigned page_offset = offset_in_page(offset); 984 unsigned page_length = PAGE_SIZE - page_offset; 985 page_length = remain < page_length ? remain : page_length; 986 if (node.allocated) { 987 wmb(); /* flush the write before we modify the GGTT */ 988 ggtt->base.insert_page(&ggtt->base, 989 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 990 node.start, I915_CACHE_NONE, 0); 991 wmb(); /* flush modifications to the GGTT (insert_page) */ 992 } else { 993 page_base += offset & PAGE_MASK; 994 } 995 /* If we get a fault while copying data, then (presumably) our 996 * source page isn't available. Return the error and we'll 997 * retry in the slow path. 998 * If the object is non-shmem backed, we retry again with the 999 * path that handles page fault. 1000 */ 1001 if (fast_user_write(ggtt->mappable, page_base, 1002 page_offset, user_data, page_length)) { 1003 hit_slow_path = true; 1004 mutex_unlock(&dev->struct_mutex); 1005 if (slow_user_access(ggtt->mappable, 1006 page_base, 1007 page_offset, user_data, 1008 page_length, true)) { 1009 ret = -EFAULT; 1010 mutex_lock(&dev->struct_mutex); 1011 goto out_flush; 1012 } 1013 1014 mutex_lock(&dev->struct_mutex); 1015 } 1016 1017 remain -= page_length; 1018 user_data += page_length; 1019 offset += page_length; 1020 } 1021 1022 out_flush: 1023 if (hit_slow_path) { 1024 if (ret == 0 && 1025 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 1026 /* The user has modified the object whilst we tried 1027 * reading from it, and we now have no idea what domain 1028 * the pages should be in. As we have just been touching 1029 * them directly, flush everything back to the GTT 1030 * domain. 
1031 */ 1032 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1033 } 1034 } 1035 1036 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 1037 out_unpin: 1038 if (node.allocated) { 1039 wmb(); 1040 ggtt->base.clear_range(&ggtt->base, 1041 node.start, node.size, 1042 true); 1043 i915_gem_object_unpin_pages(obj); 1044 remove_mappable_node(&node); 1045 } else { 1046 i915_gem_object_ggtt_unpin(obj); 1047 } 1048 out: 1049 return ret; 1050 } 1051 1052 /* Per-page copy function for the shmem pwrite fastpath. 1053 * Flushes invalid cachelines before writing to the target if 1054 * needs_clflush_before is set and flushes out any written cachelines after 1055 * writing if needs_clflush is set. */ 1056 static int 1057 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 1058 char __user *user_data, 1059 bool page_do_bit17_swizzling, 1060 bool needs_clflush_before, 1061 bool needs_clflush_after) 1062 { 1063 char *vaddr; 1064 int ret; 1065 1066 if (unlikely(page_do_bit17_swizzling)) 1067 return -EINVAL; 1068 1069 vaddr = kmap_atomic(page); 1070 if (needs_clflush_before) 1071 drm_clflush_virt_range(vaddr + shmem_page_offset, 1072 page_length); 1073 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 1074 user_data, page_length); 1075 if (needs_clflush_after) 1076 drm_clflush_virt_range(vaddr + shmem_page_offset, 1077 page_length); 1078 kunmap_atomic(vaddr); 1079 1080 return ret ? -EFAULT : 0; 1081 } 1082 1083 /* Only difference to the fast-path function is that this can handle bit17 1084 * and uses non-atomic copy and kmap functions. */ 1085 static int 1086 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 1087 char __user *user_data, 1088 bool page_do_bit17_swizzling, 1089 bool needs_clflush_before, 1090 bool needs_clflush_after) 1091 { 1092 char *vaddr; 1093 int ret; 1094 1095 vaddr = kmap(page); 1096 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1097 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1098 page_length, 1099 page_do_bit17_swizzling); 1100 if (page_do_bit17_swizzling) 1101 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 1102 user_data, 1103 page_length); 1104 else 1105 ret = __copy_from_user(vaddr + shmem_page_offset, 1106 user_data, 1107 page_length); 1108 if (needs_clflush_after) 1109 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1110 page_length, 1111 page_do_bit17_swizzling); 1112 kunmap(page); 1113 1114 return ret ? -EFAULT : 0; 1115 } 1116 1117 static int 1118 i915_gem_shmem_pwrite(struct drm_device *dev, 1119 struct drm_i915_gem_object *obj, 1120 struct drm_i915_gem_pwrite *args, 1121 struct drm_file *file) 1122 { 1123 ssize_t remain; 1124 loff_t offset; 1125 char __user *user_data; 1126 int shmem_page_offset, page_length, ret = 0; 1127 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 1128 int hit_slowpath = 0; 1129 int needs_clflush_after = 0; 1130 int needs_clflush_before = 0; 1131 struct sg_page_iter sg_iter; 1132 1133 user_data = u64_to_user_ptr(args->data_ptr); 1134 remain = args->size; 1135 1136 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 1137 1138 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1139 /* If we're not in the cpu write domain, set ourself into the gtt 1140 * write domain and manually flush cachelines (if required). This 1141 * optimizes for the case when the gpu will use the data 1142 * right away and we therefore have to clflush anyway. 
*/ 1143 needs_clflush_after = cpu_write_needs_clflush(obj); 1144 ret = i915_gem_object_wait_rendering(obj, false); 1145 if (ret) 1146 return ret; 1147 } 1148 /* Same trick applies to invalidate partially written cachelines read 1149 * before writing. */ 1150 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 1151 needs_clflush_before = 1152 !cpu_cache_is_coherent(dev, obj->cache_level); 1153 1154 ret = i915_gem_object_get_pages(obj); 1155 if (ret) 1156 return ret; 1157 1158 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1159 1160 i915_gem_object_pin_pages(obj); 1161 1162 offset = args->offset; 1163 obj->dirty = 1; 1164 1165 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1166 offset >> PAGE_SHIFT) { 1167 struct page *page = sg_page_iter_page(&sg_iter); 1168 int partial_cacheline_write; 1169 1170 if (remain <= 0) 1171 break; 1172 1173 /* Operation in this page 1174 * 1175 * shmem_page_offset = offset within page in shmem file 1176 * page_length = bytes to copy for this page 1177 */ 1178 shmem_page_offset = offset_in_page(offset); 1179 1180 page_length = remain; 1181 if ((shmem_page_offset + page_length) > PAGE_SIZE) 1182 page_length = PAGE_SIZE - shmem_page_offset; 1183 1184 /* If we don't overwrite a cacheline completely we need to be 1185 * careful to have up-to-date data by first clflushing. Don't 1186 * overcomplicate things and flush the entire patch. */ 1187 partial_cacheline_write = needs_clflush_before && 1188 ((shmem_page_offset | page_length) 1189 & (boot_cpu_data.x86_clflush_size - 1)); 1190 1191 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1192 (page_to_phys(page) & (1 << 17)) != 0; 1193 1194 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1195 user_data, page_do_bit17_swizzling, 1196 partial_cacheline_write, 1197 needs_clflush_after); 1198 if (ret == 0) 1199 goto next_page; 1200 1201 hit_slowpath = 1; 1202 mutex_unlock(&dev->struct_mutex); 1203 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1204 user_data, page_do_bit17_swizzling, 1205 partial_cacheline_write, 1206 needs_clflush_after); 1207 1208 mutex_lock(&dev->struct_mutex); 1209 1210 if (ret) 1211 goto out; 1212 1213 next_page: 1214 remain -= page_length; 1215 user_data += page_length; 1216 offset += page_length; 1217 } 1218 1219 out: 1220 i915_gem_object_unpin_pages(obj); 1221 1222 if (hit_slowpath) { 1223 /* 1224 * Fixup: Flush cpu caches in case we didn't flush the dirty 1225 * cachelines in-line while writing and the object moved 1226 * out of the cpu write domain while we've dropped the lock. 1227 */ 1228 if (!needs_clflush_after && 1229 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1230 if (i915_gem_clflush_object(obj, obj->pin_display)) 1231 needs_clflush_after = true; 1232 } 1233 } 1234 1235 if (needs_clflush_after) 1236 i915_gem_chipset_flush(to_i915(dev)); 1237 else 1238 obj->cache_dirty = true; 1239 1240 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1241 return ret; 1242 } 1243 1244 /** 1245 * Writes data to the object referenced by handle. 1246 * @dev: drm device 1247 * @data: ioctl data blob 1248 * @file: drm file 1249 * 1250 * On error, the contents of the buffer that were to be modified are undefined. 
1251 */ 1252 int 1253 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1254 struct drm_file *file) 1255 { 1256 struct drm_i915_private *dev_priv = to_i915(dev); 1257 struct drm_i915_gem_pwrite *args = data; 1258 struct drm_i915_gem_object *obj; 1259 int ret; 1260 1261 if (args->size == 0) 1262 return 0; 1263 1264 if (!access_ok(VERIFY_READ, 1265 u64_to_user_ptr(args->data_ptr), 1266 args->size)) 1267 return -EFAULT; 1268 1269 if (likely(!i915.prefault_disable)) { 1270 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1271 args->size); 1272 if (ret) 1273 return -EFAULT; 1274 } 1275 1276 intel_runtime_pm_get(dev_priv); 1277 1278 ret = i915_mutex_lock_interruptible(dev); 1279 if (ret) 1280 goto put_rpm; 1281 1282 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1283 if (&obj->base == NULL) { 1284 ret = -ENOENT; 1285 goto unlock; 1286 } 1287 1288 /* Bounds check destination. */ 1289 if (args->offset > obj->base.size || 1290 args->size > obj->base.size - args->offset) { 1291 ret = -EINVAL; 1292 goto out; 1293 } 1294 1295 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1296 1297 ret = -EFAULT; 1298 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1299 * it would end up going through the fenced access, and we'll get 1300 * different detiling behavior between reading and writing. 1301 * pread/pwrite currently are reading and writing from the CPU 1302 * perspective, requiring manual detiling by the client. 1303 */ 1304 if (!i915_gem_object_has_struct_page(obj) || 1305 cpu_write_needs_clflush(obj)) { 1306 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); 1307 /* Note that the gtt paths might fail with non-page-backed user 1308 * pointers (e.g. gtt mappings when moving data between 1309 * textures). Fallback to the shmem path in that case. */ 1310 } 1311 1312 if (ret == -EFAULT || ret == -ENOSPC) { 1313 if (obj->phys_handle) 1314 ret = i915_gem_phys_pwrite(obj, args, file); 1315 else if (i915_gem_object_has_struct_page(obj)) 1316 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1317 else 1318 ret = -ENODEV; 1319 } 1320 1321 out: 1322 drm_gem_object_unreference(&obj->base); 1323 unlock: 1324 mutex_unlock(&dev->struct_mutex); 1325 put_rpm: 1326 intel_runtime_pm_put(dev_priv); 1327 1328 return ret; 1329 } 1330 1331 static int 1332 i915_gem_check_wedge(unsigned reset_counter, bool interruptible) 1333 { 1334 if (__i915_terminally_wedged(reset_counter)) 1335 return -EIO; 1336 1337 if (__i915_reset_in_progress(reset_counter)) { 1338 /* Non-interruptible callers can't handle -EAGAIN, hence return 1339 * -EIO unconditionally for these. */ 1340 if (!interruptible) 1341 return -EIO; 1342 1343 return -EAGAIN; 1344 } 1345 1346 return 0; 1347 } 1348 1349 static unsigned long local_clock_us(unsigned *cpu) 1350 { 1351 unsigned long t; 1352 1353 /* Cheaply and approximately convert from nanoseconds to microseconds. 1354 * The result and subsequent calculations are also defined in the same 1355 * approximate microseconds units. The principal source of timing 1356 * error here is from the simple truncation. 1357 * 1358 * Note that local_clock() is only defined wrt to the current CPU; 1359 * the comparisons are no longer valid if we switch CPUs. Instead of 1360 * blocking preemption for the entire busywait, we can detect the CPU 1361 * switch and use that as indicator of system load and a reason to 1362 * stop busywaiting, see busywait_stop(). 
1363 */ 1364 *cpu = get_cpu(); 1365 t = local_clock() >> 10; 1366 put_cpu(); 1367 1368 return t; 1369 } 1370 1371 static bool busywait_stop(unsigned long timeout, unsigned cpu) 1372 { 1373 unsigned this_cpu; 1374 1375 if (time_after(local_clock_us(&this_cpu), timeout)) 1376 return true; 1377 1378 return this_cpu != cpu; 1379 } 1380 1381 bool __i915_spin_request(const struct drm_i915_gem_request *req, 1382 int state, unsigned long timeout_us) 1383 { 1384 unsigned cpu; 1385 1386 /* When waiting for high frequency requests, e.g. during synchronous 1387 * rendering split between the CPU and GPU, the finite amount of time 1388 * required to set up the irq and wait upon it limits the response 1389 * rate. By busywaiting on the request completion for a short while we 1390 * can service the high frequency waits as quick as possible. However, 1391 * if it is a slow request, we want to sleep as quickly as possible. 1392 * The tradeoff between waiting and sleeping is roughly the time it 1393 * takes to sleep on a request, on the order of a microsecond. 1394 */ 1395 1396 timeout_us += local_clock_us(&cpu); 1397 do { 1398 if (i915_gem_request_completed(req)) 1399 return true; 1400 1401 if (signal_pending_state(state, current)) 1402 break; 1403 1404 if (busywait_stop(timeout_us, cpu)) 1405 break; 1406 1407 cpu_relax_lowlatency(); 1408 } while (!need_resched()); 1409 1410 return false; 1411 } 1412 1413 /** 1414 * __i915_wait_request - wait until execution of request has finished 1415 * @req: duh! 1416 * @interruptible: do an interruptible wait (normally yes) 1417 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1418 * @rps: RPS client 1419 * 1420 * Note: It is of utmost importance that the passed in seqno and reset_counter 1421 * values have been read by the caller in an smp safe manner. Where read-side 1422 * locks are involved, it is sufficient to read the reset_counter before 1423 * unlocking the lock that protects the seqno. For lockless tricks, the 1424 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1425 * inserted. 1426 * 1427 * Returns 0 if the request was found within the alloted time. Else returns the 1428 * errno with remaining time filled in timeout argument. 1429 */ 1430 int __i915_wait_request(struct drm_i915_gem_request *req, 1431 bool interruptible, 1432 s64 *timeout, 1433 struct intel_rps_client *rps) 1434 { 1435 int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 1436 DEFINE_WAIT(reset); 1437 struct intel_wait wait; 1438 unsigned long timeout_remain; 1439 s64 before = 0; /* Only to silence a compiler warning. */ 1440 int ret = 0; 1441 1442 might_sleep(); 1443 1444 if (list_empty(&req->list)) 1445 return 0; 1446 1447 if (i915_gem_request_completed(req)) 1448 return 0; 1449 1450 timeout_remain = MAX_SCHEDULE_TIMEOUT; 1451 if (timeout) { 1452 if (WARN_ON(*timeout < 0)) 1453 return -EINVAL; 1454 1455 if (*timeout == 0) 1456 return -ETIME; 1457 1458 timeout_remain = nsecs_to_jiffies_timeout(*timeout); 1459 1460 /* 1461 * Record current time in case interrupted by signal, or wedged. 1462 */ 1463 before = ktime_get_raw_ns(); 1464 } 1465 1466 trace_i915_gem_request_wait_begin(req); 1467 1468 /* This client is about to stall waiting for the GPU. In many cases 1469 * this is undesirable and limits the throughput of the system, as 1470 * many clients cannot continue processing user input/output whilst 1471 * blocked. 
RPS autotuning may take tens of milliseconds to respond 1472 * to the GPU load and thus incurs additional latency for the client. 1473 * We can circumvent that by promoting the GPU frequency to maximum 1474 * before we wait. This makes the GPU throttle up much more quickly 1475 * (good for benchmarks and user experience, e.g. window animations), 1476 * but at a cost of spending more power processing the workload 1477 * (bad for battery). Not all clients even want their results 1478 * immediately and for them we should just let the GPU select its own 1479 * frequency to maximise efficiency. To prevent a single client from 1480 * forcing the clocks too high for the whole system, we only allow 1481 * each client to waitboost once in a busy period. 1482 */ 1483 if (INTEL_INFO(req->i915)->gen >= 6) 1484 gen6_rps_boost(req->i915, rps, req->emitted_jiffies); 1485 1486 /* Optimistic spin for the next ~jiffie before touching IRQs */ 1487 if (i915_spin_request(req, state, 5)) 1488 goto complete; 1489 1490 set_current_state(state); 1491 add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); 1492 1493 intel_wait_init(&wait, req->seqno); 1494 if (intel_engine_add_wait(req->engine, &wait)) 1495 /* In order to check that we haven't missed the interrupt 1496 * as we enabled it, we need to kick ourselves to do a 1497 * coherent check on the seqno before we sleep. 1498 */ 1499 goto wakeup; 1500 1501 for (;;) { 1502 if (signal_pending_state(state, current)) { 1503 ret = -ERESTARTSYS; 1504 break; 1505 } 1506 1507 timeout_remain = io_schedule_timeout(timeout_remain); 1508 if (timeout_remain == 0) { 1509 ret = -ETIME; 1510 break; 1511 } 1512 1513 if (intel_wait_complete(&wait)) 1514 break; 1515 1516 set_current_state(state); 1517 1518 wakeup: 1519 /* Carefully check if the request is complete, giving time 1520 * for the seqno to be visible following the interrupt. 1521 * We also have to check in case we are kicked by the GPU 1522 * reset in order to drop the struct_mutex. 1523 */ 1524 if (__i915_request_irq_complete(req)) 1525 break; 1526 1527 /* Only spin if we know the GPU is processing this request */ 1528 if (i915_spin_request(req, state, 2)) 1529 break; 1530 } 1531 remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); 1532 1533 intel_engine_remove_wait(req->engine, &wait); 1534 __set_current_state(TASK_RUNNING); 1535 complete: 1536 trace_i915_gem_request_wait_end(req); 1537 1538 if (timeout) { 1539 s64 tres = *timeout - (ktime_get_raw_ns() - before); 1540 1541 *timeout = tres < 0 ? 0 : tres; 1542 1543 /* 1544 * Apparently ktime isn't accurate enough and occasionally has a 1545 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1546 * things up to make the test happy. We allow up to 1 jiffy. 1547 * 1548 * This is a regrssion from the timespec->ktime conversion. 1549 */ 1550 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1551 *timeout = 0; 1552 } 1553 1554 if (rps && req->seqno == req->engine->last_submitted_seqno) { 1555 /* The GPU is now idle and this client has stalled. 1556 * Since no other client has submitted a request in the 1557 * meantime, assume that this client is the only one 1558 * supplying work to the GPU but is unable to keep that 1559 * work supplied because it is waiting. Since the GPU is 1560 * then never kept fully busy, RPS autoclocking will 1561 * keep the clocks relatively low, causing further delays. 1562 * Compensate by giving the synchronous client credit for 1563 * a waitboost next time. 
1564 */ 1565 spin_lock(&req->i915->rps.client_lock); 1566 list_del_init(&rps->link); 1567 spin_unlock(&req->i915->rps.client_lock); 1568 } 1569 1570 return ret; 1571 } 1572 1573 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1574 struct drm_file *file) 1575 { 1576 struct drm_i915_file_private *file_priv; 1577 1578 WARN_ON(!req || !file || req->file_priv); 1579 1580 if (!req || !file) 1581 return -EINVAL; 1582 1583 if (req->file_priv) 1584 return -EINVAL; 1585 1586 file_priv = file->driver_priv; 1587 1588 spin_lock(&file_priv->mm.lock); 1589 req->file_priv = file_priv; 1590 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1591 spin_unlock(&file_priv->mm.lock); 1592 1593 req->pid = get_pid(task_pid(current)); 1594 1595 return 0; 1596 } 1597 1598 static inline void 1599 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1600 { 1601 struct drm_i915_file_private *file_priv = request->file_priv; 1602 1603 if (!file_priv) 1604 return; 1605 1606 spin_lock(&file_priv->mm.lock); 1607 list_del(&request->client_list); 1608 request->file_priv = NULL; 1609 spin_unlock(&file_priv->mm.lock); 1610 1611 put_pid(request->pid); 1612 request->pid = NULL; 1613 } 1614 1615 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1616 { 1617 trace_i915_gem_request_retire(request); 1618 1619 /* We know the GPU must have read the request to have 1620 * sent us the seqno + interrupt, so use the position 1621 * of tail of the request to update the last known position 1622 * of the GPU head. 1623 * 1624 * Note this requires that we are always called in request 1625 * completion order. 1626 */ 1627 request->ringbuf->last_retired_head = request->postfix; 1628 1629 list_del_init(&request->list); 1630 i915_gem_request_remove_from_client(request); 1631 1632 if (request->previous_context) { 1633 if (i915.enable_execlists) 1634 intel_lr_context_unpin(request->previous_context, 1635 request->engine); 1636 } 1637 1638 i915_gem_context_unreference(request->ctx); 1639 i915_gem_request_unreference(request); 1640 } 1641 1642 static void 1643 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1644 { 1645 struct intel_engine_cs *engine = req->engine; 1646 struct drm_i915_gem_request *tmp; 1647 1648 lockdep_assert_held(&engine->i915->drm.struct_mutex); 1649 1650 if (list_empty(&req->list)) 1651 return; 1652 1653 do { 1654 tmp = list_first_entry(&engine->request_list, 1655 typeof(*tmp), list); 1656 1657 i915_gem_request_retire(tmp); 1658 } while (tmp != req); 1659 1660 WARN_ON(i915_verify_lists(engine->dev)); 1661 } 1662 1663 /** 1664 * Waits for a request to be signaled, and cleans up the 1665 * request and object lists appropriately for that event. 1666 * @req: request to wait on 1667 */ 1668 int 1669 i915_wait_request(struct drm_i915_gem_request *req) 1670 { 1671 struct drm_i915_private *dev_priv = req->i915; 1672 bool interruptible; 1673 int ret; 1674 1675 interruptible = dev_priv->mm.interruptible; 1676 1677 BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); 1678 1679 ret = __i915_wait_request(req, interruptible, NULL, NULL); 1680 if (ret) 1681 return ret; 1682 1683 /* If the GPU hung, we want to keep the requests to find the guilty. */ 1684 if (!i915_reset_in_progress(&dev_priv->gpu_error)) 1685 __i915_gem_request_retire__upto(req); 1686 1687 return 0; 1688 } 1689 1690 /** 1691 * Ensures that all rendering to the object has completed and the object is 1692 * safe to unbind from the GTT or access from the CPU. 
1693 * @obj: i915 gem object 1694 * @readonly: waiting for read access or write 1695 */ 1696 int 1697 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1698 bool readonly) 1699 { 1700 int ret, i; 1701 1702 if (!obj->active) 1703 return 0; 1704 1705 if (readonly) { 1706 if (obj->last_write_req != NULL) { 1707 ret = i915_wait_request(obj->last_write_req); 1708 if (ret) 1709 return ret; 1710 1711 i = obj->last_write_req->engine->id; 1712 if (obj->last_read_req[i] == obj->last_write_req) 1713 i915_gem_object_retire__read(obj, i); 1714 else 1715 i915_gem_object_retire__write(obj); 1716 } 1717 } else { 1718 for (i = 0; i < I915_NUM_ENGINES; i++) { 1719 if (obj->last_read_req[i] == NULL) 1720 continue; 1721 1722 ret = i915_wait_request(obj->last_read_req[i]); 1723 if (ret) 1724 return ret; 1725 1726 i915_gem_object_retire__read(obj, i); 1727 } 1728 GEM_BUG_ON(obj->active); 1729 } 1730 1731 return 0; 1732 } 1733 1734 static void 1735 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1736 struct drm_i915_gem_request *req) 1737 { 1738 int ring = req->engine->id; 1739 1740 if (obj->last_read_req[ring] == req) 1741 i915_gem_object_retire__read(obj, ring); 1742 else if (obj->last_write_req == req) 1743 i915_gem_object_retire__write(obj); 1744 1745 if (!i915_reset_in_progress(&req->i915->gpu_error)) 1746 __i915_gem_request_retire__upto(req); 1747 } 1748 1749 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1750 * as the object state may change during this call. 1751 */ 1752 static __must_check int 1753 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1754 struct intel_rps_client *rps, 1755 bool readonly) 1756 { 1757 struct drm_device *dev = obj->base.dev; 1758 struct drm_i915_private *dev_priv = to_i915(dev); 1759 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 1760 int ret, i, n = 0; 1761 1762 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1763 BUG_ON(!dev_priv->mm.interruptible); 1764 1765 if (!obj->active) 1766 return 0; 1767 1768 if (readonly) { 1769 struct drm_i915_gem_request *req; 1770 1771 req = obj->last_write_req; 1772 if (req == NULL) 1773 return 0; 1774 1775 requests[n++] = i915_gem_request_reference(req); 1776 } else { 1777 for (i = 0; i < I915_NUM_ENGINES; i++) { 1778 struct drm_i915_gem_request *req; 1779 1780 req = obj->last_read_req[i]; 1781 if (req == NULL) 1782 continue; 1783 1784 requests[n++] = i915_gem_request_reference(req); 1785 } 1786 } 1787 1788 mutex_unlock(&dev->struct_mutex); 1789 ret = 0; 1790 for (i = 0; ret == 0 && i < n; i++) 1791 ret = __i915_wait_request(requests[i], true, NULL, rps); 1792 mutex_lock(&dev->struct_mutex); 1793 1794 for (i = 0; i < n; i++) { 1795 if (ret == 0) 1796 i915_gem_object_retire_request(obj, requests[i]); 1797 i915_gem_request_unreference(requests[i]); 1798 } 1799 1800 return ret; 1801 } 1802 1803 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1804 { 1805 struct drm_i915_file_private *fpriv = file->driver_priv; 1806 return &fpriv->rps; 1807 } 1808 1809 static enum fb_op_origin 1810 write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1811 { 1812 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? 1813 ORIGIN_GTT : ORIGIN_CPU; 1814 } 1815 1816 /** 1817 * Called when user space prepares to use an object with the CPU, either 1818 * through the mmap ioctl's mapping or a GTT mapping. 
1819 * @dev: drm device 1820 * @data: ioctl data blob 1821 * @file: drm file 1822 */ 1823 int 1824 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1825 struct drm_file *file) 1826 { 1827 struct drm_i915_gem_set_domain *args = data; 1828 struct drm_i915_gem_object *obj; 1829 uint32_t read_domains = args->read_domains; 1830 uint32_t write_domain = args->write_domain; 1831 int ret; 1832 1833 /* Only handle setting domains to types used by the CPU. */ 1834 if (write_domain & I915_GEM_GPU_DOMAINS) 1835 return -EINVAL; 1836 1837 if (read_domains & I915_GEM_GPU_DOMAINS) 1838 return -EINVAL; 1839 1840 /* Having something in the write domain implies it's in the read 1841 * domain, and only that read domain. Enforce that in the request. 1842 */ 1843 if (write_domain != 0 && read_domains != write_domain) 1844 return -EINVAL; 1845 1846 ret = i915_mutex_lock_interruptible(dev); 1847 if (ret) 1848 return ret; 1849 1850 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1851 if (&obj->base == NULL) { 1852 ret = -ENOENT; 1853 goto unlock; 1854 } 1855 1856 /* Try to flush the object off the GPU without holding the lock. 1857 * We will repeat the flush holding the lock in the normal manner 1858 * to catch cases where we are gazumped. 1859 */ 1860 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1861 to_rps_client(file), 1862 !write_domain); 1863 if (ret) 1864 goto unref; 1865 1866 if (read_domains & I915_GEM_DOMAIN_GTT) 1867 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1868 else 1869 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1870 1871 if (write_domain != 0) 1872 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); 1873 1874 unref: 1875 drm_gem_object_unreference(&obj->base); 1876 unlock: 1877 mutex_unlock(&dev->struct_mutex); 1878 return ret; 1879 } 1880 1881 /** 1882 * Called when user space has done writes to this buffer 1883 * @dev: drm device 1884 * @data: ioctl data blob 1885 * @file: drm file 1886 */ 1887 int 1888 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1889 struct drm_file *file) 1890 { 1891 struct drm_i915_gem_sw_finish *args = data; 1892 struct drm_i915_gem_object *obj; 1893 int ret = 0; 1894 1895 ret = i915_mutex_lock_interruptible(dev); 1896 if (ret) 1897 return ret; 1898 1899 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1900 if (&obj->base == NULL) { 1901 ret = -ENOENT; 1902 goto unlock; 1903 } 1904 1905 /* Pinned buffers may be scanout, so flush the cache */ 1906 if (obj->pin_display) 1907 i915_gem_object_flush_cpu_write_domain(obj); 1908 1909 drm_gem_object_unreference(&obj->base); 1910 unlock: 1911 mutex_unlock(&dev->struct_mutex); 1912 return ret; 1913 } 1914 1915 /** 1916 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1917 * it is mapped to. 1918 * @dev: drm device 1919 * @data: ioctl data blob 1920 * @file: drm file 1921 * 1922 * While the mapping holds a reference on the contents of the object, it doesn't 1923 * imply a ref on the object itself. 1924 * 1925 * IMPORTANT: 1926 * 1927 * DRM driver writers who look a this function as an example for how to do GEM 1928 * mmap support, please don't implement mmap support like here. The modern way 1929 * to implement DRM mmap support is with an mmap offset ioctl (like 1930 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1931 * That way debug tooling like valgrind will understand what's going on, hiding 1932 * the mmap call in a driver private ioctl will break that. 
The i915 driver only 1933 * does cpu mmaps this way because we didn't know better. 1934 */ 1935 int 1936 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1937 struct drm_file *file) 1938 { 1939 struct drm_i915_gem_mmap *args = data; 1940 struct drm_gem_object *obj; 1941 unsigned long addr; 1942 1943 if (args->flags & ~(I915_MMAP_WC)) 1944 return -EINVAL; 1945 1946 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1947 return -ENODEV; 1948 1949 obj = drm_gem_object_lookup(file, args->handle); 1950 if (obj == NULL) 1951 return -ENOENT; 1952 1953 /* prime objects have no backing filp to GEM mmap 1954 * pages from. 1955 */ 1956 if (!obj->filp) { 1957 drm_gem_object_unreference_unlocked(obj); 1958 return -EINVAL; 1959 } 1960 1961 addr = vm_mmap(obj->filp, 0, args->size, 1962 PROT_READ | PROT_WRITE, MAP_SHARED, 1963 args->offset); 1964 if (args->flags & I915_MMAP_WC) { 1965 struct mm_struct *mm = current->mm; 1966 struct vm_area_struct *vma; 1967 1968 if (down_write_killable(&mm->mmap_sem)) { 1969 drm_gem_object_unreference_unlocked(obj); 1970 return -EINTR; 1971 } 1972 vma = find_vma(mm, addr); 1973 if (vma) 1974 vma->vm_page_prot = 1975 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1976 else 1977 addr = -ENOMEM; 1978 up_write(&mm->mmap_sem); 1979 1980 /* This may race, but that's ok, it only gets set */ 1981 WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); 1982 } 1983 drm_gem_object_unreference_unlocked(obj); 1984 if (IS_ERR((void *)addr)) 1985 return addr; 1986 1987 args->addr_ptr = (uint64_t) addr; 1988 1989 return 0; 1990 } 1991 1992 /** 1993 * i915_gem_fault - fault a page into the GTT 1994 * @vma: VMA in question 1995 * @vmf: fault info 1996 * 1997 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1998 * from userspace. The fault handler takes care of binding the object to 1999 * the GTT (if needed), allocating and programming a fence register (again, 2000 * only if needed based on whether the old reg is still valid or the object 2001 * is tiled) and inserting a new PTE into the faulting process. 2002 * 2003 * Note that the faulting process may involve evicting existing objects 2004 * from the GTT and/or fence registers to make room. So performance may 2005 * suffer if the GTT working set is large or there are few fence registers 2006 * left. 2007 */ 2008 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2009 { 2010 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 2011 struct drm_device *dev = obj->base.dev; 2012 struct drm_i915_private *dev_priv = to_i915(dev); 2013 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2014 struct i915_ggtt_view view = i915_ggtt_view_normal; 2015 pgoff_t page_offset; 2016 unsigned long pfn; 2017 int ret = 0; 2018 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2019 2020 intel_runtime_pm_get(dev_priv); 2021 2022 /* We don't use vmf->pgoff since that has the fake offset */ 2023 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 2024 PAGE_SHIFT; 2025 2026 ret = i915_mutex_lock_interruptible(dev); 2027 if (ret) 2028 goto out; 2029 2030 trace_i915_gem_object_fault(obj, page_offset, true, write); 2031 2032 /* Try to flush the object off the GPU first without holding the lock. 2033 * Upon reacquiring the lock, we will perform our sanity checks and then 2034 * repeat the flush holding the lock in the normal manner to catch cases 2035 * where we are gazumped. 
2036 */ 2037 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 2038 if (ret) 2039 goto unlock; 2040 2041 /* Access to snoopable pages through the GTT is incoherent. */ 2042 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 2043 ret = -EFAULT; 2044 goto unlock; 2045 } 2046 2047 /* Use a partial view if the object is bigger than the aperture. */ 2048 if (obj->base.size >= ggtt->mappable_end && 2049 obj->tiling_mode == I915_TILING_NONE) { 2050 static const unsigned int chunk_size = 256; // 1 MiB 2051 2052 memset(&view, 0, sizeof(view)); 2053 view.type = I915_GGTT_VIEW_PARTIAL; 2054 view.params.partial.offset = rounddown(page_offset, chunk_size); 2055 view.params.partial.size = 2056 min_t(unsigned int, 2057 chunk_size, 2058 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 2059 view.params.partial.offset); 2060 } 2061 2062 /* Now pin it into the GTT if needed */ 2063 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 2064 if (ret) 2065 goto unlock; 2066 2067 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2068 if (ret) 2069 goto unpin; 2070 2071 ret = i915_gem_object_get_fence(obj); 2072 if (ret) 2073 goto unpin; 2074 2075 /* Finally, remap it using the new GTT offset */ 2076 pfn = ggtt->mappable_base + 2077 i915_gem_obj_ggtt_offset_view(obj, &view); 2078 pfn >>= PAGE_SHIFT; 2079 2080 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 2081 /* Overriding existing pages in partial view does not cause 2082 * us any trouble as TLBs are still valid because the fault 2083 * is due to userspace losing part of the mapping or never 2084 * having accessed it before (at this partials' range). 2085 */ 2086 unsigned long base = vma->vm_start + 2087 (view.params.partial.offset << PAGE_SHIFT); 2088 unsigned int i; 2089 2090 for (i = 0; i < view.params.partial.size; i++) { 2091 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 2092 if (ret) 2093 break; 2094 } 2095 2096 obj->fault_mappable = true; 2097 } else { 2098 if (!obj->fault_mappable) { 2099 unsigned long size = min_t(unsigned long, 2100 vma->vm_end - vma->vm_start, 2101 obj->base.size); 2102 int i; 2103 2104 for (i = 0; i < size >> PAGE_SHIFT; i++) { 2105 ret = vm_insert_pfn(vma, 2106 (unsigned long)vma->vm_start + i * PAGE_SIZE, 2107 pfn + i); 2108 if (ret) 2109 break; 2110 } 2111 2112 obj->fault_mappable = true; 2113 } else 2114 ret = vm_insert_pfn(vma, 2115 (unsigned long)vmf->virtual_address, 2116 pfn + page_offset); 2117 } 2118 unpin: 2119 i915_gem_object_ggtt_unpin_view(obj, &view); 2120 unlock: 2121 mutex_unlock(&dev->struct_mutex); 2122 out: 2123 switch (ret) { 2124 case -EIO: 2125 /* 2126 * We eat errors when the gpu is terminally wedged to avoid 2127 * userspace unduly crashing (gl has no provisions for mmaps to 2128 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2129 * and so needs to be reported. 2130 */ 2131 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2132 ret = VM_FAULT_SIGBUS; 2133 break; 2134 } 2135 case -EAGAIN: 2136 /* 2137 * EAGAIN means the gpu is hung and we'll wait for the error 2138 * handler to reset everything when re-faulting in 2139 * i915_mutex_lock_interruptible. 2140 */ 2141 case 0: 2142 case -ERESTARTSYS: 2143 case -EINTR: 2144 case -EBUSY: 2145 /* 2146 * EBUSY is ok: this just means that another thread 2147 * already did the job. 
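 *
 * In summary, the translation performed by this switch is roughly:
 *
 *	0, -EAGAIN, -ERESTARTSYS, -EINTR, -EBUSY -> VM_FAULT_NOPAGE
 *	-EIO (GPU terminally wedged)             -> VM_FAULT_NOPAGE
 *	-EIO (any other source)                  -> VM_FAULT_SIGBUS
 *	-ENOMEM                                  -> VM_FAULT_OOM
 *	-ENOSPC, -EFAULT, anything unexpected    -> VM_FAULT_SIGBUS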
2148 */ 2149 ret = VM_FAULT_NOPAGE; 2150 break; 2151 case -ENOMEM: 2152 ret = VM_FAULT_OOM; 2153 break; 2154 case -ENOSPC: 2155 case -EFAULT: 2156 ret = VM_FAULT_SIGBUS; 2157 break; 2158 default: 2159 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2160 ret = VM_FAULT_SIGBUS; 2161 break; 2162 } 2163 2164 intel_runtime_pm_put(dev_priv); 2165 return ret; 2166 } 2167 2168 /** 2169 * i915_gem_release_mmap - remove physical page mappings 2170 * @obj: obj in question 2171 * 2172 * Preserve the reservation of the mmapping with the DRM core code, but 2173 * relinquish ownership of the pages back to the system. 2174 * 2175 * It is vital that we remove the page mapping if we have mapped a tiled 2176 * object through the GTT and then lose the fence register due to 2177 * resource pressure. Similarly if the object has been moved out of the 2178 * aperture, than pages mapped into userspace must be revoked. Removing the 2179 * mapping will then trigger a page fault on the next user access, allowing 2180 * fixup by i915_gem_fault(). 2181 */ 2182 void 2183 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2184 { 2185 /* Serialisation between user GTT access and our code depends upon 2186 * revoking the CPU's PTE whilst the mutex is held. The next user 2187 * pagefault then has to wait until we release the mutex. 2188 */ 2189 lockdep_assert_held(&obj->base.dev->struct_mutex); 2190 2191 if (!obj->fault_mappable) 2192 return; 2193 2194 drm_vma_node_unmap(&obj->base.vma_node, 2195 obj->base.dev->anon_inode->i_mapping); 2196 2197 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2198 * memory transactions from userspace before we return. The TLB 2199 * flushing implied above by changing the PTE above *should* be 2200 * sufficient, an extra barrier here just provides us with a bit 2201 * of paranoid documentation about our requirement to serialise 2202 * memory writes before touching registers / GSM. 2203 */ 2204 wmb(); 2205 2206 obj->fault_mappable = false; 2207 } 2208 2209 void 2210 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2211 { 2212 struct drm_i915_gem_object *obj; 2213 2214 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2215 i915_gem_release_mmap(obj); 2216 } 2217 2218 uint32_t 2219 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2220 { 2221 uint32_t gtt_size; 2222 2223 if (INTEL_INFO(dev)->gen >= 4 || 2224 tiling_mode == I915_TILING_NONE) 2225 return size; 2226 2227 /* Previous chips need a power-of-two fence region when tiling */ 2228 if (IS_GEN3(dev)) 2229 gtt_size = 1024*1024; 2230 else 2231 gtt_size = 512*1024; 2232 2233 while (gtt_size < size) 2234 gtt_size <<= 1; 2235 2236 return gtt_size; 2237 } 2238 2239 /** 2240 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2241 * @dev: drm device 2242 * @size: object size 2243 * @tiling_mode: tiling mode 2244 * @fenced: is fenced alignemned required or not 2245 * 2246 * Return the required GTT alignment for an object, taking into account 2247 * potential fence register mapping. 2248 */ 2249 uint32_t 2250 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2251 int tiling_mode, bool fenced) 2252 { 2253 /* 2254 * Minimum alignment is 4k (GTT page size), but might be greater 2255 * if a fence register is needed for the object. 
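 *
 * For example, following i915_gem_get_gtt_size() above: a 600KiB
 * X-tiled object on gen3 needs a 1MiB fence region and therefore 1MiB
 * alignment, whereas a 300KiB tiled object on an even older part only
 * needs 512KiB. Untiled objects, unfenced mappings on G33 and all of
 * gen4+ get away with 4KiB.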
2256 */ 2257 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2258 tiling_mode == I915_TILING_NONE) 2259 return 4096; 2260 2261 /* 2262 * Previous chips need to be aligned to the size of the smallest 2263 * fence register that can contain the object. 2264 */ 2265 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2266 } 2267 2268 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2269 { 2270 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2271 int ret; 2272 2273 dev_priv->mm.shrinker_no_lock_stealing = true; 2274 2275 ret = drm_gem_create_mmap_offset(&obj->base); 2276 if (ret != -ENOSPC) 2277 goto out; 2278 2279 /* Badly fragmented mmap space? The only way we can recover 2280 * space is by destroying unwanted objects. We can't randomly release 2281 * mmap_offsets as userspace expects them to be persistent for the 2282 * lifetime of the objects. The closest we can is to release the 2283 * offsets on purgeable objects by truncating it and marking it purged, 2284 * which prevents userspace from ever using that object again. 2285 */ 2286 i915_gem_shrink(dev_priv, 2287 obj->base.size >> PAGE_SHIFT, 2288 I915_SHRINK_BOUND | 2289 I915_SHRINK_UNBOUND | 2290 I915_SHRINK_PURGEABLE); 2291 ret = drm_gem_create_mmap_offset(&obj->base); 2292 if (ret != -ENOSPC) 2293 goto out; 2294 2295 i915_gem_shrink_all(dev_priv); 2296 ret = drm_gem_create_mmap_offset(&obj->base); 2297 out: 2298 dev_priv->mm.shrinker_no_lock_stealing = false; 2299 2300 return ret; 2301 } 2302 2303 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2304 { 2305 drm_gem_free_mmap_offset(&obj->base); 2306 } 2307 2308 int 2309 i915_gem_mmap_gtt(struct drm_file *file, 2310 struct drm_device *dev, 2311 uint32_t handle, 2312 uint64_t *offset) 2313 { 2314 struct drm_i915_gem_object *obj; 2315 int ret; 2316 2317 ret = i915_mutex_lock_interruptible(dev); 2318 if (ret) 2319 return ret; 2320 2321 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2322 if (&obj->base == NULL) { 2323 ret = -ENOENT; 2324 goto unlock; 2325 } 2326 2327 if (obj->madv != I915_MADV_WILLNEED) { 2328 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2329 ret = -EFAULT; 2330 goto out; 2331 } 2332 2333 ret = i915_gem_object_create_mmap_offset(obj); 2334 if (ret) 2335 goto out; 2336 2337 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2338 2339 out: 2340 drm_gem_object_unreference(&obj->base); 2341 unlock: 2342 mutex_unlock(&dev->struct_mutex); 2343 return ret; 2344 } 2345 2346 /** 2347 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2348 * @dev: DRM device 2349 * @data: GTT mapping ioctl data 2350 * @file: GEM object info 2351 * 2352 * Simply returns the fake offset to userspace so it can mmap it. 2353 * The mmap call will end up in drm_gem_mmap(), which will set things 2354 * up so we can get faults in the handler above. 2355 * 2356 * The fault handler will take care of binding the object into the GTT 2357 * (since it may have been evicted to make room for something), allocating 2358 * a fence register, and mapping the appropriate aperture address into 2359 * userspace. 
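 *
 * A rough sketch of the expected userspace usage (illustrative only;
 * fd, handle and size come from the caller, error handling omitted):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * Faults on ptr are then serviced by i915_gem_fault() above.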
2360 */ 2361 int 2362 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2363 struct drm_file *file) 2364 { 2365 struct drm_i915_gem_mmap_gtt *args = data; 2366 2367 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2368 } 2369 2370 /* Immediately discard the backing storage */ 2371 static void 2372 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2373 { 2374 i915_gem_object_free_mmap_offset(obj); 2375 2376 if (obj->base.filp == NULL) 2377 return; 2378 2379 /* Our goal here is to return as much of the memory as 2380 * is possible back to the system as we are called from OOM. 2381 * To do this we must instruct the shmfs to drop all of its 2382 * backing pages, *now*. 2383 */ 2384 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2385 obj->madv = __I915_MADV_PURGED; 2386 } 2387 2388 /* Try to discard unwanted pages */ 2389 static void 2390 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2391 { 2392 struct address_space *mapping; 2393 2394 switch (obj->madv) { 2395 case I915_MADV_DONTNEED: 2396 i915_gem_object_truncate(obj); 2397 case __I915_MADV_PURGED: 2398 return; 2399 } 2400 2401 if (obj->base.filp == NULL) 2402 return; 2403 2404 mapping = obj->base.filp->f_mapping, 2405 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2406 } 2407 2408 static void 2409 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2410 { 2411 struct sgt_iter sgt_iter; 2412 struct page *page; 2413 int ret; 2414 2415 BUG_ON(obj->madv == __I915_MADV_PURGED); 2416 2417 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2418 if (WARN_ON(ret)) { 2419 /* In the event of a disaster, abandon all caches and 2420 * hope for the best. 2421 */ 2422 i915_gem_clflush_object(obj, true); 2423 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2424 } 2425 2426 i915_gem_gtt_finish_object(obj); 2427 2428 if (i915_gem_object_needs_bit17_swizzle(obj)) 2429 i915_gem_object_save_bit_17_swizzle(obj); 2430 2431 if (obj->madv == I915_MADV_DONTNEED) 2432 obj->dirty = 0; 2433 2434 for_each_sgt_page(page, sgt_iter, obj->pages) { 2435 if (obj->dirty) 2436 set_page_dirty(page); 2437 2438 if (obj->madv == I915_MADV_WILLNEED) 2439 mark_page_accessed(page); 2440 2441 put_page(page); 2442 } 2443 obj->dirty = 0; 2444 2445 sg_free_table(obj->pages); 2446 kfree(obj->pages); 2447 } 2448 2449 int 2450 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2451 { 2452 const struct drm_i915_gem_object_ops *ops = obj->ops; 2453 2454 if (obj->pages == NULL) 2455 return 0; 2456 2457 if (obj->pages_pin_count) 2458 return -EBUSY; 2459 2460 BUG_ON(i915_gem_obj_bound_any(obj)); 2461 2462 /* ->put_pages might need to allocate memory for the bit17 swizzle 2463 * array, hence protect them from being reaped by removing them from gtt 2464 * lists early. 
*/ 2465 list_del(&obj->global_list); 2466 2467 if (obj->mapping) { 2468 if (is_vmalloc_addr(obj->mapping)) 2469 vunmap(obj->mapping); 2470 else 2471 kunmap(kmap_to_page(obj->mapping)); 2472 obj->mapping = NULL; 2473 } 2474 2475 ops->put_pages(obj); 2476 obj->pages = NULL; 2477 2478 i915_gem_object_invalidate(obj); 2479 2480 return 0; 2481 } 2482 2483 static int 2484 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2485 { 2486 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2487 int page_count, i; 2488 struct address_space *mapping; 2489 struct sg_table *st; 2490 struct scatterlist *sg; 2491 struct sgt_iter sgt_iter; 2492 struct page *page; 2493 unsigned long last_pfn = 0; /* suppress gcc warning */ 2494 int ret; 2495 gfp_t gfp; 2496 2497 /* Assert that the object is not currently in any GPU domain. As it 2498 * wasn't in the GTT, there shouldn't be any way it could have been in 2499 * a GPU cache 2500 */ 2501 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2502 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2503 2504 st = kmalloc(sizeof(*st), GFP_KERNEL); 2505 if (st == NULL) 2506 return -ENOMEM; 2507 2508 page_count = obj->base.size / PAGE_SIZE; 2509 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2510 kfree(st); 2511 return -ENOMEM; 2512 } 2513 2514 /* Get the list of pages out of our struct file. They'll be pinned 2515 * at this point until we release them. 2516 * 2517 * Fail silently without starting the shrinker 2518 */ 2519 mapping = obj->base.filp->f_mapping; 2520 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2521 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2522 sg = st->sgl; 2523 st->nents = 0; 2524 for (i = 0; i < page_count; i++) { 2525 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2526 if (IS_ERR(page)) { 2527 i915_gem_shrink(dev_priv, 2528 page_count, 2529 I915_SHRINK_BOUND | 2530 I915_SHRINK_UNBOUND | 2531 I915_SHRINK_PURGEABLE); 2532 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2533 } 2534 if (IS_ERR(page)) { 2535 /* We've tried hard to allocate the memory by reaping 2536 * our own buffer, now let the real VM do its job and 2537 * go down in flames if truly OOM. 2538 */ 2539 i915_gem_shrink_all(dev_priv); 2540 page = shmem_read_mapping_page(mapping, i); 2541 if (IS_ERR(page)) { 2542 ret = PTR_ERR(page); 2543 goto err_pages; 2544 } 2545 } 2546 #ifdef CONFIG_SWIOTLB 2547 if (swiotlb_nr_tbl()) { 2548 st->nents++; 2549 sg_set_page(sg, page, PAGE_SIZE, 0); 2550 sg = sg_next(sg); 2551 continue; 2552 } 2553 #endif 2554 if (!i || page_to_pfn(page) != last_pfn + 1) { 2555 if (i) 2556 sg = sg_next(sg); 2557 st->nents++; 2558 sg_set_page(sg, page, PAGE_SIZE, 0); 2559 } else { 2560 sg->length += PAGE_SIZE; 2561 } 2562 last_pfn = page_to_pfn(page); 2563 2564 /* Check that the i965g/gm workaround works. 
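 * When __GFP_DMA32 is part of the mapping's gfp mask (the i965g/gm
 * case), every page must come from the low 4GiB; with 4KiB pages that
 * means any pfn at or above 0x00100000 (4GiB >> PAGE_SHIFT) indicates
 * the restriction was not honoured.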
*/ 2565 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2566 } 2567 #ifdef CONFIG_SWIOTLB 2568 if (!swiotlb_nr_tbl()) 2569 #endif 2570 sg_mark_end(sg); 2571 obj->pages = st; 2572 2573 ret = i915_gem_gtt_prepare_object(obj); 2574 if (ret) 2575 goto err_pages; 2576 2577 if (i915_gem_object_needs_bit17_swizzle(obj)) 2578 i915_gem_object_do_bit_17_swizzle(obj); 2579 2580 if (obj->tiling_mode != I915_TILING_NONE && 2581 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2582 i915_gem_object_pin_pages(obj); 2583 2584 return 0; 2585 2586 err_pages: 2587 sg_mark_end(sg); 2588 for_each_sgt_page(page, sgt_iter, st) 2589 put_page(page); 2590 sg_free_table(st); 2591 kfree(st); 2592 2593 /* shmemfs first checks if there is enough memory to allocate the page 2594 * and reports ENOSPC should there be insufficient, along with the usual 2595 * ENOMEM for a genuine allocation failure. 2596 * 2597 * We use ENOSPC in our driver to mean that we have run out of aperture 2598 * space and so want to translate the error from shmemfs back to our 2599 * usual understanding of ENOMEM. 2600 */ 2601 if (ret == -ENOSPC) 2602 ret = -ENOMEM; 2603 2604 return ret; 2605 } 2606 2607 /* Ensure that the associated pages are gathered from the backing storage 2608 * and pinned into our object. i915_gem_object_get_pages() may be called 2609 * multiple times before they are released by a single call to 2610 * i915_gem_object_put_pages() - once the pages are no longer referenced 2611 * either as a result of memory pressure (reaping pages under the shrinker) 2612 * or as the object is itself released. 2613 */ 2614 int 2615 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2616 { 2617 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2618 const struct drm_i915_gem_object_ops *ops = obj->ops; 2619 int ret; 2620 2621 if (obj->pages) 2622 return 0; 2623 2624 if (obj->madv != I915_MADV_WILLNEED) { 2625 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2626 return -EFAULT; 2627 } 2628 2629 BUG_ON(obj->pages_pin_count); 2630 2631 ret = ops->get_pages(obj); 2632 if (ret) 2633 return ret; 2634 2635 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2636 2637 obj->get_page.sg = obj->pages->sgl; 2638 obj->get_page.last = 0; 2639 2640 return 0; 2641 } 2642 2643 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2644 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) 2645 { 2646 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2647 struct sg_table *sgt = obj->pages; 2648 struct sgt_iter sgt_iter; 2649 struct page *page; 2650 struct page *stack_pages[32]; 2651 struct page **pages = stack_pages; 2652 unsigned long i = 0; 2653 void *addr; 2654 2655 /* A single page can always be kmapped */ 2656 if (n_pages == 1) 2657 return kmap(sg_page(sgt->sgl)); 2658 2659 if (n_pages > ARRAY_SIZE(stack_pages)) { 2660 /* Too big for stack -- allocate temporary array instead */ 2661 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); 2662 if (!pages) 2663 return NULL; 2664 } 2665 2666 for_each_sgt_page(page, sgt_iter, sgt) 2667 pages[i++] = page; 2668 2669 /* Check that we have the expected number of pages */ 2670 GEM_BUG_ON(i != n_pages); 2671 2672 addr = vmap(pages, n_pages, 0, PAGE_KERNEL); 2673 2674 if (pages != stack_pages) 2675 drm_free_large(pages); 2676 2677 return addr; 2678 } 2679 2680 /* get, pin, and map the pages of the object into kernel space */ 2681 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2682 { 2683 int ret; 2684 2685 
lockdep_assert_held(&obj->base.dev->struct_mutex); 2686 2687 ret = i915_gem_object_get_pages(obj); 2688 if (ret) 2689 return ERR_PTR(ret); 2690 2691 i915_gem_object_pin_pages(obj); 2692 2693 if (!obj->mapping) { 2694 obj->mapping = i915_gem_object_map(obj); 2695 if (!obj->mapping) { 2696 i915_gem_object_unpin_pages(obj); 2697 return ERR_PTR(-ENOMEM); 2698 } 2699 } 2700 2701 return obj->mapping; 2702 } 2703 2704 void i915_vma_move_to_active(struct i915_vma *vma, 2705 struct drm_i915_gem_request *req) 2706 { 2707 struct drm_i915_gem_object *obj = vma->obj; 2708 struct intel_engine_cs *engine; 2709 2710 engine = i915_gem_request_get_engine(req); 2711 2712 /* Add a reference if we're newly entering the active list. */ 2713 if (obj->active == 0) 2714 drm_gem_object_reference(&obj->base); 2715 obj->active |= intel_engine_flag(engine); 2716 2717 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2718 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2719 2720 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2721 } 2722 2723 static void 2724 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2725 { 2726 GEM_BUG_ON(obj->last_write_req == NULL); 2727 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2728 2729 i915_gem_request_assign(&obj->last_write_req, NULL); 2730 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2731 } 2732 2733 static void 2734 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2735 { 2736 struct i915_vma *vma; 2737 2738 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2739 GEM_BUG_ON(!(obj->active & (1 << ring))); 2740 2741 list_del_init(&obj->engine_list[ring]); 2742 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2743 2744 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2745 i915_gem_object_retire__write(obj); 2746 2747 obj->active &= ~(1 << ring); 2748 if (obj->active) 2749 return; 2750 2751 /* Bump our place on the bound list to keep it roughly in LRU order 2752 * so that we don't steal from recently used but inactive objects 2753 * (unless we are forced to ofc!) 
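 *
 * Moving the object to the tail of mm.bound_list keeps it away from
 * the head of that list, which is where the shrinker starts looking,
 * so recently active objects are reclaimed last.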
2754 */ 2755 list_move_tail(&obj->global_list, 2756 &to_i915(obj->base.dev)->mm.bound_list); 2757 2758 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2759 if (!list_empty(&vma->vm_link)) 2760 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2761 } 2762 2763 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2764 drm_gem_object_unreference(&obj->base); 2765 } 2766 2767 static int 2768 i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) 2769 { 2770 struct intel_engine_cs *engine; 2771 int ret; 2772 2773 /* Carefully retire all requests without writing to the rings */ 2774 for_each_engine(engine, dev_priv) { 2775 ret = intel_engine_idle(engine); 2776 if (ret) 2777 return ret; 2778 } 2779 i915_gem_retire_requests(dev_priv); 2780 2781 /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ 2782 if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { 2783 while (intel_kick_waiters(dev_priv) || 2784 intel_kick_signalers(dev_priv)) 2785 yield(); 2786 } 2787 2788 /* Finally reset hw state */ 2789 for_each_engine(engine, dev_priv) 2790 intel_ring_init_seqno(engine, seqno); 2791 2792 return 0; 2793 } 2794 2795 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2796 { 2797 struct drm_i915_private *dev_priv = to_i915(dev); 2798 int ret; 2799 2800 if (seqno == 0) 2801 return -EINVAL; 2802 2803 /* HWS page needs to be set less than what we 2804 * will inject to ring 2805 */ 2806 ret = i915_gem_init_seqno(dev_priv, seqno - 1); 2807 if (ret) 2808 return ret; 2809 2810 /* Carefully set the last_seqno value so that wrap 2811 * detection still works 2812 */ 2813 dev_priv->next_seqno = seqno; 2814 dev_priv->last_seqno = seqno - 1; 2815 if (dev_priv->last_seqno == 0) 2816 dev_priv->last_seqno--; 2817 2818 return 0; 2819 } 2820 2821 int 2822 i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) 2823 { 2824 /* reserve 0 for non-seqno */ 2825 if (dev_priv->next_seqno == 0) { 2826 int ret = i915_gem_init_seqno(dev_priv, 0); 2827 if (ret) 2828 return ret; 2829 2830 dev_priv->next_seqno = 1; 2831 } 2832 2833 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2834 return 0; 2835 } 2836 2837 static void i915_gem_mark_busy(const struct intel_engine_cs *engine) 2838 { 2839 struct drm_i915_private *dev_priv = engine->i915; 2840 2841 dev_priv->gt.active_engines |= intel_engine_flag(engine); 2842 if (dev_priv->gt.awake) 2843 return; 2844 2845 intel_runtime_pm_get_noresume(dev_priv); 2846 dev_priv->gt.awake = true; 2847 2848 i915_update_gfx_val(dev_priv); 2849 if (INTEL_GEN(dev_priv) >= 6) 2850 gen6_rps_busy(dev_priv); 2851 2852 queue_delayed_work(dev_priv->wq, 2853 &dev_priv->gt.retire_work, 2854 round_jiffies_up_relative(HZ)); 2855 } 2856 2857 /* 2858 * NB: This function is not allowed to fail. Doing so would mean the the 2859 * request is not being tracked for completion but the work itself is 2860 * going to happen on the hardware. This would be a Bad Thing(tm). 2861 */ 2862 void __i915_add_request(struct drm_i915_gem_request *request, 2863 struct drm_i915_gem_object *obj, 2864 bool flush_caches) 2865 { 2866 struct intel_engine_cs *engine; 2867 struct intel_ringbuffer *ringbuf; 2868 u32 request_start; 2869 u32 reserved_tail; 2870 int ret; 2871 2872 if (WARN_ON(request == NULL)) 2873 return; 2874 2875 engine = request->engine; 2876 ringbuf = request->ringbuf; 2877 2878 /* 2879 * To ensure that this call will not fail, space for its emissions 2880 * should already have been reserved in the ring buffer. 
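 * (That reservation is made at request allocation time, where
 * __i915_gem_request_alloc() sets request->reserved_space to
 * MIN_SPACE_FOR_ADD_REQUEST.)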
Let the ring 2881 * know that it is time to use that space up. 2882 */ 2883 request_start = intel_ring_get_tail(ringbuf); 2884 reserved_tail = request->reserved_space; 2885 request->reserved_space = 0; 2886 2887 /* 2888 * Emit any outstanding flushes - execbuf can fail to emit the flush 2889 * after having emitted the batchbuffer command. Hence we need to fix 2890 * things up similar to emitting the lazy request. The difference here 2891 * is that the flush _must_ happen before the next request, no matter 2892 * what. 2893 */ 2894 if (flush_caches) { 2895 if (i915.enable_execlists) 2896 ret = logical_ring_flush_all_caches(request); 2897 else 2898 ret = intel_ring_flush_all_caches(request); 2899 /* Not allowed to fail! */ 2900 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2901 } 2902 2903 trace_i915_gem_request_add(request); 2904 2905 request->head = request_start; 2906 2907 /* Whilst this request exists, batch_obj will be on the 2908 * active_list, and so will hold the active reference. Only when this 2909 * request is retired will the the batch_obj be moved onto the 2910 * inactive_list and lose its active reference. Hence we do not need 2911 * to explicitly hold another reference here. 2912 */ 2913 request->batch_obj = obj; 2914 2915 /* Seal the request and mark it as pending execution. Note that 2916 * we may inspect this state, without holding any locks, during 2917 * hangcheck. Hence we apply the barrier to ensure that we do not 2918 * see a more recent value in the hws than we are tracking. 2919 */ 2920 request->emitted_jiffies = jiffies; 2921 request->previous_seqno = engine->last_submitted_seqno; 2922 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2923 list_add_tail(&request->list, &engine->request_list); 2924 2925 /* Record the position of the start of the request so that 2926 * should we detect the updated seqno part-way through the 2927 * GPU processing the request, we never over-estimate the 2928 * position of the head. 2929 */ 2930 request->postfix = intel_ring_get_tail(ringbuf); 2931 2932 if (i915.enable_execlists) 2933 ret = engine->emit_request(request); 2934 else { 2935 ret = engine->add_request(request); 2936 2937 request->tail = intel_ring_get_tail(ringbuf); 2938 } 2939 /* Not allowed to fail! */ 2940 WARN(ret, "emit|add_request failed: %d!\n", ret); 2941 /* Sanity check that the reserved size was large enough. 
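 * The space consumed by this request is current tail - request_start,
 * corrected for ring wraparound by adding ringbuf->size if the
 * subtraction goes negative; the WARN below fires if that exceeds the
 * reserved_tail we started out with.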
*/ 2942 ret = intel_ring_get_tail(ringbuf) - request_start; 2943 if (ret < 0) 2944 ret += ringbuf->size; 2945 WARN_ONCE(ret > reserved_tail, 2946 "Not enough space reserved (%d bytes) " 2947 "for adding the request (%d bytes)\n", 2948 reserved_tail, ret); 2949 2950 i915_gem_mark_busy(engine); 2951 } 2952 2953 static bool i915_context_is_banned(const struct i915_gem_context *ctx) 2954 { 2955 unsigned long elapsed; 2956 2957 if (ctx->hang_stats.banned) 2958 return true; 2959 2960 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2961 if (ctx->hang_stats.ban_period_seconds && 2962 elapsed <= ctx->hang_stats.ban_period_seconds) { 2963 DRM_DEBUG("context hanging too fast, banning!\n"); 2964 return true; 2965 } 2966 2967 return false; 2968 } 2969 2970 static void i915_set_reset_status(struct i915_gem_context *ctx, 2971 const bool guilty) 2972 { 2973 struct i915_ctx_hang_stats *hs = &ctx->hang_stats; 2974 2975 if (guilty) { 2976 hs->banned = i915_context_is_banned(ctx); 2977 hs->batch_active++; 2978 hs->guilty_ts = get_seconds(); 2979 } else { 2980 hs->batch_pending++; 2981 } 2982 } 2983 2984 void i915_gem_request_free(struct kref *req_ref) 2985 { 2986 struct drm_i915_gem_request *req = container_of(req_ref, 2987 typeof(*req), ref); 2988 kmem_cache_free(req->i915->requests, req); 2989 } 2990 2991 static inline int 2992 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2993 struct i915_gem_context *ctx, 2994 struct drm_i915_gem_request **req_out) 2995 { 2996 struct drm_i915_private *dev_priv = engine->i915; 2997 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2998 struct drm_i915_gem_request *req; 2999 int ret; 3000 3001 if (!req_out) 3002 return -EINVAL; 3003 3004 *req_out = NULL; 3005 3006 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 3007 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 3008 * and restart. 3009 */ 3010 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 3011 if (ret) 3012 return ret; 3013 3014 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 3015 if (req == NULL) 3016 return -ENOMEM; 3017 3018 ret = i915_gem_get_seqno(engine->i915, &req->seqno); 3019 if (ret) 3020 goto err; 3021 3022 kref_init(&req->ref); 3023 req->i915 = dev_priv; 3024 req->engine = engine; 3025 req->ctx = ctx; 3026 i915_gem_context_reference(req->ctx); 3027 3028 /* 3029 * Reserve space in the ring buffer for all the commands required to 3030 * eventually emit this request. This is to guarantee that the 3031 * i915_add_request() call can't fail. Note that the reserve may need 3032 * to be redone if the request is not actually submitted straight 3033 * away, e.g. because a GPU scheduler has deferred it. 3034 */ 3035 req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; 3036 3037 if (i915.enable_execlists) 3038 ret = intel_logical_ring_alloc_request_extras(req); 3039 else 3040 ret = intel_ring_alloc_request_extras(req); 3041 if (ret) 3042 goto err_ctx; 3043 3044 *req_out = req; 3045 return 0; 3046 3047 err_ctx: 3048 i915_gem_context_unreference(ctx); 3049 err: 3050 kmem_cache_free(dev_priv->requests, req); 3051 return ret; 3052 } 3053 3054 /** 3055 * i915_gem_request_alloc - allocate a request structure 3056 * 3057 * @engine: engine that we wish to issue the request on. 3058 * @ctx: context that the request will be associated with. 3059 * This can be NULL if the request is not directly related to 3060 * any specific user context, in which case this function will 3061 * choose an appropriate context to use. 
3062 * 3063 * Returns a pointer to the allocated request if successful, 3064 * or an error code if not. 3065 */ 3066 struct drm_i915_gem_request * 3067 i915_gem_request_alloc(struct intel_engine_cs *engine, 3068 struct i915_gem_context *ctx) 3069 { 3070 struct drm_i915_gem_request *req; 3071 int err; 3072 3073 if (ctx == NULL) 3074 ctx = engine->i915->kernel_context; 3075 err = __i915_gem_request_alloc(engine, ctx, &req); 3076 return err ? ERR_PTR(err) : req; 3077 } 3078 3079 struct drm_i915_gem_request * 3080 i915_gem_find_active_request(struct intel_engine_cs *engine) 3081 { 3082 struct drm_i915_gem_request *request; 3083 3084 /* We are called by the error capture and reset at a random 3085 * point in time. In particular, note that neither is crucially 3086 * ordered with an interrupt. After a hang, the GPU is dead and we 3087 * assume that no more writes can happen (we waited long enough for 3088 * all writes that were in transaction to be flushed) - adding an 3089 * extra delay for a recent interrupt is pointless. Hence, we do 3090 * not need an engine->irq_seqno_barrier() before the seqno reads. 3091 */ 3092 list_for_each_entry(request, &engine->request_list, list) { 3093 if (i915_gem_request_completed(request)) 3094 continue; 3095 3096 return request; 3097 } 3098 3099 return NULL; 3100 } 3101 3102 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) 3103 { 3104 struct drm_i915_gem_request *request; 3105 bool ring_hung; 3106 3107 request = i915_gem_find_active_request(engine); 3108 if (request == NULL) 3109 return; 3110 3111 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3112 3113 i915_set_reset_status(request->ctx, ring_hung); 3114 list_for_each_entry_continue(request, &engine->request_list, list) 3115 i915_set_reset_status(request->ctx, false); 3116 } 3117 3118 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) 3119 { 3120 struct intel_ringbuffer *buffer; 3121 3122 while (!list_empty(&engine->active_list)) { 3123 struct drm_i915_gem_object *obj; 3124 3125 obj = list_first_entry(&engine->active_list, 3126 struct drm_i915_gem_object, 3127 engine_list[engine->id]); 3128 3129 i915_gem_object_retire__read(obj, engine->id); 3130 } 3131 3132 /* 3133 * Clear the execlists queue up before freeing the requests, as those 3134 * are the ones that keep the context and ringbuffer backing objects 3135 * pinned in place. 3136 */ 3137 3138 if (i915.enable_execlists) { 3139 /* Ensure irq handler finishes or is cancelled. */ 3140 tasklet_kill(&engine->irq_tasklet); 3141 3142 intel_execlists_cancel_requests(engine); 3143 } 3144 3145 /* 3146 * We must free the requests after all the corresponding objects have 3147 * been moved off active lists. Which is the same order as the normal 3148 * retire_requests function does. This is important if object hold 3149 * implicit references on things like e.g. ppgtt address spaces through 3150 * the request. 3151 */ 3152 while (!list_empty(&engine->request_list)) { 3153 struct drm_i915_gem_request *request; 3154 3155 request = list_first_entry(&engine->request_list, 3156 struct drm_i915_gem_request, 3157 list); 3158 3159 i915_gem_request_retire(request); 3160 } 3161 3162 /* Having flushed all requests from all queues, we know that all 3163 * ringbuffers must now be empty. However, since we do not reclaim 3164 * all space when retiring the request (to prevent HEADs colliding 3165 * with rapid ringbuffer wraparound) the amount of available space 3166 * upon reset is less than when we start. 
Do one more pass over 3167 * all the ringbuffers to reset last_retired_head. 3168 */ 3169 list_for_each_entry(buffer, &engine->buffers, link) { 3170 buffer->last_retired_head = buffer->tail; 3171 intel_ring_update_space(buffer); 3172 } 3173 3174 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3175 3176 engine->i915->gt.active_engines &= ~intel_engine_flag(engine); 3177 } 3178 3179 void i915_gem_reset(struct drm_device *dev) 3180 { 3181 struct drm_i915_private *dev_priv = to_i915(dev); 3182 struct intel_engine_cs *engine; 3183 3184 /* 3185 * Before we free the objects from the requests, we need to inspect 3186 * them for finding the guilty party. As the requests only borrow 3187 * their reference to the objects, the inspection must be done first. 3188 */ 3189 for_each_engine(engine, dev_priv) 3190 i915_gem_reset_engine_status(engine); 3191 3192 for_each_engine(engine, dev_priv) 3193 i915_gem_reset_engine_cleanup(engine); 3194 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); 3195 3196 i915_gem_context_reset(dev); 3197 3198 i915_gem_restore_fences(dev); 3199 3200 WARN_ON(i915_verify_lists(dev)); 3201 } 3202 3203 /** 3204 * This function clears the request list as sequence numbers are passed. 3205 * @engine: engine to retire requests on 3206 */ 3207 void 3208 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3209 { 3210 WARN_ON(i915_verify_lists(engine->dev)); 3211 3212 /* Retire requests first as we use it above for the early return. 3213 * If we retire requests last, we may use a later seqno and so clear 3214 * the requests lists without clearing the active list, leading to 3215 * confusion. 3216 */ 3217 while (!list_empty(&engine->request_list)) { 3218 struct drm_i915_gem_request *request; 3219 3220 request = list_first_entry(&engine->request_list, 3221 struct drm_i915_gem_request, 3222 list); 3223 3224 if (!i915_gem_request_completed(request)) 3225 break; 3226 3227 i915_gem_request_retire(request); 3228 } 3229 3230 /* Move any buffers on the active list that are no longer referenced 3231 * by the ringbuffer to the flushing/inactive lists as appropriate, 3232 * before we free the context associated with the requests. 3233 */ 3234 while (!list_empty(&engine->active_list)) { 3235 struct drm_i915_gem_object *obj; 3236 3237 obj = list_first_entry(&engine->active_list, 3238 struct drm_i915_gem_object, 3239 engine_list[engine->id]); 3240 3241 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3242 break; 3243 3244 i915_gem_object_retire__read(obj, engine->id); 3245 } 3246 3247 WARN_ON(i915_verify_lists(engine->dev)); 3248 } 3249 3250 void i915_gem_retire_requests(struct drm_i915_private *dev_priv) 3251 { 3252 struct intel_engine_cs *engine; 3253 3254 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3255 3256 if (dev_priv->gt.active_engines == 0) 3257 return; 3258 3259 GEM_BUG_ON(!dev_priv->gt.awake); 3260 3261 for_each_engine(engine, dev_priv) { 3262 i915_gem_retire_requests_ring(engine); 3263 if (list_empty(&engine->request_list)) 3264 dev_priv->gt.active_engines &= ~intel_engine_flag(engine); 3265 } 3266 3267 if (dev_priv->gt.active_engines == 0) 3268 queue_delayed_work(dev_priv->wq, 3269 &dev_priv->gt.idle_work, 3270 msecs_to_jiffies(100)); 3271 } 3272 3273 static void 3274 i915_gem_retire_work_handler(struct work_struct *work) 3275 { 3276 struct drm_i915_private *dev_priv = 3277 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3278 struct drm_device *dev = &dev_priv->drm; 3279 3280 /* Come back later if the device is busy... 
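 * We only trylock so that a long-running holder of struct_mutex cannot
 * stall the workqueue; if the lock is contended nothing is lost, since
 * the handler re-arms itself below for as long as the GT stays awake.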
*/ 3281 if (mutex_trylock(&dev->struct_mutex)) { 3282 i915_gem_retire_requests(dev_priv); 3283 mutex_unlock(&dev->struct_mutex); 3284 } 3285 3286 /* Keep the retire handler running until we are finally idle. 3287 * We do not need to do this test under locking as in the worst-case 3288 * we queue the retire worker once too often. 3289 */ 3290 if (READ_ONCE(dev_priv->gt.awake)) 3291 queue_delayed_work(dev_priv->wq, 3292 &dev_priv->gt.retire_work, 3293 round_jiffies_up_relative(HZ)); 3294 } 3295 3296 static void 3297 i915_gem_idle_work_handler(struct work_struct *work) 3298 { 3299 struct drm_i915_private *dev_priv = 3300 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3301 struct drm_device *dev = &dev_priv->drm; 3302 struct intel_engine_cs *engine; 3303 unsigned int stuck_engines; 3304 bool rearm_hangcheck; 3305 3306 if (!READ_ONCE(dev_priv->gt.awake)) 3307 return; 3308 3309 if (READ_ONCE(dev_priv->gt.active_engines)) 3310 return; 3311 3312 rearm_hangcheck = 3313 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3314 3315 if (!mutex_trylock(&dev->struct_mutex)) { 3316 /* Currently busy, come back later */ 3317 mod_delayed_work(dev_priv->wq, 3318 &dev_priv->gt.idle_work, 3319 msecs_to_jiffies(50)); 3320 goto out_rearm; 3321 } 3322 3323 if (dev_priv->gt.active_engines) 3324 goto out_unlock; 3325 3326 for_each_engine(engine, dev_priv) 3327 i915_gem_batch_pool_fini(&engine->batch_pool); 3328 3329 GEM_BUG_ON(!dev_priv->gt.awake); 3330 dev_priv->gt.awake = false; 3331 rearm_hangcheck = false; 3332 3333 stuck_engines = intel_kick_waiters(dev_priv); 3334 if (unlikely(stuck_engines)) { 3335 DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); 3336 dev_priv->gpu_error.missed_irq_rings |= stuck_engines; 3337 } 3338 3339 if (INTEL_GEN(dev_priv) >= 6) 3340 gen6_rps_idle(dev_priv); 3341 intel_runtime_pm_put(dev_priv); 3342 out_unlock: 3343 mutex_unlock(&dev->struct_mutex); 3344 3345 out_rearm: 3346 if (rearm_hangcheck) { 3347 GEM_BUG_ON(!dev_priv->gt.awake); 3348 i915_queue_hangcheck(dev_priv); 3349 } 3350 } 3351 3352 /** 3353 * Ensures that an object will eventually get non-busy by flushing any required 3354 * write domains, emitting any outstanding lazy request and retiring and 3355 * completed requests. 3356 * @obj: object to flush 3357 */ 3358 static int 3359 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3360 { 3361 int i; 3362 3363 if (!obj->active) 3364 return 0; 3365 3366 for (i = 0; i < I915_NUM_ENGINES; i++) { 3367 struct drm_i915_gem_request *req; 3368 3369 req = obj->last_read_req[i]; 3370 if (req == NULL) 3371 continue; 3372 3373 if (i915_gem_request_completed(req)) 3374 i915_gem_object_retire__read(obj, i); 3375 } 3376 3377 return 0; 3378 } 3379 3380 /** 3381 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3382 * @dev: drm device pointer 3383 * @data: ioctl data blob 3384 * @file: drm file pointer 3385 * 3386 * Returns 0 if successful, else an error is returned with the remaining time in 3387 * the timeout parameter. 3388 * -ETIME: object is still busy after timeout 3389 * -ERESTARTSYS: signal interrupted the wait 3390 * -ENONENT: object doesn't exist 3391 * Also possible, but rare: 3392 * -EAGAIN: GPU wedged 3393 * -ENOMEM: damn 3394 * -ENODEV: Internal IRQ fail 3395 * -E?: The add request failed 3396 * 3397 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3398 * non-zero timeout parameter the wait ioctl will wait for the given number of 3399 * nanoseconds on an object becoming unbusy. 
Since the wait itself does so 3400 * without holding struct_mutex the object may become re-busied before this 3401 * function completes. A similar but shorter * race condition exists in the busy 3402 * ioctl 3403 */ 3404 int 3405 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3406 { 3407 struct drm_i915_gem_wait *args = data; 3408 struct drm_i915_gem_object *obj; 3409 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3410 int i, n = 0; 3411 int ret; 3412 3413 if (args->flags != 0) 3414 return -EINVAL; 3415 3416 ret = i915_mutex_lock_interruptible(dev); 3417 if (ret) 3418 return ret; 3419 3420 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); 3421 if (&obj->base == NULL) { 3422 mutex_unlock(&dev->struct_mutex); 3423 return -ENOENT; 3424 } 3425 3426 /* Need to make sure the object gets inactive eventually. */ 3427 ret = i915_gem_object_flush_active(obj); 3428 if (ret) 3429 goto out; 3430 3431 if (!obj->active) 3432 goto out; 3433 3434 /* Do this after OLR check to make sure we make forward progress polling 3435 * on this IOCTL with a timeout == 0 (like busy ioctl) 3436 */ 3437 if (args->timeout_ns == 0) { 3438 ret = -ETIME; 3439 goto out; 3440 } 3441 3442 drm_gem_object_unreference(&obj->base); 3443 3444 for (i = 0; i < I915_NUM_ENGINES; i++) { 3445 if (obj->last_read_req[i] == NULL) 3446 continue; 3447 3448 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3449 } 3450 3451 mutex_unlock(&dev->struct_mutex); 3452 3453 for (i = 0; i < n; i++) { 3454 if (ret == 0) 3455 ret = __i915_wait_request(req[i], true, 3456 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3457 to_rps_client(file)); 3458 i915_gem_request_unreference(req[i]); 3459 } 3460 return ret; 3461 3462 out: 3463 drm_gem_object_unreference(&obj->base); 3464 mutex_unlock(&dev->struct_mutex); 3465 return ret; 3466 } 3467 3468 static int 3469 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3470 struct intel_engine_cs *to, 3471 struct drm_i915_gem_request *from_req, 3472 struct drm_i915_gem_request **to_req) 3473 { 3474 struct intel_engine_cs *from; 3475 int ret; 3476 3477 from = i915_gem_request_get_engine(from_req); 3478 if (to == from) 3479 return 0; 3480 3481 if (i915_gem_request_completed(from_req)) 3482 return 0; 3483 3484 if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { 3485 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3486 ret = __i915_wait_request(from_req, 3487 i915->mm.interruptible, 3488 NULL, 3489 &i915->rps.semaphores); 3490 if (ret) 3491 return ret; 3492 3493 i915_gem_object_retire_request(obj, from_req); 3494 } else { 3495 int idx = intel_ring_sync_index(from, to); 3496 u32 seqno = i915_gem_request_get_seqno(from_req); 3497 3498 WARN_ON(!to_req); 3499 3500 if (seqno <= from->semaphore.sync_seqno[idx]) 3501 return 0; 3502 3503 if (*to_req == NULL) { 3504 struct drm_i915_gem_request *req; 3505 3506 req = i915_gem_request_alloc(to, NULL); 3507 if (IS_ERR(req)) 3508 return PTR_ERR(req); 3509 3510 *to_req = req; 3511 } 3512 3513 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3514 ret = to->semaphore.sync_to(*to_req, from, seqno); 3515 if (ret) 3516 return ret; 3517 3518 /* We use last_read_req because sync_to() 3519 * might have just caused seqno wrap under 3520 * the radar. 3521 */ 3522 from->semaphore.sync_seqno[idx] = 3523 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3524 } 3525 3526 return 0; 3527 } 3528 3529 /** 3530 * i915_gem_object_sync - sync an object to a ring. 
3531 * 3532 * @obj: object which may be in use on another ring. 3533 * @to: ring we wish to use the object on. May be NULL. 3534 * @to_req: request we wish to use the object for. See below. 3535 * This will be allocated and returned if a request is 3536 * required but not passed in. 3537 * 3538 * This code is meant to abstract object synchronization with the GPU. 3539 * Calling with NULL implies synchronizing the object with the CPU 3540 * rather than a particular GPU ring. Conceptually we serialise writes 3541 * between engines inside the GPU. We only allow one engine to write 3542 * into a buffer at any time, but multiple readers. To ensure each has 3543 * a coherent view of memory, we must: 3544 * 3545 * - If there is an outstanding write request to the object, the new 3546 * request must wait for it to complete (either CPU or in hw, requests 3547 * on the same ring will be naturally ordered). 3548 * 3549 * - If we are a write request (pending_write_domain is set), the new 3550 * request must wait for outstanding read requests to complete. 3551 * 3552 * For CPU synchronisation (NULL to) no request is required. For syncing with 3553 * rings to_req must be non-NULL. However, a request does not have to be 3554 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3555 * request will be allocated automatically and returned through *to_req. Note 3556 * that it is not guaranteed that commands will be emitted (because the system 3557 * might already be idle). Hence there is no need to create a request that 3558 * might never have any work submitted. Note further that if a request is 3559 * returned in *to_req, it is the responsibility of the caller to submit 3560 * that request (after potentially adding more work to it). 3561 * 3562 * Returns 0 if successful, else propagates up the lower layer error. 
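 *
 * A rough sketch of the expected calling pattern (illustrative only;
 * error handling omitted and the request is assumed to gain more work
 * before submission):
 *
 *	struct drm_i915_gem_request *to_req = NULL;
 *
 *	ret = i915_gem_object_sync(obj, engine, &to_req);
 *	...
 *	if (to_req)
 *		i915_add_request(to_req);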
3563 */ 3564 int 3565 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3566 struct intel_engine_cs *to, 3567 struct drm_i915_gem_request **to_req) 3568 { 3569 const bool readonly = obj->base.pending_write_domain == 0; 3570 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3571 int ret, i, n; 3572 3573 if (!obj->active) 3574 return 0; 3575 3576 if (to == NULL) 3577 return i915_gem_object_wait_rendering(obj, readonly); 3578 3579 n = 0; 3580 if (readonly) { 3581 if (obj->last_write_req) 3582 req[n++] = obj->last_write_req; 3583 } else { 3584 for (i = 0; i < I915_NUM_ENGINES; i++) 3585 if (obj->last_read_req[i]) 3586 req[n++] = obj->last_read_req[i]; 3587 } 3588 for (i = 0; i < n; i++) { 3589 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3590 if (ret) 3591 return ret; 3592 } 3593 3594 return 0; 3595 } 3596 3597 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3598 { 3599 u32 old_write_domain, old_read_domains; 3600 3601 /* Force a pagefault for domain tracking on next user access */ 3602 i915_gem_release_mmap(obj); 3603 3604 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3605 return; 3606 3607 old_read_domains = obj->base.read_domains; 3608 old_write_domain = obj->base.write_domain; 3609 3610 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3611 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3612 3613 trace_i915_gem_object_change_domain(obj, 3614 old_read_domains, 3615 old_write_domain); 3616 } 3617 3618 static void __i915_vma_iounmap(struct i915_vma *vma) 3619 { 3620 GEM_BUG_ON(vma->pin_count); 3621 3622 if (vma->iomap == NULL) 3623 return; 3624 3625 io_mapping_unmap(vma->iomap); 3626 vma->iomap = NULL; 3627 } 3628 3629 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3630 { 3631 struct drm_i915_gem_object *obj = vma->obj; 3632 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3633 int ret; 3634 3635 if (list_empty(&vma->obj_link)) 3636 return 0; 3637 3638 if (!drm_mm_node_allocated(&vma->node)) { 3639 i915_gem_vma_destroy(vma); 3640 return 0; 3641 } 3642 3643 if (vma->pin_count) 3644 return -EBUSY; 3645 3646 BUG_ON(obj->pages == NULL); 3647 3648 if (wait) { 3649 ret = i915_gem_object_wait_rendering(obj, false); 3650 if (ret) 3651 return ret; 3652 } 3653 3654 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3655 i915_gem_object_finish_gtt(obj); 3656 3657 /* release the fence reg _after_ flushing */ 3658 ret = i915_gem_object_put_fence(obj); 3659 if (ret) 3660 return ret; 3661 3662 __i915_vma_iounmap(vma); 3663 } 3664 3665 trace_i915_vma_unbind(vma); 3666 3667 vma->vm->unbind_vma(vma); 3668 vma->bound = 0; 3669 3670 list_del_init(&vma->vm_link); 3671 if (vma->is_ggtt) { 3672 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3673 obj->map_and_fenceable = false; 3674 } else if (vma->ggtt_view.pages) { 3675 sg_free_table(vma->ggtt_view.pages); 3676 kfree(vma->ggtt_view.pages); 3677 } 3678 vma->ggtt_view.pages = NULL; 3679 } 3680 3681 drm_mm_remove_node(&vma->node); 3682 i915_gem_vma_destroy(vma); 3683 3684 /* Since the unbound list is global, only move to that list if 3685 * no more VMAs exist. */ 3686 if (list_empty(&obj->vma_list)) 3687 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3688 3689 /* And finally now the object is completely decoupled from this vma, 3690 * we can drop its hold on the backing storage and allow it to be 3691 * reaped by the shrinker. 
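 * This unpin balances the i915_gem_object_pin_pages() taken when the
 * pages were originally bound into this address space.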
3692 */ 3693 i915_gem_object_unpin_pages(obj); 3694 3695 return 0; 3696 } 3697 3698 int i915_vma_unbind(struct i915_vma *vma) 3699 { 3700 return __i915_vma_unbind(vma, true); 3701 } 3702 3703 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3704 { 3705 return __i915_vma_unbind(vma, false); 3706 } 3707 3708 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) 3709 { 3710 struct intel_engine_cs *engine; 3711 int ret; 3712 3713 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3714 3715 for_each_engine(engine, dev_priv) { 3716 if (engine->last_context == NULL) 3717 continue; 3718 3719 ret = intel_engine_idle(engine); 3720 if (ret) 3721 return ret; 3722 } 3723 3724 WARN_ON(i915_verify_lists(dev)); 3725 return 0; 3726 } 3727 3728 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3729 unsigned long cache_level) 3730 { 3731 struct drm_mm_node *gtt_space = &vma->node; 3732 struct drm_mm_node *other; 3733 3734 /* 3735 * On some machines we have to be careful when putting differing types 3736 * of snoopable memory together to avoid the prefetcher crossing memory 3737 * domains and dying. During vm initialisation, we decide whether or not 3738 * these constraints apply and set the drm_mm.color_adjust 3739 * appropriately. 3740 */ 3741 if (vma->vm->mm.color_adjust == NULL) 3742 return true; 3743 3744 if (!drm_mm_node_allocated(gtt_space)) 3745 return true; 3746 3747 if (list_empty(>t_space->node_list)) 3748 return true; 3749 3750 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3751 if (other->allocated && !other->hole_follows && other->color != cache_level) 3752 return false; 3753 3754 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3755 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3756 return false; 3757 3758 return true; 3759 } 3760 3761 /** 3762 * Finds free space in the GTT aperture and binds the object or a view of it 3763 * there. 3764 * @obj: object to bind 3765 * @vm: address space to bind into 3766 * @ggtt_view: global gtt view if applicable 3767 * @alignment: requested alignment 3768 * @flags: mask of PIN_* flags to use 3769 */ 3770 static struct i915_vma * 3771 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3772 struct i915_address_space *vm, 3773 const struct i915_ggtt_view *ggtt_view, 3774 unsigned alignment, 3775 uint64_t flags) 3776 { 3777 struct drm_device *dev = obj->base.dev; 3778 struct drm_i915_private *dev_priv = to_i915(dev); 3779 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3780 u32 fence_alignment, unfenced_alignment; 3781 u32 search_flag, alloc_flag; 3782 u64 start, end; 3783 u64 size, fence_size; 3784 struct i915_vma *vma; 3785 int ret; 3786 3787 if (i915_is_ggtt(vm)) { 3788 u32 view_size; 3789 3790 if (WARN_ON(!ggtt_view)) 3791 return ERR_PTR(-EINVAL); 3792 3793 view_size = i915_ggtt_view_size(obj, ggtt_view); 3794 3795 fence_size = i915_gem_get_gtt_size(dev, 3796 view_size, 3797 obj->tiling_mode); 3798 fence_alignment = i915_gem_get_gtt_alignment(dev, 3799 view_size, 3800 obj->tiling_mode, 3801 true); 3802 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3803 view_size, 3804 obj->tiling_mode, 3805 false); 3806 size = flags & PIN_MAPPABLE ? 
fence_size : view_size; 3807 } else { 3808 fence_size = i915_gem_get_gtt_size(dev, 3809 obj->base.size, 3810 obj->tiling_mode); 3811 fence_alignment = i915_gem_get_gtt_alignment(dev, 3812 obj->base.size, 3813 obj->tiling_mode, 3814 true); 3815 unfenced_alignment = 3816 i915_gem_get_gtt_alignment(dev, 3817 obj->base.size, 3818 obj->tiling_mode, 3819 false); 3820 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3821 } 3822 3823 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3824 end = vm->total; 3825 if (flags & PIN_MAPPABLE) 3826 end = min_t(u64, end, ggtt->mappable_end); 3827 if (flags & PIN_ZONE_4G) 3828 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3829 3830 if (alignment == 0) 3831 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3832 unfenced_alignment; 3833 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3834 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3835 ggtt_view ? ggtt_view->type : 0, 3836 alignment); 3837 return ERR_PTR(-EINVAL); 3838 } 3839 3840 /* If binding the object/GGTT view requires more space than the entire 3841 * aperture has, reject it early before evicting everything in a vain 3842 * attempt to find space. 3843 */ 3844 if (size > end) { 3845 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3846 ggtt_view ? ggtt_view->type : 0, 3847 size, 3848 flags & PIN_MAPPABLE ? "mappable" : "total", 3849 end); 3850 return ERR_PTR(-E2BIG); 3851 } 3852 3853 ret = i915_gem_object_get_pages(obj); 3854 if (ret) 3855 return ERR_PTR(ret); 3856 3857 i915_gem_object_pin_pages(obj); 3858 3859 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3860 i915_gem_obj_lookup_or_create_vma(obj, vm); 3861 3862 if (IS_ERR(vma)) 3863 goto err_unpin; 3864 3865 if (flags & PIN_OFFSET_FIXED) { 3866 uint64_t offset = flags & PIN_OFFSET_MASK; 3867 3868 if (offset & (alignment - 1) || offset + size > end) { 3869 ret = -EINVAL; 3870 goto err_free_vma; 3871 } 3872 vma->node.start = offset; 3873 vma->node.size = size; 3874 vma->node.color = obj->cache_level; 3875 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3876 if (ret) { 3877 ret = i915_gem_evict_for_vma(vma); 3878 if (ret == 0) 3879 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3880 } 3881 if (ret) 3882 goto err_free_vma; 3883 } else { 3884 if (flags & PIN_HIGH) { 3885 search_flag = DRM_MM_SEARCH_BELOW; 3886 alloc_flag = DRM_MM_CREATE_TOP; 3887 } else { 3888 search_flag = DRM_MM_SEARCH_DEFAULT; 3889 alloc_flag = DRM_MM_CREATE_DEFAULT; 3890 } 3891 3892 search_free: 3893 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3894 size, alignment, 3895 obj->cache_level, 3896 start, end, 3897 search_flag, 3898 alloc_flag); 3899 if (ret) { 3900 ret = i915_gem_evict_something(dev, vm, size, alignment, 3901 obj->cache_level, 3902 start, end, 3903 flags); 3904 if (ret == 0) 3905 goto search_free; 3906 3907 goto err_free_vma; 3908 } 3909 } 3910 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3911 ret = -EINVAL; 3912 goto err_remove_node; 3913 } 3914 3915 trace_i915_vma_bind(vma, flags); 3916 ret = i915_vma_bind(vma, obj->cache_level, flags); 3917 if (ret) 3918 goto err_remove_node; 3919 3920 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3921 list_add_tail(&vma->vm_link, &vm->inactive_list); 3922 3923 return vma; 3924 3925 err_remove_node: 3926 drm_mm_remove_node(&vma->node); 3927 err_free_vma: 3928 i915_gem_vma_destroy(vma); 3929 vma = ERR_PTR(ret); 3930 
err_unpin: 3931 i915_gem_object_unpin_pages(obj); 3932 return vma; 3933 } 3934 3935 bool 3936 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3937 bool force) 3938 { 3939 /* If we don't have a page list set up, then we're not pinned 3940 * to GPU, and we can ignore the cache flush because it'll happen 3941 * again at bind time. 3942 */ 3943 if (obj->pages == NULL) 3944 return false; 3945 3946 /* 3947 * Stolen memory is always coherent with the GPU as it is explicitly 3948 * marked as wc by the system, or the system is cache-coherent. 3949 */ 3950 if (obj->stolen || obj->phys_handle) 3951 return false; 3952 3953 /* If the GPU is snooping the contents of the CPU cache, 3954 * we do not need to manually clear the CPU cache lines. However, 3955 * the caches are only snooped when the render cache is 3956 * flushed/invalidated. As we always have to emit invalidations 3957 * and flushes when moving into and out of the RENDER domain, correct 3958 * snooping behaviour occurs naturally as the result of our domain 3959 * tracking. 3960 */ 3961 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3962 obj->cache_dirty = true; 3963 return false; 3964 } 3965 3966 trace_i915_gem_object_clflush(obj); 3967 drm_clflush_sg(obj->pages); 3968 obj->cache_dirty = false; 3969 3970 return true; 3971 } 3972 3973 /** Flushes the GTT write domain for the object if it's dirty. */ 3974 static void 3975 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3976 { 3977 uint32_t old_write_domain; 3978 3979 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3980 return; 3981 3982 /* No actual flushing is required for the GTT write domain. Writes 3983 * to it immediately go to main memory as far as we know, so there's 3984 * no chipset flush. It also doesn't land in render cache. 3985 * 3986 * However, we do have to enforce the order so that all writes through 3987 * the GTT land before any writes to the device, such as updates to 3988 * the GATT itself. 3989 */ 3990 wmb(); 3991 3992 old_write_domain = obj->base.write_domain; 3993 obj->base.write_domain = 0; 3994 3995 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3996 3997 trace_i915_gem_object_change_domain(obj, 3998 obj->base.read_domains, 3999 old_write_domain); 4000 } 4001 4002 /** Flushes the CPU write domain for the object if it's dirty. */ 4003 static void 4004 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 4005 { 4006 uint32_t old_write_domain; 4007 4008 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 4009 return; 4010 4011 if (i915_gem_clflush_object(obj, obj->pin_display)) 4012 i915_gem_chipset_flush(to_i915(obj->base.dev)); 4013 4014 old_write_domain = obj->base.write_domain; 4015 obj->base.write_domain = 0; 4016 4017 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 4018 4019 trace_i915_gem_object_change_domain(obj, 4020 obj->base.read_domains, 4021 old_write_domain); 4022 } 4023 4024 /** 4025 * Moves a single object to the GTT read, and possibly write domain. 4026 * @obj: object to act on 4027 * @write: ask for write access or read only 4028 * 4029 * This function returns when the move is complete, including waiting on 4030 * flushes to occur. 
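 *
 * In outline: wait for outstanding rendering, make sure the backing
 * pages are resident, flush the CPU write domain (clflush plus chipset
 * flush where required), issue a memory barrier on the first GTT
 * access, update the read/write domain tracking and finally bump the
 * VMA on the GGTT inactive LRU.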
4031 */ 4032 int 4033 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4034 { 4035 struct drm_device *dev = obj->base.dev; 4036 struct drm_i915_private *dev_priv = to_i915(dev); 4037 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4038 uint32_t old_write_domain, old_read_domains; 4039 struct i915_vma *vma; 4040 int ret; 4041 4042 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 4043 return 0; 4044 4045 ret = i915_gem_object_wait_rendering(obj, !write); 4046 if (ret) 4047 return ret; 4048 4049 /* Flush and acquire obj->pages so that we are coherent through 4050 * direct access in memory with previous cached writes through 4051 * shmemfs and that our cache domain tracking remains valid. 4052 * For example, if the obj->filp was moved to swap without us 4053 * being notified and releasing the pages, we would mistakenly 4054 * continue to assume that the obj remained out of the CPU cached 4055 * domain. 4056 */ 4057 ret = i915_gem_object_get_pages(obj); 4058 if (ret) 4059 return ret; 4060 4061 i915_gem_object_flush_cpu_write_domain(obj); 4062 4063 /* Serialise direct access to this object with the barriers for 4064 * coherent writes from the GPU, by effectively invalidating the 4065 * GTT domain upon first access. 4066 */ 4067 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 4068 mb(); 4069 4070 old_write_domain = obj->base.write_domain; 4071 old_read_domains = obj->base.read_domains; 4072 4073 /* It should now be out of any other write domains, and we can update 4074 * the domain values for our changes. 4075 */ 4076 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4077 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4078 if (write) { 4079 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 4080 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 4081 obj->dirty = 1; 4082 } 4083 4084 trace_i915_gem_object_change_domain(obj, 4085 old_read_domains, 4086 old_write_domain); 4087 4088 /* And bump the LRU for this access */ 4089 vma = i915_gem_obj_to_ggtt(obj); 4090 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4091 list_move_tail(&vma->vm_link, 4092 &ggtt->base.inactive_list); 4093 4094 return 0; 4095 } 4096 4097 /** 4098 * Changes the cache-level of an object across all VMA. 4099 * @obj: object to act on 4100 * @cache_level: new cache level to set for the object 4101 * 4102 * After this function returns, the object will be in the new cache-level 4103 * across all GTT and the contents of the backing storage will be coherent, 4104 * with respect to the new cache-level. In order to keep the backing storage 4105 * coherent for all users, we only allow a single cache level to be set 4106 * globally on the object and prevent it from being changed whilst the 4107 * hardware is reading from the object. That is if the object is currently 4108 * on the scanout it will be set to uncached (or equivalent display 4109 * cache coherency) and all non-MOCS GPU access will also be uncached so 4110 * that all direct access to the scanout remains coherent. 4111 */ 4112 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4113 enum i915_cache_level cache_level) 4114 { 4115 struct drm_device *dev = obj->base.dev; 4116 struct i915_vma *vma, *next; 4117 bool bound = false; 4118 int ret = 0; 4119 4120 if (obj->cache_level == cache_level) 4121 goto out; 4122 4123 /* Inspect the list of currently bound VMA and unbind any that would 4124 * be invalid given the new cache-level. 
This is principally to 4125 * catch the issue of the CS prefetch crossing page boundaries and 4126 * reading an invalid PTE on older architectures. 4127 */ 4128 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4129 if (!drm_mm_node_allocated(&vma->node)) 4130 continue; 4131 4132 if (vma->pin_count) { 4133 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4134 return -EBUSY; 4135 } 4136 4137 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4138 ret = i915_vma_unbind(vma); 4139 if (ret) 4140 return ret; 4141 } else 4142 bound = true; 4143 } 4144 4145 /* We can reuse the existing drm_mm nodes but need to change the 4146 * cache-level on the PTE. We could simply unbind them all and 4147 * rebind with the correct cache-level on next use. However since 4148 * we already have a valid slot, dma mapping, pages etc, we may as well 4149 * rewrite the PTE in the belief that doing so tramples upon less 4150 * state and so involves less work. 4151 */ 4152 if (bound) { 4153 /* Before we change the PTE, the GPU must not be accessing it. 4154 * If we wait upon the object, we know that all the bound 4155 * VMA are no longer active. 4156 */ 4157 ret = i915_gem_object_wait_rendering(obj, false); 4158 if (ret) 4159 return ret; 4160 4161 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4162 /* Access to snoopable pages through the GTT is 4163 * incoherent and on some machines causes a hard 4164 * lockup. Relinquish the CPU mmapping to force 4165 * userspace to refault in the pages and we can 4166 * then double check if the GTT mapping is still 4167 * valid for that pointer access. 4168 */ 4169 i915_gem_release_mmap(obj); 4170 4171 /* As we no longer need a fence for GTT access, 4172 * we can relinquish it now (and so prevent having 4173 * to steal a fence from someone else on the next 4174 * fence request). Note GPU activity would have 4175 * dropped the fence as all snoopable access is 4176 * supposed to be linear. 4177 */ 4178 ret = i915_gem_object_put_fence(obj); 4179 if (ret) 4180 return ret; 4181 } else { 4182 /* We either have incoherent backing store and 4183 * so no GTT access or the architecture is fully 4184 * coherent. In such cases, existing GTT mmaps 4185 * ignore the cache bit in the PTE and we can 4186 * rewrite it without confusing the GPU or having 4187 * to force userspace to fault back in its mmaps. 4188 */ 4189 } 4190 4191 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4192 if (!drm_mm_node_allocated(&vma->node)) 4193 continue; 4194 4195 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4196 if (ret) 4197 return ret; 4198 } 4199 } 4200 4201 list_for_each_entry(vma, &obj->vma_list, obj_link) 4202 vma->node.color = cache_level; 4203 obj->cache_level = cache_level; 4204 4205 out: 4206 /* Flush the dirty CPU caches to the backing storage so that the 4207 * object is now coherent at its new cache level (with respect 4208 * to the access domain).
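 * Note the clflush below is forced (the second argument is true) so that
 * cachelines dirtied while the object was still considered CPU-coherent
 * are written back, even though i915_gem_clflush_object() would otherwise
 * skip the flush for a coherent cache level.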
4209 */ 4210 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) { 4211 if (i915_gem_clflush_object(obj, true)) 4212 i915_gem_chipset_flush(to_i915(obj->base.dev)); 4213 } 4214 4215 return 0; 4216 } 4217 4218 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4219 struct drm_file *file) 4220 { 4221 struct drm_i915_gem_caching *args = data; 4222 struct drm_i915_gem_object *obj; 4223 4224 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4225 if (&obj->base == NULL) 4226 return -ENOENT; 4227 4228 switch (obj->cache_level) { 4229 case I915_CACHE_LLC: 4230 case I915_CACHE_L3_LLC: 4231 args->caching = I915_CACHING_CACHED; 4232 break; 4233 4234 case I915_CACHE_WT: 4235 args->caching = I915_CACHING_DISPLAY; 4236 break; 4237 4238 default: 4239 args->caching = I915_CACHING_NONE; 4240 break; 4241 } 4242 4243 drm_gem_object_unreference_unlocked(&obj->base); 4244 return 0; 4245 } 4246 4247 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4248 struct drm_file *file) 4249 { 4250 struct drm_i915_private *dev_priv = to_i915(dev); 4251 struct drm_i915_gem_caching *args = data; 4252 struct drm_i915_gem_object *obj; 4253 enum i915_cache_level level; 4254 int ret; 4255 4256 switch (args->caching) { 4257 case I915_CACHING_NONE: 4258 level = I915_CACHE_NONE; 4259 break; 4260 case I915_CACHING_CACHED: 4261 /* 4262 * Due to a HW issue on BXT A stepping, GPU stores via a 4263 * snooped mapping may leave stale data in a corresponding CPU 4264 * cacheline, whereas normally such cachelines would get 4265 * invalidated. 4266 */ 4267 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4268 return -ENODEV; 4269 4270 level = I915_CACHE_LLC; 4271 break; 4272 case I915_CACHING_DISPLAY: 4273 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4274 break; 4275 default: 4276 return -EINVAL; 4277 } 4278 4279 intel_runtime_pm_get(dev_priv); 4280 4281 ret = i915_mutex_lock_interruptible(dev); 4282 if (ret) 4283 goto rpm_put; 4284 4285 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4286 if (&obj->base == NULL) { 4287 ret = -ENOENT; 4288 goto unlock; 4289 } 4290 4291 ret = i915_gem_object_set_cache_level(obj, level); 4292 4293 drm_gem_object_unreference(&obj->base); 4294 unlock: 4295 mutex_unlock(&dev->struct_mutex); 4296 rpm_put: 4297 intel_runtime_pm_put(dev_priv); 4298 4299 return ret; 4300 } 4301 4302 /* 4303 * Prepare buffer for display plane (scanout, cursors, etc). 4304 * Can be called from an uninterruptible phase (modesetting) and allows 4305 * any flushes to be pipelined (for pageflips). 4306 */ 4307 int 4308 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4309 u32 alignment, 4310 const struct i915_ggtt_view *view) 4311 { 4312 u32 old_read_domains, old_write_domain; 4313 int ret; 4314 4315 /* Mark the pin_display early so that we account for the 4316 * display coherency whilst setting up the cache domains. 4317 */ 4318 obj->pin_display++; 4319 4320 /* The display engine is not coherent with the LLC cache on gen6. As 4321 * a result, we make sure that the pinning that is about to occur is 4322 * done with uncached PTEs. This is lowest common denominator for all 4323 * chipsets. 4324 * 4325 * However for gen6+, we could do better by using the GFDT bit instead 4326 * of uncaching, which would allow us to flush all the LLC-cached data 4327 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4328 */ 4329 ret = i915_gem_object_set_cache_level(obj, 4330 HAS_WT(obj->base.dev) ? 
I915_CACHE_WT : I915_CACHE_NONE); 4331 if (ret) 4332 goto err_unpin_display; 4333 4334 /* As the user may map the buffer once pinned in the display plane 4335 * (e.g. libkms for the bootup splash), we have to ensure that we 4336 * always use map_and_fenceable for all scanout buffers. 4337 */ 4338 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4339 view->type == I915_GGTT_VIEW_NORMAL ? 4340 PIN_MAPPABLE : 0); 4341 if (ret) 4342 goto err_unpin_display; 4343 4344 i915_gem_object_flush_cpu_write_domain(obj); 4345 4346 old_write_domain = obj->base.write_domain; 4347 old_read_domains = obj->base.read_domains; 4348 4349 /* It should now be out of any other write domains, and we can update 4350 * the domain values for our changes. 4351 */ 4352 obj->base.write_domain = 0; 4353 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4354 4355 trace_i915_gem_object_change_domain(obj, 4356 old_read_domains, 4357 old_write_domain); 4358 4359 return 0; 4360 4361 err_unpin_display: 4362 obj->pin_display--; 4363 return ret; 4364 } 4365 4366 void 4367 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4368 const struct i915_ggtt_view *view) 4369 { 4370 if (WARN_ON(obj->pin_display == 0)) 4371 return; 4372 4373 i915_gem_object_ggtt_unpin_view(obj, view); 4374 4375 obj->pin_display--; 4376 } 4377 4378 /** 4379 * Moves a single object to the CPU read, and possibly write domain. 4380 * @obj: object to act on 4381 * @write: requesting write or read-only access 4382 * 4383 * This function returns when the move is complete, including waiting on 4384 * flushes to occur. 4385 */ 4386 int 4387 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4388 { 4389 uint32_t old_write_domain, old_read_domains; 4390 int ret; 4391 4392 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4393 return 0; 4394 4395 ret = i915_gem_object_wait_rendering(obj, !write); 4396 if (ret) 4397 return ret; 4398 4399 i915_gem_object_flush_gtt_write_domain(obj); 4400 4401 old_write_domain = obj->base.write_domain; 4402 old_read_domains = obj->base.read_domains; 4403 4404 /* Flush the CPU cache if it's still invalid. */ 4405 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4406 i915_gem_clflush_object(obj, false); 4407 4408 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4409 } 4410 4411 /* It should now be out of any other write domains, and we can update 4412 * the domain values for our changes. 4413 */ 4414 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4415 4416 /* If we're writing through the CPU, then the GPU read domains will 4417 * need to be invalidated at next use. 4418 */ 4419 if (write) { 4420 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4421 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4422 } 4423 4424 trace_i915_gem_object_change_domain(obj, 4425 old_read_domains, 4426 old_write_domain); 4427 4428 return 0; 4429 } 4430 4431 /* Throttle our rendering by waiting until the ring has completed our requests 4432 * emitted over 20 msec ago. 4433 * 4434 * Note that if we were to use the current jiffies each time around the loop, 4435 * we wouldn't escape the function with any frames outstanding if the time to 4436 * render a frame was over 20ms. 4437 * 4438 * This should get us reasonable parallelism between CPU and GPU but also 4439 * relatively low latency when blocking on a particular request to finish. 
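 *
 * Userspace reaches this path through the argument-less throttle ioctl.
 * A minimal sketch, assuming a libdrm device file descriptor and the
 * drmIoctl() wrapper from libdrm:
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
 *
 * which waits for the most recent of the caller's requests that was
 * emitted more than DRM_I915_THROTTLE_JIFFIES (20 msec) ago.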
4440 */ 4441 static int 4442 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4443 { 4444 struct drm_i915_private *dev_priv = to_i915(dev); 4445 struct drm_i915_file_private *file_priv = file->driver_priv; 4446 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4447 struct drm_i915_gem_request *request, *target = NULL; 4448 int ret; 4449 4450 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4451 if (ret) 4452 return ret; 4453 4454 /* ABI: return -EIO if already wedged */ 4455 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4456 return -EIO; 4457 4458 spin_lock(&file_priv->mm.lock); 4459 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4460 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4461 break; 4462 4463 /* 4464 * Note that the request might not have been submitted yet. 4465 * In which case emitted_jiffies will be zero. 4466 */ 4467 if (!request->emitted_jiffies) 4468 continue; 4469 4470 target = request; 4471 } 4472 if (target) 4473 i915_gem_request_reference(target); 4474 spin_unlock(&file_priv->mm.lock); 4475 4476 if (target == NULL) 4477 return 0; 4478 4479 ret = __i915_wait_request(target, true, NULL, NULL); 4480 i915_gem_request_unreference(target); 4481 4482 return ret; 4483 } 4484 4485 static bool 4486 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4487 { 4488 struct drm_i915_gem_object *obj = vma->obj; 4489 4490 if (alignment && 4491 vma->node.start & (alignment - 1)) 4492 return true; 4493 4494 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4495 return true; 4496 4497 if (flags & PIN_OFFSET_BIAS && 4498 vma->node.start < (flags & PIN_OFFSET_MASK)) 4499 return true; 4500 4501 if (flags & PIN_OFFSET_FIXED && 4502 vma->node.start != (flags & PIN_OFFSET_MASK)) 4503 return true; 4504 4505 return false; 4506 } 4507 4508 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4509 { 4510 struct drm_i915_gem_object *obj = vma->obj; 4511 bool mappable, fenceable; 4512 u32 fence_size, fence_alignment; 4513 4514 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4515 obj->base.size, 4516 obj->tiling_mode); 4517 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4518 obj->base.size, 4519 obj->tiling_mode, 4520 true); 4521 4522 fenceable = (vma->node.size == fence_size && 4523 (vma->node.start & (fence_alignment - 1)) == 0); 4524 4525 mappable = (vma->node.start + fence_size <= 4526 to_i915(obj->base.dev)->ggtt.mappable_end); 4527 4528 obj->map_and_fenceable = mappable && fenceable; 4529 } 4530 4531 static int 4532 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4533 struct i915_address_space *vm, 4534 const struct i915_ggtt_view *ggtt_view, 4535 uint32_t alignment, 4536 uint64_t flags) 4537 { 4538 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4539 struct i915_vma *vma; 4540 unsigned bound; 4541 int ret; 4542 4543 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4544 return -ENODEV; 4545 4546 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4547 return -EINVAL; 4548 4549 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4550 return -EINVAL; 4551 4552 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4553 return -EINVAL; 4554 4555 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4556 i915_gem_obj_to_vma(obj, vm); 4557 4558 if (vma) { 4559 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4560 return -EBUSY; 4561 4562 if (i915_vma_misplaced(vma, alignment, flags)) { 4563 WARN(vma->pin_count, 4564 "bo is already pinned in %s with incorrect alignment:" 4565 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4566 " obj->map_and_fenceable=%d\n", 4567 ggtt_view ? "ggtt" : "ppgtt", 4568 upper_32_bits(vma->node.start), 4569 lower_32_bits(vma->node.start), 4570 alignment, 4571 !!(flags & PIN_MAPPABLE), 4572 obj->map_and_fenceable); 4573 ret = i915_vma_unbind(vma); 4574 if (ret) 4575 return ret; 4576 4577 vma = NULL; 4578 } 4579 } 4580 4581 bound = vma ? vma->bound : 0; 4582 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4583 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4584 flags); 4585 if (IS_ERR(vma)) 4586 return PTR_ERR(vma); 4587 } else { 4588 ret = i915_vma_bind(vma, obj->cache_level, flags); 4589 if (ret) 4590 return ret; 4591 } 4592 4593 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4594 (bound ^ vma->bound) & GLOBAL_BIND) { 4595 __i915_vma_set_map_and_fenceable(vma); 4596 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4597 } 4598 4599 vma->pin_count++; 4600 return 0; 4601 } 4602 4603 int 4604 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4605 struct i915_address_space *vm, 4606 uint32_t alignment, 4607 uint64_t flags) 4608 { 4609 return i915_gem_object_do_pin(obj, vm, 4610 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4611 alignment, flags); 4612 } 4613 4614 int 4615 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4616 const struct i915_ggtt_view *view, 4617 uint32_t alignment, 4618 uint64_t flags) 4619 { 4620 struct drm_device *dev = obj->base.dev; 4621 struct drm_i915_private *dev_priv = to_i915(dev); 4622 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4623 4624 BUG_ON(!view); 4625 4626 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4627 alignment, flags | PIN_GLOBAL); 4628 } 4629 4630 void 4631 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4632 const struct i915_ggtt_view *view) 4633 { 4634 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4635 4636 WARN_ON(vma->pin_count == 0); 4637 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4638 4639 --vma->pin_count; 4640 } 4641 4642 int 4643 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4644 struct drm_file *file) 4645 { 4646 struct drm_i915_gem_busy *args = data; 4647 struct drm_i915_gem_object *obj; 4648 int ret; 4649 4650 ret = i915_mutex_lock_interruptible(dev); 4651 if (ret) 4652 return ret; 4653 4654 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4655 if (&obj->base == NULL) { 4656 ret = -ENOENT; 4657 goto unlock; 4658 } 4659 4660 /* Count all active objects as busy, even if they are currently not used 4661 * by the gpu. Users of this interface expect objects to eventually 4662 * become non-busy without any further actions, therefore emit any 4663 * necessary flushes here. 
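 *
 * The result is reported as a bitmask: bit (16 + exec_id) is set for each
 * engine that is still reading from the object, while the exec_id of the
 * engine with an outstanding write (if any) occupies the low 16 bits. A
 * sketch of the userspace side, assuming drmIoctl() from libdrm:
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0 && busy.busy) {
 *		unsigned int readers = busy.busy >> 16;
 *		unsigned int writer = busy.busy & 0xffff;
 *	}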
4664 */ 4665 ret = i915_gem_object_flush_active(obj); 4666 if (ret) 4667 goto unref; 4668 4669 args->busy = 0; 4670 if (obj->active) { 4671 int i; 4672 4673 for (i = 0; i < I915_NUM_ENGINES; i++) { 4674 struct drm_i915_gem_request *req; 4675 4676 req = obj->last_read_req[i]; 4677 if (req) 4678 args->busy |= 1 << (16 + req->engine->exec_id); 4679 } 4680 if (obj->last_write_req) 4681 args->busy |= obj->last_write_req->engine->exec_id; 4682 } 4683 4684 unref: 4685 drm_gem_object_unreference(&obj->base); 4686 unlock: 4687 mutex_unlock(&dev->struct_mutex); 4688 return ret; 4689 } 4690 4691 int 4692 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4693 struct drm_file *file_priv) 4694 { 4695 return i915_gem_ring_throttle(dev, file_priv); 4696 } 4697 4698 int 4699 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4700 struct drm_file *file_priv) 4701 { 4702 struct drm_i915_private *dev_priv = to_i915(dev); 4703 struct drm_i915_gem_madvise *args = data; 4704 struct drm_i915_gem_object *obj; 4705 int ret; 4706 4707 switch (args->madv) { 4708 case I915_MADV_DONTNEED: 4709 case I915_MADV_WILLNEED: 4710 break; 4711 default: 4712 return -EINVAL; 4713 } 4714 4715 ret = i915_mutex_lock_interruptible(dev); 4716 if (ret) 4717 return ret; 4718 4719 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4720 if (&obj->base == NULL) { 4721 ret = -ENOENT; 4722 goto unlock; 4723 } 4724 4725 if (i915_gem_obj_is_pinned(obj)) { 4726 ret = -EINVAL; 4727 goto out; 4728 } 4729 4730 if (obj->pages && 4731 obj->tiling_mode != I915_TILING_NONE && 4732 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4733 if (obj->madv == I915_MADV_WILLNEED) 4734 i915_gem_object_unpin_pages(obj); 4735 if (args->madv == I915_MADV_WILLNEED) 4736 i915_gem_object_pin_pages(obj); 4737 } 4738 4739 if (obj->madv != __I915_MADV_PURGED) 4740 obj->madv = args->madv; 4741 4742 /* if the object is no longer attached, discard its backing storage */ 4743 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4744 i915_gem_object_truncate(obj); 4745 4746 args->retained = obj->madv != __I915_MADV_PURGED; 4747 4748 out: 4749 drm_gem_object_unreference(&obj->base); 4750 unlock: 4751 mutex_unlock(&dev->struct_mutex); 4752 return ret; 4753 } 4754 4755 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4756 const struct drm_i915_gem_object_ops *ops) 4757 { 4758 int i; 4759 4760 INIT_LIST_HEAD(&obj->global_list); 4761 for (i = 0; i < I915_NUM_ENGINES; i++) 4762 INIT_LIST_HEAD(&obj->engine_list[i]); 4763 INIT_LIST_HEAD(&obj->obj_exec_link); 4764 INIT_LIST_HEAD(&obj->vma_list); 4765 INIT_LIST_HEAD(&obj->batch_pool_link); 4766 4767 obj->ops = ops; 4768 4769 obj->fence_reg = I915_FENCE_REG_NONE; 4770 obj->madv = I915_MADV_WILLNEED; 4771 4772 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4773 } 4774 4775 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4776 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4777 .get_pages = i915_gem_object_get_pages_gtt, 4778 .put_pages = i915_gem_object_put_pages_gtt, 4779 }; 4780 4781 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, 4782 size_t size) 4783 { 4784 struct drm_i915_gem_object *obj; 4785 struct address_space *mapping; 4786 gfp_t mask; 4787 int ret; 4788 4789 obj = i915_gem_object_alloc(dev); 4790 if (obj == NULL) 4791 return ERR_PTR(-ENOMEM); 4792 4793 ret = drm_gem_object_init(dev, &obj->base, size); 4794 if (ret) 4795 goto fail; 4796 4797 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4798 if (IS_CRESTLINE(dev) || 
IS_BROADWATER(dev)) { 4799 /* 965gm cannot relocate objects above 4GiB. */ 4800 mask &= ~__GFP_HIGHMEM; 4801 mask |= __GFP_DMA32; 4802 } 4803 4804 mapping = obj->base.filp->f_mapping; 4805 mapping_set_gfp_mask(mapping, mask); 4806 4807 i915_gem_object_init(obj, &i915_gem_object_ops); 4808 4809 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4810 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4811 4812 if (HAS_LLC(dev)) { 4813 /* On some devices, we can have the GPU use the LLC (the CPU 4814 * cache) for about a 10% performance improvement 4815 * compared to uncached. Graphics requests other than 4816 * display scanout are coherent with the CPU in 4817 * accessing this cache. This means in this mode we 4818 * don't need to clflush on the CPU side, and on the 4819 * GPU side we only need to flush internal caches to 4820 * get data visible to the CPU. 4821 * 4822 * However, we maintain the display planes as UC, and so 4823 * need to rebind when first used as such. 4824 */ 4825 obj->cache_level = I915_CACHE_LLC; 4826 } else 4827 obj->cache_level = I915_CACHE_NONE; 4828 4829 trace_i915_gem_object_create(obj); 4830 4831 return obj; 4832 4833 fail: 4834 i915_gem_object_free(obj); 4835 4836 return ERR_PTR(ret); 4837 } 4838 4839 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4840 { 4841 /* If we are the last user of the backing storage (be it shmemfs 4842 * pages or stolen etc), we know that the pages are going to be 4843 * immediately released. In this case, we can then skip copying 4844 * back the contents from the GPU. 4845 */ 4846 4847 if (obj->madv != I915_MADV_WILLNEED) 4848 return false; 4849 4850 if (obj->base.filp == NULL) 4851 return true; 4852 4853 /* At first glance, this looks racy, but then again so would be 4854 * userspace racing mmap against close. However, the first external 4855 * reference to the filp can only be obtained through the 4856 * i915_gem_mmap_ioctl() which safeguards us against the user 4857 * acquiring such a reference whilst we are in the middle of 4858 * freeing the object. 4859 */ 4860 return atomic_long_read(&obj->base.filp->f_count) == 1; 4861 } 4862 4863 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4864 { 4865 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4866 struct drm_device *dev = obj->base.dev; 4867 struct drm_i915_private *dev_priv = to_i915(dev); 4868 struct i915_vma *vma, *next; 4869 4870 intel_runtime_pm_get(dev_priv); 4871 4872 trace_i915_gem_object_destroy(obj); 4873 4874 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4875 int ret; 4876 4877 vma->pin_count = 0; 4878 ret = i915_vma_unbind(vma); 4879 if (WARN_ON(ret == -ERESTARTSYS)) { 4880 bool was_interruptible; 4881 4882 was_interruptible = dev_priv->mm.interruptible; 4883 dev_priv->mm.interruptible = false; 4884 4885 WARN_ON(i915_vma_unbind(vma)); 4886 4887 dev_priv->mm.interruptible = was_interruptible; 4888 } 4889 } 4890 4891 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4892 * before progressing. 
*/ 4893 if (obj->stolen) 4894 i915_gem_object_unpin_pages(obj); 4895 4896 WARN_ON(obj->frontbuffer_bits); 4897 4898 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4899 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4900 obj->tiling_mode != I915_TILING_NONE) 4901 i915_gem_object_unpin_pages(obj); 4902 4903 if (WARN_ON(obj->pages_pin_count)) 4904 obj->pages_pin_count = 0; 4905 if (discard_backing_storage(obj)) 4906 obj->madv = I915_MADV_DONTNEED; 4907 i915_gem_object_put_pages(obj); 4908 i915_gem_object_free_mmap_offset(obj); 4909 4910 BUG_ON(obj->pages); 4911 4912 if (obj->base.import_attach) 4913 drm_prime_gem_destroy(&obj->base, NULL); 4914 4915 if (obj->ops->release) 4916 obj->ops->release(obj); 4917 4918 drm_gem_object_release(&obj->base); 4919 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4920 4921 kfree(obj->bit_17); 4922 i915_gem_object_free(obj); 4923 4924 intel_runtime_pm_put(dev_priv); 4925 } 4926 4927 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4928 struct i915_address_space *vm) 4929 { 4930 struct i915_vma *vma; 4931 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4932 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4933 vma->vm == vm) 4934 return vma; 4935 } 4936 return NULL; 4937 } 4938 4939 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4940 const struct i915_ggtt_view *view) 4941 { 4942 struct i915_vma *vma; 4943 4944 GEM_BUG_ON(!view); 4945 4946 list_for_each_entry(vma, &obj->vma_list, obj_link) 4947 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 4948 return vma; 4949 return NULL; 4950 } 4951 4952 void i915_gem_vma_destroy(struct i915_vma *vma) 4953 { 4954 WARN_ON(vma->node.allocated); 4955 4956 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4957 if (!list_empty(&vma->exec_list)) 4958 return; 4959 4960 if (!vma->is_ggtt) 4961 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4962 4963 list_del(&vma->obj_link); 4964 4965 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 4966 } 4967 4968 static void 4969 i915_gem_stop_engines(struct drm_device *dev) 4970 { 4971 struct drm_i915_private *dev_priv = to_i915(dev); 4972 struct intel_engine_cs *engine; 4973 4974 for_each_engine(engine, dev_priv) 4975 dev_priv->gt.stop_engine(engine); 4976 } 4977 4978 int 4979 i915_gem_suspend(struct drm_device *dev) 4980 { 4981 struct drm_i915_private *dev_priv = to_i915(dev); 4982 int ret = 0; 4983 4984 mutex_lock(&dev->struct_mutex); 4985 ret = i915_gem_wait_for_idle(dev_priv); 4986 if (ret) 4987 goto err; 4988 4989 i915_gem_retire_requests(dev_priv); 4990 4991 i915_gem_stop_engines(dev); 4992 i915_gem_context_lost(dev_priv); 4993 mutex_unlock(&dev->struct_mutex); 4994 4995 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4996 cancel_delayed_work_sync(&dev_priv->gt.retire_work); 4997 flush_delayed_work(&dev_priv->gt.idle_work); 4998 4999 /* Assert that we successfully flushed all the work and 5000 * reset the GPU back to its idle, low power state.
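 * The engine and swizzling state torn down here is programmed again by
 * i915_gem_init_hw() below when the GPU is re-initialised.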
5001 */ 5002 WARN_ON(dev_priv->gt.awake); 5003 5004 return 0; 5005 5006 err: 5007 mutex_unlock(&dev->struct_mutex); 5008 return ret; 5009 } 5010 5011 void i915_gem_init_swizzling(struct drm_device *dev) 5012 { 5013 struct drm_i915_private *dev_priv = to_i915(dev); 5014 5015 if (INTEL_INFO(dev)->gen < 5 || 5016 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5017 return; 5018 5019 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5020 DISP_TILE_SURFACE_SWIZZLING); 5021 5022 if (IS_GEN5(dev)) 5023 return; 5024 5025 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5026 if (IS_GEN6(dev)) 5027 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5028 else if (IS_GEN7(dev)) 5029 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5030 else if (IS_GEN8(dev)) 5031 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5032 else 5033 BUG(); 5034 } 5035 5036 static void init_unused_ring(struct drm_device *dev, u32 base) 5037 { 5038 struct drm_i915_private *dev_priv = to_i915(dev); 5039 5040 I915_WRITE(RING_CTL(base), 0); 5041 I915_WRITE(RING_HEAD(base), 0); 5042 I915_WRITE(RING_TAIL(base), 0); 5043 I915_WRITE(RING_START(base), 0); 5044 } 5045 5046 static void init_unused_rings(struct drm_device *dev) 5047 { 5048 if (IS_I830(dev)) { 5049 init_unused_ring(dev, PRB1_BASE); 5050 init_unused_ring(dev, SRB0_BASE); 5051 init_unused_ring(dev, SRB1_BASE); 5052 init_unused_ring(dev, SRB2_BASE); 5053 init_unused_ring(dev, SRB3_BASE); 5054 } else if (IS_GEN2(dev)) { 5055 init_unused_ring(dev, SRB0_BASE); 5056 init_unused_ring(dev, SRB1_BASE); 5057 } else if (IS_GEN3(dev)) { 5058 init_unused_ring(dev, PRB1_BASE); 5059 init_unused_ring(dev, PRB2_BASE); 5060 } 5061 } 5062 5063 int i915_gem_init_engines(struct drm_device *dev) 5064 { 5065 struct drm_i915_private *dev_priv = to_i915(dev); 5066 int ret; 5067 5068 ret = intel_init_render_ring_buffer(dev); 5069 if (ret) 5070 return ret; 5071 5072 if (HAS_BSD(dev)) { 5073 ret = intel_init_bsd_ring_buffer(dev); 5074 if (ret) 5075 goto cleanup_render_ring; 5076 } 5077 5078 if (HAS_BLT(dev)) { 5079 ret = intel_init_blt_ring_buffer(dev); 5080 if (ret) 5081 goto cleanup_bsd_ring; 5082 } 5083 5084 if (HAS_VEBOX(dev)) { 5085 ret = intel_init_vebox_ring_buffer(dev); 5086 if (ret) 5087 goto cleanup_blt_ring; 5088 } 5089 5090 if (HAS_BSD2(dev)) { 5091 ret = intel_init_bsd2_ring_buffer(dev); 5092 if (ret) 5093 goto cleanup_vebox_ring; 5094 } 5095 5096 return 0; 5097 5098 cleanup_vebox_ring: 5099 intel_cleanup_engine(&dev_priv->engine[VECS]); 5100 cleanup_blt_ring: 5101 intel_cleanup_engine(&dev_priv->engine[BCS]); 5102 cleanup_bsd_ring: 5103 intel_cleanup_engine(&dev_priv->engine[VCS]); 5104 cleanup_render_ring: 5105 intel_cleanup_engine(&dev_priv->engine[RCS]); 5106 5107 return ret; 5108 } 5109 5110 int 5111 i915_gem_init_hw(struct drm_device *dev) 5112 { 5113 struct drm_i915_private *dev_priv = to_i915(dev); 5114 struct intel_engine_cs *engine; 5115 int ret; 5116 5117 /* Double layer security blanket, see i915_gem_init() */ 5118 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5119 5120 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5121 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5122 5123 if (IS_HASWELL(dev)) 5124 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
5125 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5126 5127 if (HAS_PCH_NOP(dev)) { 5128 if (IS_IVYBRIDGE(dev)) { 5129 u32 temp = I915_READ(GEN7_MSG_CTL); 5130 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5131 I915_WRITE(GEN7_MSG_CTL, temp); 5132 } else if (INTEL_INFO(dev)->gen >= 7) { 5133 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5134 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5135 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5136 } 5137 } 5138 5139 i915_gem_init_swizzling(dev); 5140 5141 /* 5142 * At least 830 can leave some of the unused rings 5143 * "active" (ie. head != tail) after resume which 5144 * will prevent c3 entry. Makes sure all unused rings 5145 * are totally idle. 5146 */ 5147 init_unused_rings(dev); 5148 5149 BUG_ON(!dev_priv->kernel_context); 5150 5151 ret = i915_ppgtt_init_hw(dev); 5152 if (ret) { 5153 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5154 goto out; 5155 } 5156 5157 /* Need to do basic initialisation of all rings first: */ 5158 for_each_engine(engine, dev_priv) { 5159 ret = engine->init_hw(engine); 5160 if (ret) 5161 goto out; 5162 } 5163 5164 intel_mocs_init_l3cc_table(dev); 5165 5166 /* We can't enable contexts until all firmware is loaded */ 5167 ret = intel_guc_setup(dev); 5168 if (ret) 5169 goto out; 5170 5171 out: 5172 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5173 return ret; 5174 } 5175 5176 int i915_gem_init(struct drm_device *dev) 5177 { 5178 struct drm_i915_private *dev_priv = to_i915(dev); 5179 int ret; 5180 5181 mutex_lock(&dev->struct_mutex); 5182 5183 if (!i915.enable_execlists) { 5184 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5185 dev_priv->gt.init_engines = i915_gem_init_engines; 5186 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5187 dev_priv->gt.stop_engine = intel_stop_engine; 5188 } else { 5189 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5190 dev_priv->gt.init_engines = intel_logical_rings_init; 5191 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5192 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5193 } 5194 5195 /* This is just a security blanket to placate dragons. 5196 * On some systems, we very sporadically observe that the first TLBs 5197 * used by the CS may be stale, despite us poking the TLB reset. If 5198 * we hold the forcewake during initialisation these problems 5199 * just magically go away. 5200 */ 5201 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5202 5203 i915_gem_init_userptr(dev_priv); 5204 i915_gem_init_ggtt(dev); 5205 5206 ret = i915_gem_context_init(dev); 5207 if (ret) 5208 goto out_unlock; 5209 5210 ret = dev_priv->gt.init_engines(dev); 5211 if (ret) 5212 goto out_unlock; 5213 5214 ret = i915_gem_init_hw(dev); 5215 if (ret == -EIO) { 5216 /* Allow ring initialisation to fail by marking the GPU as 5217 * wedged. But we only want to do this where the GPU is angry, 5218 * for all other failure, such as an allocation failure, bail. 
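 * Wedging is recorded by setting I915_WEDGED in gpu_error.reset_counter
 * below, after which i915_terminally_wedged() reports true and waiters
 * such as the throttle ioctl above return -EIO instead of blocking.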
5219 */ 5220 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5221 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5222 ret = 0; 5223 } 5224 5225 out_unlock: 5226 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5227 mutex_unlock(&dev->struct_mutex); 5228 5229 return ret; 5230 } 5231 5232 void 5233 i915_gem_cleanup_engines(struct drm_device *dev) 5234 { 5235 struct drm_i915_private *dev_priv = to_i915(dev); 5236 struct intel_engine_cs *engine; 5237 5238 for_each_engine(engine, dev_priv) 5239 dev_priv->gt.cleanup_engine(engine); 5240 } 5241 5242 static void 5243 init_engine_lists(struct intel_engine_cs *engine) 5244 { 5245 INIT_LIST_HEAD(&engine->active_list); 5246 INIT_LIST_HEAD(&engine->request_list); 5247 } 5248 5249 void 5250 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5251 { 5252 struct drm_device *dev = &dev_priv->drm; 5253 5254 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5255 !IS_CHERRYVIEW(dev_priv)) 5256 dev_priv->num_fence_regs = 32; 5257 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5258 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5259 dev_priv->num_fence_regs = 16; 5260 else 5261 dev_priv->num_fence_regs = 8; 5262 5263 if (intel_vgpu_active(dev_priv)) 5264 dev_priv->num_fence_regs = 5265 I915_READ(vgtif_reg(avail_rs.fence_num)); 5266 5267 /* Initialize fence registers to zero */ 5268 i915_gem_restore_fences(dev); 5269 5270 i915_gem_detect_bit_6_swizzle(dev); 5271 } 5272 5273 void 5274 i915_gem_load_init(struct drm_device *dev) 5275 { 5276 struct drm_i915_private *dev_priv = to_i915(dev); 5277 int i; 5278 5279 dev_priv->objects = 5280 kmem_cache_create("i915_gem_object", 5281 sizeof(struct drm_i915_gem_object), 0, 5282 SLAB_HWCACHE_ALIGN, 5283 NULL); 5284 dev_priv->vmas = 5285 kmem_cache_create("i915_gem_vma", 5286 sizeof(struct i915_vma), 0, 5287 SLAB_HWCACHE_ALIGN, 5288 NULL); 5289 dev_priv->requests = 5290 kmem_cache_create("i915_gem_request", 5291 sizeof(struct drm_i915_gem_request), 0, 5292 SLAB_HWCACHE_ALIGN, 5293 NULL); 5294 5295 INIT_LIST_HEAD(&dev_priv->vm_list); 5296 INIT_LIST_HEAD(&dev_priv->context_list); 5297 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5298 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5299 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5300 for (i = 0; i < I915_NUM_ENGINES; i++) 5301 init_engine_lists(&dev_priv->engine[i]); 5302 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5303 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5304 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 5305 i915_gem_retire_work_handler); 5306 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 5307 i915_gem_idle_work_handler); 5308 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 5309 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5310 5311 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5312 5313 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5314 5315 init_waitqueue_head(&dev_priv->pending_flip_queue); 5316 5317 dev_priv->mm.interruptible = true; 5318 5319 mutex_init(&dev_priv->fb_tracking.lock); 5320 } 5321 5322 void i915_gem_load_cleanup(struct drm_device *dev) 5323 { 5324 struct drm_i915_private *dev_priv = to_i915(dev); 5325 5326 kmem_cache_destroy(dev_priv->requests); 5327 kmem_cache_destroy(dev_priv->vmas); 5328 kmem_cache_destroy(dev_priv->objects); 5329 } 5330 5331 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 5332 { 5333 struct drm_i915_gem_object *obj; 5334 5335 /* Called just before we write the hibernation image. 
5336 * 5337 * We need to update the domain tracking to reflect that the CPU 5338 * will be accessing all the pages to create and restore from the 5339 * hibernation, and so upon restoration those pages will be in the 5340 * CPU domain. 5341 * 5342 * To make sure the hibernation image contains the latest state, 5343 * we update that state just before writing out the image. 5344 */ 5345 5346 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 5347 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 5348 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 5349 } 5350 5351 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5352 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 5353 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 5354 } 5355 5356 return 0; 5357 } 5358 5359 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5360 { 5361 struct drm_i915_file_private *file_priv = file->driver_priv; 5362 5363 /* Clean up our request list when the client is going away, so that 5364 * later retire_requests won't dereference our soon-to-be-gone 5365 * file_priv. 5366 */ 5367 spin_lock(&file_priv->mm.lock); 5368 while (!list_empty(&file_priv->mm.request_list)) { 5369 struct drm_i915_gem_request *request; 5370 5371 request = list_first_entry(&file_priv->mm.request_list, 5372 struct drm_i915_gem_request, 5373 client_list); 5374 list_del(&request->client_list); 5375 request->file_priv = NULL; 5376 } 5377 spin_unlock(&file_priv->mm.lock); 5378 5379 if (!list_empty(&file_priv->rps.link)) { 5380 spin_lock(&to_i915(dev)->rps.client_lock); 5381 list_del(&file_priv->rps.link); 5382 spin_unlock(&to_i915(dev)->rps.client_lock); 5383 } 5384 } 5385 5386 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5387 { 5388 struct drm_i915_file_private *file_priv; 5389 int ret; 5390 5391 DRM_DEBUG_DRIVER("\n"); 5392 5393 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5394 if (!file_priv) 5395 return -ENOMEM; 5396 5397 file->driver_priv = file_priv; 5398 file_priv->dev_priv = to_i915(dev); 5399 file_priv->file = file; 5400 INIT_LIST_HEAD(&file_priv->rps.link); 5401 5402 spin_lock_init(&file_priv->mm.lock); 5403 INIT_LIST_HEAD(&file_priv->mm.request_list); 5404 5405 file_priv->bsd_ring = -1; 5406 5407 ret = i915_gem_context_open(dev, file); 5408 if (ret) 5409 kfree(file_priv); 5410 5411 return ret; 5412 } 5413 5414 /** 5415 * i915_gem_track_fb - update frontbuffer tracking 5416 * @old: current GEM buffer for the frontbuffer slots 5417 * @new: new GEM buffer for the frontbuffer slots 5418 * @frontbuffer_bits: bitmask of frontbuffer slots 5419 * 5420 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5421 * from @old and setting them in @new. Both @old and @new can be NULL. 
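 *
 * For example (a sketch only; it assumes the INTEL_FRONTBUFFER_PRIMARY()
 * helper from intel_drv.h), flipping the primary plane of pipe from
 * old_obj to new_obj would be tracked with:
 *
 *	i915_gem_track_fb(old_obj, new_obj, INTEL_FRONTBUFFER_PRIMARY(pipe));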
5422 */ 5423 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5424 struct drm_i915_gem_object *new, 5425 unsigned frontbuffer_bits) 5426 { 5427 if (old) { 5428 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5429 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5430 old->frontbuffer_bits &= ~frontbuffer_bits; 5431 } 5432 5433 if (new) { 5434 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5435 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5436 new->frontbuffer_bits |= frontbuffer_bits; 5437 } 5438 } 5439 5440 /* All the new VM stuff */ 5441 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5442 struct i915_address_space *vm) 5443 { 5444 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5445 struct i915_vma *vma; 5446 5447 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5448 5449 list_for_each_entry(vma, &o->vma_list, obj_link) { 5450 if (vma->is_ggtt && 5451 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5452 continue; 5453 if (vma->vm == vm) 5454 return vma->node.start; 5455 } 5456 5457 WARN(1, "%s vma for this object not found.\n", 5458 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5459 return -1; 5460 } 5461 5462 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5463 const struct i915_ggtt_view *view) 5464 { 5465 struct i915_vma *vma; 5466 5467 list_for_each_entry(vma, &o->vma_list, obj_link) 5468 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) 5469 return vma->node.start; 5470 5471 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5472 return -1; 5473 } 5474 5475 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5476 struct i915_address_space *vm) 5477 { 5478 struct i915_vma *vma; 5479 5480 list_for_each_entry(vma, &o->vma_list, obj_link) { 5481 if (vma->is_ggtt && 5482 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5483 continue; 5484 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5485 return true; 5486 } 5487 5488 return false; 5489 } 5490 5491 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5492 const struct i915_ggtt_view *view) 5493 { 5494 struct i915_vma *vma; 5495 5496 list_for_each_entry(vma, &o->vma_list, obj_link) 5497 if (vma->is_ggtt && 5498 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5499 drm_mm_node_allocated(&vma->node)) 5500 return true; 5501 5502 return false; 5503 } 5504 5505 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5506 { 5507 struct i915_vma *vma; 5508 5509 list_for_each_entry(vma, &o->vma_list, obj_link) 5510 if (drm_mm_node_allocated(&vma->node)) 5511 return true; 5512 5513 return false; 5514 } 5515 5516 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) 5517 { 5518 struct i915_vma *vma; 5519 5520 GEM_BUG_ON(list_empty(&o->vma_list)); 5521 5522 list_for_each_entry(vma, &o->vma_list, obj_link) { 5523 if (vma->is_ggtt && 5524 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 5525 return vma->node.size; 5526 } 5527 5528 return 0; 5529 } 5530 5531 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5532 { 5533 struct i915_vma *vma; 5534 list_for_each_entry(vma, &obj->vma_list, obj_link) 5535 if (vma->pin_count > 0) 5536 return true; 5537 5538 return false; 5539 } 5540 5541 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5542 struct page * 5543 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5544 { 5545 struct page *page; 5546 5547 /* Only default objects have per-page dirty tracking */ 5548 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 5549 
return NULL; 5550 5551 page = i915_gem_object_get_page(obj, n); 5552 set_page_dirty(page); 5553 return page; 5554 } 5555 5556 /* Allocate a new GEM object and fill it with the supplied data */ 5557 struct drm_i915_gem_object * 5558 i915_gem_object_create_from_data(struct drm_device *dev, 5559 const void *data, size_t size) 5560 { 5561 struct drm_i915_gem_object *obj; 5562 struct sg_table *sg; 5563 size_t bytes; 5564 int ret; 5565 5566 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE)); 5567 if (IS_ERR(obj)) 5568 return obj; 5569 5570 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5571 if (ret) 5572 goto fail; 5573 5574 ret = i915_gem_object_get_pages(obj); 5575 if (ret) 5576 goto fail; 5577 5578 i915_gem_object_pin_pages(obj); 5579 sg = obj->pages; 5580 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5581 obj->dirty = 1; /* Backing store is now out of date */ 5582 i915_gem_object_unpin_pages(obj); 5583 5584 if (WARN_ON(bytes != size)) { 5585 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5586 ret = -EFAULT; 5587 goto fail; 5588 } 5589 5590 return obj; 5591 5592 fail: 5593 drm_gem_object_unreference(&obj->base); 5594 return ERR_PTR(ret); 5595 } 5596
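/*
 * Example usage of i915_gem_object_create_from_data() (an illustrative
 * sketch only, not part of the driver): upload a blob of data into a new
 * object and pin it into the global GTT for the GPU to read. It assumes
 * struct_mutex is held by the caller.
 *
 *	struct drm_i915_gem_object *obj;
 *	int ret;
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, 0, 0);
 *	if (ret) {
 *		drm_gem_object_unreference(&obj->base);
 *		return ret;
 *	}
 */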