/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses.
 * Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
		return -EINVAL;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

	return 0;
}

static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *engine)
{
	return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
}

static unsigned long local_clock_us(unsigned *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned cpu)
{
	unsigned this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
	unsigned long timeout;
	unsigned cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->engine->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			unsigned reset_counter,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
	struct drm_device *dev = engine->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);
	unsigned long timeout_expire;
	s64 before = 0; /* Only to silence a compiler warning. */
	int ret;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

		/*
		 * Record current time in case interrupted by signal, or wedged.
		 */
		before = ktime_get_raw_ns();
	}

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	trace_i915_gem_request_wait_begin(req);

	/* Optimistic spin for the next jiffie before touching IRQs */
	ret = __i915_spin_request(req, state);
	if (ret == 0)
		goto out;

	if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) {
		ret = -ENODEV;
		goto out;
	}

	for (;;) {
		struct timer_list timer;

		prepare_to_wait(&engine->irq_queue, &wait, state);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, engine)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
			expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
			mod_timer(&timer, expire);
		}

		io_schedule();

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}
	}
	if (!irq_test_in_progress)
		engine->irq_put(engine);

	finish_wait(&engine->irq_queue, &wait);

out:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (ktime_get_raw_ns() - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop.
		 * So patch things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = get_pid(task_pid(current));

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

	put_pid(request->pid);
	request->pid = NULL;
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	bool interruptible;
	int ret;

	BUG_ON(req == NULL);

	dev = req->engine->dev;
	dev_priv = dev->dev_private;
	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = __i915_wait_request(req,
				  atomic_read(&dev_priv->gpu_error.reset_counter),
				  interruptible, NULL, NULL);
	if (ret)
		return ret;

	__i915_gem_request_retire__upto(req);
	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->engine->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		RQ_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->engine->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	unsigned reset_counter;
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_ENGINES; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], reset_counter, true,
					  NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !cpu_has_pat)
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		down_write(&mm->mmap_sem);
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
	}
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_ggtt_view view = i915_ggtt_view_normal;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/* Use a partial view if the object is bigger than the aperture. */
	if (obj->base.size >= ggtt->mappable_end &&
	    obj->tiling_mode == I915_TILING_NONE) {
		static const unsigned int chunk_size = 256; // 1 MiB

		memset(&view, 0, sizeof(view));
		view.type = I915_GGTT_VIEW_PARTIAL;
		view.params.partial.offset = rounddown(page_offset, chunk_size);
		view.params.partial.size =
			min_t(unsigned int,
			      chunk_size,
			      (vma->vm_end - vma->vm_start)/PAGE_SIZE -
			      view.params.partial.offset);
	}

	/* Now pin it into the GTT if needed */
	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	/* Finally, remap it using the new GTT offset */
	pfn = ggtt->mappable_base +
		i915_gem_obj_ggtt_offset_view(obj, &view);
	pfn >>= PAGE_SHIFT;

	if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
		/* Overriding existing pages in partial view does not cause
		 * us any trouble as TLBs are still valid because the fault
		 * is due to userspace losing part of the mapping or never
		 * having accessed it before (at this partials' range).
		 */
		unsigned long base = vma->vm_start +
				     (view.params.partial.offset << PAGE_SHIFT);
		unsigned int i;

		for (i = 0; i < view.params.partial.size; i++) {
			ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
			if (ret)
				break;
		}

		obj->fault_mappable = true;
	} else {
		if (!obj->fault_mappable) {
			unsigned long size = min_t(unsigned long,
						   vma->vm_end - vma->vm_start,
						   obj->base.size);
			int i;

			for (i = 0; i < size >> PAGE_SHIFT; i++) {
				ret = vm_insert_pfn(vma,
						    (unsigned long)vma->vm_start + i * PAGE_SIZE,
						    pfn + i);
				if (ret)
					break;
			}

			obj->fault_mappable = true;
		} else
			ret = vm_insert_pfn(vma,
					    (unsigned long)vmf->virtual_address,
					    pfn + page_offset);
	}
unpin:
	i915_gem_object_ggtt_unpin_view(obj, &view);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/*
		 * We eat errors when the gpu is terminally wedged to avoid
		 * userspace unduly crashing (gl has no provisions for mmaps to
		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
		 * and so needs to be reported.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			ret = VM_FAULT_SIGBUS;
			break;
		}
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		ret = VM_FAULT_NOPAGE;
		break;
	case -ENOMEM:
		ret = VM_FAULT_OOM;
		break;
	case -ENOSPC:
	case -EFAULT:
		ret = VM_FAULT_SIGBUS;
		break;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_FAULT_SIGBUS;
		break;
	}

	intel_runtime_pm_put(dev_priv);
	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);
	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects.
The closest we can do is to release the 2046 * offsets on purgeable objects by truncating the object and marking it purged, 2047 * which prevents userspace from ever using that object again. 2048 */ 2049 i915_gem_shrink(dev_priv, 2050 obj->base.size >> PAGE_SHIFT, 2051 I915_SHRINK_BOUND | 2052 I915_SHRINK_UNBOUND | 2053 I915_SHRINK_PURGEABLE); 2054 ret = drm_gem_create_mmap_offset(&obj->base); 2055 if (ret != -ENOSPC) 2056 goto out; 2057 2058 i915_gem_shrink_all(dev_priv); 2059 ret = drm_gem_create_mmap_offset(&obj->base); 2060 out: 2061 dev_priv->mm.shrinker_no_lock_stealing = false; 2062 2063 return ret; 2064 } 2065 2066 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2067 { 2068 drm_gem_free_mmap_offset(&obj->base); 2069 } 2070 2071 int 2072 i915_gem_mmap_gtt(struct drm_file *file, 2073 struct drm_device *dev, 2074 uint32_t handle, 2075 uint64_t *offset) 2076 { 2077 struct drm_i915_gem_object *obj; 2078 int ret; 2079 2080 ret = i915_mutex_lock_interruptible(dev); 2081 if (ret) 2082 return ret; 2083 2084 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2085 if (&obj->base == NULL) { 2086 ret = -ENOENT; 2087 goto unlock; 2088 } 2089 2090 if (obj->madv != I915_MADV_WILLNEED) { 2091 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2092 ret = -EFAULT; 2093 goto out; 2094 } 2095 2096 ret = i915_gem_object_create_mmap_offset(obj); 2097 if (ret) 2098 goto out; 2099 2100 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2101 2102 out: 2103 drm_gem_object_unreference(&obj->base); 2104 unlock: 2105 mutex_unlock(&dev->struct_mutex); 2106 return ret; 2107 } 2108 2109 /** 2110 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2111 * @dev: DRM device 2112 * @data: GTT mapping ioctl data 2113 * @file: GEM object info 2114 * 2115 * Simply returns the fake offset to userspace so it can mmap it. 2116 * The mmap call will end up in drm_gem_mmap(), which will set things 2117 * up so we can get faults in the handler above. 2118 * 2119 * The fault handler will take care of binding the object into the GTT 2120 * (since it may have been evicted to make room for something), allocating 2121 * a fence register, and mapping the appropriate aperture address into 2122 * userspace. 2123 */ 2124 int 2125 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2126 struct drm_file *file) 2127 { 2128 struct drm_i915_gem_mmap_gtt *args = data; 2129 2130 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2131 } 2132 2133 /* Immediately discard the backing storage */ 2134 static void 2135 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2136 { 2137 i915_gem_object_free_mmap_offset(obj); 2138 2139 if (obj->base.filp == NULL) 2140 return; 2141 2142 /* Our goal here is to return as much of the memory as 2143 * possible back to the system as we are called from OOM. 2144 * To do this we must instruct the shmfs to drop all of its 2145 * backing pages, *now*.
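 * Once truncated, the object is marked __I915_MADV_PURGED below; any
 * later attempt to use its backing storage (for example via
 * i915_gem_object_get_pages()) will be rejected, so the contents are
 * irrecoverably lost.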
2146 */ 2147 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2148 obj->madv = __I915_MADV_PURGED; 2149 } 2150 2151 /* Try to discard unwanted pages */ 2152 static void 2153 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2154 { 2155 struct address_space *mapping; 2156 2157 switch (obj->madv) { 2158 case I915_MADV_DONTNEED: 2159 i915_gem_object_truncate(obj); 2160 case __I915_MADV_PURGED: 2161 return; 2162 } 2163 2164 if (obj->base.filp == NULL) 2165 return; 2166 2167 mapping = file_inode(obj->base.filp)->i_mapping, 2168 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2169 } 2170 2171 static void 2172 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2173 { 2174 struct sg_page_iter sg_iter; 2175 int ret; 2176 2177 BUG_ON(obj->madv == __I915_MADV_PURGED); 2178 2179 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2180 if (ret) { 2181 /* In the event of a disaster, abandon all caches and 2182 * hope for the best. 2183 */ 2184 WARN_ON(ret != -EIO); 2185 i915_gem_clflush_object(obj, true); 2186 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2187 } 2188 2189 i915_gem_gtt_finish_object(obj); 2190 2191 if (i915_gem_object_needs_bit17_swizzle(obj)) 2192 i915_gem_object_save_bit_17_swizzle(obj); 2193 2194 if (obj->madv == I915_MADV_DONTNEED) 2195 obj->dirty = 0; 2196 2197 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2198 struct page *page = sg_page_iter_page(&sg_iter); 2199 2200 if (obj->dirty) 2201 set_page_dirty(page); 2202 2203 if (obj->madv == I915_MADV_WILLNEED) 2204 mark_page_accessed(page); 2205 2206 put_page(page); 2207 } 2208 obj->dirty = 0; 2209 2210 sg_free_table(obj->pages); 2211 kfree(obj->pages); 2212 } 2213 2214 int 2215 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2216 { 2217 const struct drm_i915_gem_object_ops *ops = obj->ops; 2218 2219 if (obj->pages == NULL) 2220 return 0; 2221 2222 if (obj->pages_pin_count) 2223 return -EBUSY; 2224 2225 BUG_ON(i915_gem_obj_bound_any(obj)); 2226 2227 /* ->put_pages might need to allocate memory for the bit17 swizzle 2228 * array, hence protect them from being reaped by removing them from gtt 2229 * lists early. */ 2230 list_del(&obj->global_list); 2231 2232 if (obj->mapping) { 2233 if (is_vmalloc_addr(obj->mapping)) 2234 vunmap(obj->mapping); 2235 else 2236 kunmap(kmap_to_page(obj->mapping)); 2237 obj->mapping = NULL; 2238 } 2239 2240 ops->put_pages(obj); 2241 obj->pages = NULL; 2242 2243 i915_gem_object_invalidate(obj); 2244 2245 return 0; 2246 } 2247 2248 static int 2249 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2250 { 2251 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2252 int page_count, i; 2253 struct address_space *mapping; 2254 struct sg_table *st; 2255 struct scatterlist *sg; 2256 struct sg_page_iter sg_iter; 2257 struct page *page; 2258 unsigned long last_pfn = 0; /* suppress gcc warning */ 2259 int ret; 2260 gfp_t gfp; 2261 2262 /* Assert that the object is not currently in any GPU domain. 
As it 2263 * wasn't in the GTT, there shouldn't be any way it could have been in 2264 * a GPU cache 2265 */ 2266 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2267 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2268 2269 st = kmalloc(sizeof(*st), GFP_KERNEL); 2270 if (st == NULL) 2271 return -ENOMEM; 2272 2273 page_count = obj->base.size / PAGE_SIZE; 2274 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2275 kfree(st); 2276 return -ENOMEM; 2277 } 2278 2279 /* Get the list of pages out of our struct file. They'll be pinned 2280 * at this point until we release them. 2281 * 2282 * Fail silently without starting the shrinker 2283 */ 2284 mapping = file_inode(obj->base.filp)->i_mapping; 2285 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2286 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2287 sg = st->sgl; 2288 st->nents = 0; 2289 for (i = 0; i < page_count; i++) { 2290 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2291 if (IS_ERR(page)) { 2292 i915_gem_shrink(dev_priv, 2293 page_count, 2294 I915_SHRINK_BOUND | 2295 I915_SHRINK_UNBOUND | 2296 I915_SHRINK_PURGEABLE); 2297 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2298 } 2299 if (IS_ERR(page)) { 2300 /* We've tried hard to allocate the memory by reaping 2301 * our own buffer, now let the real VM do its job and 2302 * go down in flames if truly OOM. 2303 */ 2304 i915_gem_shrink_all(dev_priv); 2305 page = shmem_read_mapping_page(mapping, i); 2306 if (IS_ERR(page)) { 2307 ret = PTR_ERR(page); 2308 goto err_pages; 2309 } 2310 } 2311 #ifdef CONFIG_SWIOTLB 2312 if (swiotlb_nr_tbl()) { 2313 st->nents++; 2314 sg_set_page(sg, page, PAGE_SIZE, 0); 2315 sg = sg_next(sg); 2316 continue; 2317 } 2318 #endif 2319 if (!i || page_to_pfn(page) != last_pfn + 1) { 2320 if (i) 2321 sg = sg_next(sg); 2322 st->nents++; 2323 sg_set_page(sg, page, PAGE_SIZE, 0); 2324 } else { 2325 sg->length += PAGE_SIZE; 2326 } 2327 last_pfn = page_to_pfn(page); 2328 2329 /* Check that the i965g/gm workaround works. */ 2330 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2331 } 2332 #ifdef CONFIG_SWIOTLB 2333 if (!swiotlb_nr_tbl()) 2334 #endif 2335 sg_mark_end(sg); 2336 obj->pages = st; 2337 2338 ret = i915_gem_gtt_prepare_object(obj); 2339 if (ret) 2340 goto err_pages; 2341 2342 if (i915_gem_object_needs_bit17_swizzle(obj)) 2343 i915_gem_object_do_bit_17_swizzle(obj); 2344 2345 if (obj->tiling_mode != I915_TILING_NONE && 2346 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2347 i915_gem_object_pin_pages(obj); 2348 2349 return 0; 2350 2351 err_pages: 2352 sg_mark_end(sg); 2353 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2354 put_page(sg_page_iter_page(&sg_iter)); 2355 sg_free_table(st); 2356 kfree(st); 2357 2358 /* shmemfs first checks if there is enough memory to allocate the page 2359 * and reports ENOSPC should there be insufficient, along with the usual 2360 * ENOMEM for a genuine allocation failure. 2361 * 2362 * We use ENOSPC in our driver to mean that we have run out of aperture 2363 * space and so want to translate the error from shmemfs back to our 2364 * usual understanding of ENOMEM. 2365 */ 2366 if (ret == -ENOSPC) 2367 ret = -ENOMEM; 2368 2369 return ret; 2370 } 2371 2372 /* Ensure that the associated pages are gathered from the backing storage 2373 * and pinned into our object. 
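 * A typical caller (illustrative sketch only; i915_gem_object_pin_map()
 * below follows this pattern) pairs the calls like so:
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... use obj->pages ...
 *	i915_gem_object_unpin_pages(obj);
 *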
i915_gem_object_get_pages() may be called 2374 * multiple times before they are released by a single call to 2375 * i915_gem_object_put_pages() - once the pages are no longer referenced 2376 * either as a result of memory pressure (reaping pages under the shrinker) 2377 * or as the object is itself released. 2378 */ 2379 int 2380 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2381 { 2382 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2383 const struct drm_i915_gem_object_ops *ops = obj->ops; 2384 int ret; 2385 2386 if (obj->pages) 2387 return 0; 2388 2389 if (obj->madv != I915_MADV_WILLNEED) { 2390 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2391 return -EFAULT; 2392 } 2393 2394 BUG_ON(obj->pages_pin_count); 2395 2396 ret = ops->get_pages(obj); 2397 if (ret) 2398 return ret; 2399 2400 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2401 2402 obj->get_page.sg = obj->pages->sgl; 2403 obj->get_page.last = 0; 2404 2405 return 0; 2406 } 2407 2408 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2409 { 2410 int ret; 2411 2412 lockdep_assert_held(&obj->base.dev->struct_mutex); 2413 2414 ret = i915_gem_object_get_pages(obj); 2415 if (ret) 2416 return ERR_PTR(ret); 2417 2418 i915_gem_object_pin_pages(obj); 2419 2420 if (obj->mapping == NULL) { 2421 struct page **pages; 2422 2423 pages = NULL; 2424 if (obj->base.size == PAGE_SIZE) 2425 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2426 else 2427 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2428 sizeof(*pages), 2429 GFP_TEMPORARY); 2430 if (pages != NULL) { 2431 struct sg_page_iter sg_iter; 2432 int n; 2433 2434 n = 0; 2435 for_each_sg_page(obj->pages->sgl, &sg_iter, 2436 obj->pages->nents, 0) 2437 pages[n++] = sg_page_iter_page(&sg_iter); 2438 2439 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2440 drm_free_large(pages); 2441 } 2442 if (obj->mapping == NULL) { 2443 i915_gem_object_unpin_pages(obj); 2444 return ERR_PTR(-ENOMEM); 2445 } 2446 } 2447 2448 return obj->mapping; 2449 } 2450 2451 void i915_vma_move_to_active(struct i915_vma *vma, 2452 struct drm_i915_gem_request *req) 2453 { 2454 struct drm_i915_gem_object *obj = vma->obj; 2455 struct intel_engine_cs *engine; 2456 2457 engine = i915_gem_request_get_engine(req); 2458 2459 /* Add a reference if we're newly entering the active list. 
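 * The reference taken here is dropped again in
 * i915_gem_object_retire__read() below, once the last outstanding read
 * request has been retired and obj->active falls back to zero.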
*/ 2460 if (obj->active == 0) 2461 drm_gem_object_reference(&obj->base); 2462 obj->active |= intel_engine_flag(engine); 2463 2464 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2465 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2466 2467 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2468 } 2469 2470 static void 2471 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2472 { 2473 RQ_BUG_ON(obj->last_write_req == NULL); 2474 RQ_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2475 2476 i915_gem_request_assign(&obj->last_write_req, NULL); 2477 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2478 } 2479 2480 static void 2481 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2482 { 2483 struct i915_vma *vma; 2484 2485 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2486 RQ_BUG_ON(!(obj->active & (1 << ring))); 2487 2488 list_del_init(&obj->engine_list[ring]); 2489 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2490 2491 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2492 i915_gem_object_retire__write(obj); 2493 2494 obj->active &= ~(1 << ring); 2495 if (obj->active) 2496 return; 2497 2498 /* Bump our place on the bound list to keep it roughly in LRU order 2499 * so that we don't steal from recently used but inactive objects 2500 * (unless we are forced to ofc!) 2501 */ 2502 list_move_tail(&obj->global_list, 2503 &to_i915(obj->base.dev)->mm.bound_list); 2504 2505 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2506 if (!list_empty(&vma->vm_link)) 2507 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2508 } 2509 2510 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2511 drm_gem_object_unreference(&obj->base); 2512 } 2513 2514 static int 2515 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2516 { 2517 struct drm_i915_private *dev_priv = dev->dev_private; 2518 struct intel_engine_cs *engine; 2519 int ret; 2520 2521 /* Carefully retire all requests without writing to the rings */ 2522 for_each_engine(engine, dev_priv) { 2523 ret = intel_engine_idle(engine); 2524 if (ret) 2525 return ret; 2526 } 2527 i915_gem_retire_requests(dev); 2528 2529 /* Finally reset hw state */ 2530 for_each_engine(engine, dev_priv) 2531 intel_ring_init_seqno(engine, seqno); 2532 2533 return 0; 2534 } 2535 2536 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2537 { 2538 struct drm_i915_private *dev_priv = dev->dev_private; 2539 int ret; 2540 2541 if (seqno == 0) 2542 return -EINVAL; 2543 2544 /* HWS page needs to be set less than what we 2545 * will inject to ring 2546 */ 2547 ret = i915_gem_init_seqno(dev, seqno - 1); 2548 if (ret) 2549 return ret; 2550 2551 /* Carefully set the last_seqno value so that wrap 2552 * detection still works 2553 */ 2554 dev_priv->next_seqno = seqno; 2555 dev_priv->last_seqno = seqno - 1; 2556 if (dev_priv->last_seqno == 0) 2557 dev_priv->last_seqno--; 2558 2559 return 0; 2560 } 2561 2562 int 2563 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2564 { 2565 struct drm_i915_private *dev_priv = dev->dev_private; 2566 2567 /* reserve 0 for non-seqno */ 2568 if (dev_priv->next_seqno == 0) { 2569 int ret = i915_gem_init_seqno(dev, 0); 2570 if (ret) 2571 return ret; 2572 2573 dev_priv->next_seqno = 1; 2574 } 2575 2576 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2577 return 0; 2578 } 2579 2580 /* 2581 * NB: This function is not allowed to fail. 
Doing so would mean the 2582 * request is not being tracked for completion but the work itself is 2583 * going to happen on the hardware. This would be a Bad Thing(tm). 2584 */ 2585 void __i915_add_request(struct drm_i915_gem_request *request, 2586 struct drm_i915_gem_object *obj, 2587 bool flush_caches) 2588 { 2589 struct intel_engine_cs *engine; 2590 struct drm_i915_private *dev_priv; 2591 struct intel_ringbuffer *ringbuf; 2592 u32 request_start; 2593 int ret; 2594 2595 if (WARN_ON(request == NULL)) 2596 return; 2597 2598 engine = request->engine; 2599 dev_priv = request->i915; 2600 ringbuf = request->ringbuf; 2601 2602 /* 2603 * To ensure that this call will not fail, space for its emissions 2604 * should already have been reserved in the ring buffer. Let the ring 2605 * know that it is time to use that space up. 2606 */ 2607 intel_ring_reserved_space_use(ringbuf); 2608 2609 request_start = intel_ring_get_tail(ringbuf); 2610 /* 2611 * Emit any outstanding flushes - execbuf can fail to emit the flush 2612 * after having emitted the batchbuffer command. Hence we need to fix 2613 * things up similar to emitting the lazy request. The difference here 2614 * is that the flush _must_ happen before the next request, no matter 2615 * what. 2616 */ 2617 if (flush_caches) { 2618 if (i915.enable_execlists) 2619 ret = logical_ring_flush_all_caches(request); 2620 else 2621 ret = intel_ring_flush_all_caches(request); 2622 /* Not allowed to fail! */ 2623 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2624 } 2625 2626 trace_i915_gem_request_add(request); 2627 2628 request->head = request_start; 2629 2630 /* Whilst this request exists, batch_obj will be on the 2631 * active_list, and so will hold the active reference. Only when this 2632 * request is retired will the batch_obj be moved onto the 2633 * inactive_list and lose its active reference. Hence we do not need 2634 * to explicitly hold another reference here. 2635 */ 2636 request->batch_obj = obj; 2637 2638 /* Seal the request and mark it as pending execution. Note that 2639 * we may inspect this state, without holding any locks, during 2640 * hangcheck. Hence we apply the barrier to ensure that we do not 2641 * see a more recent value in the hws than we are tracking. 2642 */ 2643 request->emitted_jiffies = jiffies; 2644 request->previous_seqno = engine->last_submitted_seqno; 2645 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2646 list_add_tail(&request->list, &engine->request_list); 2647 2648 /* Record the position of the start of the request so that 2649 * should we detect the updated seqno part-way through the 2650 * GPU processing the request, we never over-estimate the 2651 * position of the head. 2652 */ 2653 request->postfix = intel_ring_get_tail(ringbuf); 2654 2655 if (i915.enable_execlists) 2656 ret = engine->emit_request(request); 2657 else { 2658 ret = engine->add_request(request); 2659 2660 request->tail = intel_ring_get_tail(ringbuf); 2661 } 2662 /* Not allowed to fail! */ 2663 WARN(ret, "emit|add_request failed: %d!\n", ret); 2664 2665 i915_queue_hangcheck(engine->dev); 2666 2667 queue_delayed_work(dev_priv->wq, 2668 &dev_priv->mm.retire_work, 2669 round_jiffies_up_relative(HZ)); 2670 intel_mark_busy(dev_priv->dev); 2671 2672 /* Sanity check that the reserved size was large enough.
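 * (The space was set aside at request-creation time, via
 * intel_ring_reserve_space() or intel_logical_ring_reserve_space() in
 * __i915_gem_request_alloc(); intel_ring_reserved_space_end() below
 * closes out that reservation and verifies that the commands emitted
 * above stayed within the estimate.)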
*/ 2673 intel_ring_reserved_space_end(ringbuf); 2674 } 2675 2676 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2677 const struct intel_context *ctx) 2678 { 2679 unsigned long elapsed; 2680 2681 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2682 2683 if (ctx->hang_stats.banned) 2684 return true; 2685 2686 if (ctx->hang_stats.ban_period_seconds && 2687 elapsed <= ctx->hang_stats.ban_period_seconds) { 2688 if (!i915_gem_context_is_default(ctx)) { 2689 DRM_DEBUG("context hanging too fast, banning!\n"); 2690 return true; 2691 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2692 if (i915_stop_ring_allow_warn(dev_priv)) 2693 DRM_ERROR("gpu hanging too fast, banning!\n"); 2694 return true; 2695 } 2696 } 2697 2698 return false; 2699 } 2700 2701 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2702 struct intel_context *ctx, 2703 const bool guilty) 2704 { 2705 struct i915_ctx_hang_stats *hs; 2706 2707 if (WARN_ON(!ctx)) 2708 return; 2709 2710 hs = &ctx->hang_stats; 2711 2712 if (guilty) { 2713 hs->banned = i915_context_is_banned(dev_priv, ctx); 2714 hs->batch_active++; 2715 hs->guilty_ts = get_seconds(); 2716 } else { 2717 hs->batch_pending++; 2718 } 2719 } 2720 2721 void i915_gem_request_free(struct kref *req_ref) 2722 { 2723 struct drm_i915_gem_request *req = container_of(req_ref, 2724 typeof(*req), ref); 2725 struct intel_context *ctx = req->ctx; 2726 2727 if (req->file_priv) 2728 i915_gem_request_remove_from_client(req); 2729 2730 if (ctx) { 2731 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2732 intel_lr_context_unpin(ctx, req->engine); 2733 2734 i915_gem_context_unreference(ctx); 2735 } 2736 2737 kmem_cache_free(req->i915->requests, req); 2738 } 2739 2740 static inline int 2741 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2742 struct intel_context *ctx, 2743 struct drm_i915_gem_request **req_out) 2744 { 2745 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2746 struct drm_i915_gem_request *req; 2747 int ret; 2748 2749 if (!req_out) 2750 return -EINVAL; 2751 2752 *req_out = NULL; 2753 2754 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 2755 if (req == NULL) 2756 return -ENOMEM; 2757 2758 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2759 if (ret) 2760 goto err; 2761 2762 kref_init(&req->ref); 2763 req->i915 = dev_priv; 2764 req->engine = engine; 2765 req->ctx = ctx; 2766 i915_gem_context_reference(req->ctx); 2767 2768 if (i915.enable_execlists) 2769 ret = intel_logical_ring_alloc_request_extras(req); 2770 else 2771 ret = intel_ring_alloc_request_extras(req); 2772 if (ret) { 2773 i915_gem_context_unreference(req->ctx); 2774 goto err; 2775 } 2776 2777 /* 2778 * Reserve space in the ring buffer for all the commands required to 2779 * eventually emit this request. This is to guarantee that the 2780 * i915_add_request() call can't fail. Note that the reserve may need 2781 * to be redone if the request is not actually submitted straight 2782 * away, e.g. because a GPU scheduler has deferred it. 2783 */ 2784 if (i915.enable_execlists) 2785 ret = intel_logical_ring_reserve_space(req); 2786 else 2787 ret = intel_ring_reserve_space(req); 2788 if (ret) { 2789 /* 2790 * At this point, the request is fully allocated even if not 2791 * fully prepared. Thus it can be cleaned up using the proper 2792 * free code. 
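 * i915_gem_request_cancel() below drops the ring-space reservation, if
 * one was taken, and releases the reference, which funnels into
 * i915_gem_request_free() above.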
2793 */ 2794 i915_gem_request_cancel(req); 2795 return ret; 2796 } 2797 2798 *req_out = req; 2799 return 0; 2800 2801 err: 2802 kmem_cache_free(dev_priv->requests, req); 2803 return ret; 2804 } 2805 2806 /** 2807 * i915_gem_request_alloc - allocate a request structure 2808 * 2809 * @engine: engine that we wish to issue the request on. 2810 * @ctx: context that the request will be associated with. 2811 * This can be NULL if the request is not directly related to 2812 * any specific user context, in which case this function will 2813 * choose an appropriate context to use. 2814 * 2815 * Returns a pointer to the allocated request if successful, 2816 * or an error code if not. 2817 */ 2818 struct drm_i915_gem_request * 2819 i915_gem_request_alloc(struct intel_engine_cs *engine, 2820 struct intel_context *ctx) 2821 { 2822 struct drm_i915_gem_request *req; 2823 int err; 2824 2825 if (ctx == NULL) 2826 ctx = to_i915(engine->dev)->kernel_context; 2827 err = __i915_gem_request_alloc(engine, ctx, &req); 2828 return err ? ERR_PTR(err) : req; 2829 } 2830 2831 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2832 { 2833 intel_ring_reserved_space_cancel(req->ringbuf); 2834 2835 i915_gem_request_unreference(req); 2836 } 2837 2838 struct drm_i915_gem_request * 2839 i915_gem_find_active_request(struct intel_engine_cs *engine) 2840 { 2841 struct drm_i915_gem_request *request; 2842 2843 list_for_each_entry(request, &engine->request_list, list) { 2844 if (i915_gem_request_completed(request, false)) 2845 continue; 2846 2847 return request; 2848 } 2849 2850 return NULL; 2851 } 2852 2853 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 2854 struct intel_engine_cs *engine) 2855 { 2856 struct drm_i915_gem_request *request; 2857 bool ring_hung; 2858 2859 request = i915_gem_find_active_request(engine); 2860 2861 if (request == NULL) 2862 return; 2863 2864 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2865 2866 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2867 2868 list_for_each_entry_continue(request, &engine->request_list, list) 2869 i915_set_reset_status(dev_priv, request->ctx, false); 2870 } 2871 2872 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 2873 struct intel_engine_cs *engine) 2874 { 2875 struct intel_ringbuffer *buffer; 2876 2877 while (!list_empty(&engine->active_list)) { 2878 struct drm_i915_gem_object *obj; 2879 2880 obj = list_first_entry(&engine->active_list, 2881 struct drm_i915_gem_object, 2882 engine_list[engine->id]); 2883 2884 i915_gem_object_retire__read(obj, engine->id); 2885 } 2886 2887 /* 2888 * Clear the execlists queue up before freeing the requests, as those 2889 * are the ones that keep the context and ringbuffer backing objects 2890 * pinned in place. 2891 */ 2892 2893 if (i915.enable_execlists) { 2894 /* Ensure irq handler finishes or is cancelled. */ 2895 tasklet_kill(&engine->irq_tasklet); 2896 2897 spin_lock_bh(&engine->execlist_lock); 2898 /* list_splice_tail_init checks for empty lists */ 2899 list_splice_tail_init(&engine->execlist_queue, 2900 &engine->execlist_retired_req_list); 2901 spin_unlock_bh(&engine->execlist_lock); 2902 2903 intel_execlists_retire_requests(engine); 2904 } 2905 2906 /* 2907 * We must free the requests after all the corresponding objects have 2908 * been moved off active lists. Which is the same order as the normal 2909 * retire_requests function does. This is important if object hold 2910 * implicit references on things like e.g. 
ppgtt address spaces through 2911 * the request. 2912 */ 2913 while (!list_empty(&engine->request_list)) { 2914 struct drm_i915_gem_request *request; 2915 2916 request = list_first_entry(&engine->request_list, 2917 struct drm_i915_gem_request, 2918 list); 2919 2920 i915_gem_request_retire(request); 2921 } 2922 2923 /* Having flushed all requests from all queues, we know that all 2924 * ringbuffers must now be empty. However, since we do not reclaim 2925 * all space when retiring the request (to prevent HEADs colliding 2926 * with rapid ringbuffer wraparound) the amount of available space 2927 * upon reset is less than when we start. Do one more pass over 2928 * all the ringbuffers to reset last_retired_head. 2929 */ 2930 list_for_each_entry(buffer, &engine->buffers, link) { 2931 buffer->last_retired_head = buffer->tail; 2932 intel_ring_update_space(buffer); 2933 } 2934 2935 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 2936 } 2937 2938 void i915_gem_reset(struct drm_device *dev) 2939 { 2940 struct drm_i915_private *dev_priv = dev->dev_private; 2941 struct intel_engine_cs *engine; 2942 2943 /* 2944 * Before we free the objects from the requests, we need to inspect 2945 * them for finding the guilty party. As the requests only borrow 2946 * their reference to the objects, the inspection must be done first. 2947 */ 2948 for_each_engine(engine, dev_priv) 2949 i915_gem_reset_engine_status(dev_priv, engine); 2950 2951 for_each_engine(engine, dev_priv) 2952 i915_gem_reset_engine_cleanup(dev_priv, engine); 2953 2954 i915_gem_context_reset(dev); 2955 2956 i915_gem_restore_fences(dev); 2957 2958 WARN_ON(i915_verify_lists(dev)); 2959 } 2960 2961 /** 2962 * This function clears the request list as sequence numbers are passed. 2963 */ 2964 void 2965 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 2966 { 2967 WARN_ON(i915_verify_lists(engine->dev)); 2968 2969 /* Retire requests first as we use it above for the early return. 2970 * If we retire requests last, we may use a later seqno and so clear 2971 * the requests lists without clearing the active list, leading to 2972 * confusion. 2973 */ 2974 while (!list_empty(&engine->request_list)) { 2975 struct drm_i915_gem_request *request; 2976 2977 request = list_first_entry(&engine->request_list, 2978 struct drm_i915_gem_request, 2979 list); 2980 2981 if (!i915_gem_request_completed(request, true)) 2982 break; 2983 2984 i915_gem_request_retire(request); 2985 } 2986 2987 /* Move any buffers on the active list that are no longer referenced 2988 * by the ringbuffer to the flushing/inactive lists as appropriate, 2989 * before we free the context associated with the requests. 
2990 */ 2991 while (!list_empty(&engine->active_list)) { 2992 struct drm_i915_gem_object *obj; 2993 2994 obj = list_first_entry(&engine->active_list, 2995 struct drm_i915_gem_object, 2996 engine_list[engine->id]); 2997 2998 if (!list_empty(&obj->last_read_req[engine->id]->list)) 2999 break; 3000 3001 i915_gem_object_retire__read(obj, engine->id); 3002 } 3003 3004 if (unlikely(engine->trace_irq_req && 3005 i915_gem_request_completed(engine->trace_irq_req, true))) { 3006 engine->irq_put(engine); 3007 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3008 } 3009 3010 WARN_ON(i915_verify_lists(engine->dev)); 3011 } 3012 3013 bool 3014 i915_gem_retire_requests(struct drm_device *dev) 3015 { 3016 struct drm_i915_private *dev_priv = dev->dev_private; 3017 struct intel_engine_cs *engine; 3018 bool idle = true; 3019 3020 for_each_engine(engine, dev_priv) { 3021 i915_gem_retire_requests_ring(engine); 3022 idle &= list_empty(&engine->request_list); 3023 if (i915.enable_execlists) { 3024 spin_lock_bh(&engine->execlist_lock); 3025 idle &= list_empty(&engine->execlist_queue); 3026 spin_unlock_bh(&engine->execlist_lock); 3027 3028 intel_execlists_retire_requests(engine); 3029 } 3030 } 3031 3032 if (idle) 3033 mod_delayed_work(dev_priv->wq, 3034 &dev_priv->mm.idle_work, 3035 msecs_to_jiffies(100)); 3036 3037 return idle; 3038 } 3039 3040 static void 3041 i915_gem_retire_work_handler(struct work_struct *work) 3042 { 3043 struct drm_i915_private *dev_priv = 3044 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3045 struct drm_device *dev = dev_priv->dev; 3046 bool idle; 3047 3048 /* Come back later if the device is busy... */ 3049 idle = false; 3050 if (mutex_trylock(&dev->struct_mutex)) { 3051 idle = i915_gem_retire_requests(dev); 3052 mutex_unlock(&dev->struct_mutex); 3053 } 3054 if (!idle) 3055 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3056 round_jiffies_up_relative(HZ)); 3057 } 3058 3059 static void 3060 i915_gem_idle_work_handler(struct work_struct *work) 3061 { 3062 struct drm_i915_private *dev_priv = 3063 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3064 struct drm_device *dev = dev_priv->dev; 3065 struct intel_engine_cs *engine; 3066 3067 for_each_engine(engine, dev_priv) 3068 if (!list_empty(&engine->request_list)) 3069 return; 3070 3071 /* we probably should sync with hangcheck here, using cancel_work_sync. 3072 * Also locking seems to be fubar here, engine->request_list is protected 3073 * by dev->struct_mutex. */ 3074 3075 intel_mark_idle(dev); 3076 3077 if (mutex_trylock(&dev->struct_mutex)) { 3078 for_each_engine(engine, dev_priv) 3079 i915_gem_batch_pool_fini(&engine->batch_pool); 3080 3081 mutex_unlock(&dev->struct_mutex); 3082 } 3083 } 3084 3085 /** 3086 * Ensures that an object will eventually get non-busy by flushing any required 3087 * write domains, emitting any outstanding lazy request and retiring and 3088 * completed requests. 
3089 */ 3090 static int 3091 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3092 { 3093 int i; 3094 3095 if (!obj->active) 3096 return 0; 3097 3098 for (i = 0; i < I915_NUM_ENGINES; i++) { 3099 struct drm_i915_gem_request *req; 3100 3101 req = obj->last_read_req[i]; 3102 if (req == NULL) 3103 continue; 3104 3105 if (list_empty(&req->list)) 3106 goto retire; 3107 3108 if (i915_gem_request_completed(req, true)) { 3109 __i915_gem_request_retire__upto(req); 3110 retire: 3111 i915_gem_object_retire__read(obj, i); 3112 } 3113 } 3114 3115 return 0; 3116 } 3117 3118 /** 3119 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3120 * @DRM_IOCTL_ARGS: standard ioctl arguments 3121 * 3122 * Returns 0 if successful, else an error is returned with the remaining time in 3123 * the timeout parameter. 3124 * -ETIME: object is still busy after timeout 3125 * -ERESTARTSYS: signal interrupted the wait 3126 * -ENOENT: object doesn't exist 3127 * Also possible, but rare: 3128 * -EAGAIN: GPU wedged 3129 * -ENOMEM: damn 3130 * -ENODEV: Internal IRQ fail 3131 * -E?: The add request failed 3132 * 3133 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3134 * non-zero timeout parameter the wait ioctl will wait for the given number of 3135 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3136 * without holding struct_mutex the object may become re-busied before this 3137 * function completes. A similar but shorter race condition exists in the busy 3138 * ioctl. 3139 */ 3140 int 3141 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3142 { 3143 struct drm_i915_private *dev_priv = dev->dev_private; 3144 struct drm_i915_gem_wait *args = data; 3145 struct drm_i915_gem_object *obj; 3146 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3147 unsigned reset_counter; 3148 int i, n = 0; 3149 int ret; 3150 3151 if (args->flags != 0) 3152 return -EINVAL; 3153 3154 ret = i915_mutex_lock_interruptible(dev); 3155 if (ret) 3156 return ret; 3157 3158 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3159 if (&obj->base == NULL) { 3160 mutex_unlock(&dev->struct_mutex); 3161 return -ENOENT; 3162 } 3163 3164 /* Need to make sure the object gets inactive eventually. */ 3165 ret = i915_gem_object_flush_active(obj); 3166 if (ret) 3167 goto out; 3168 3169 if (!obj->active) 3170 goto out; 3171 3172 /* Do this after OLR check to make sure we make forward progress polling 3173 * on this IOCTL with a timeout == 0 (like busy ioctl) 3174 */ 3175 if (args->timeout_ns == 0) { 3176 ret = -ETIME; 3177 goto out; 3178 } 3179 3180 drm_gem_object_unreference(&obj->base); 3181 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3182 3183 for (i = 0; i < I915_NUM_ENGINES; i++) { 3184 if (obj->last_read_req[i] == NULL) 3185 continue; 3186 3187 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3188 } 3189 3190 mutex_unlock(&dev->struct_mutex); 3191 3192 for (i = 0; i < n; i++) { 3193 if (ret == 0) 3194 ret = __i915_wait_request(req[i], reset_counter, true, 3195 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3196 to_rps_client(file)); 3197 i915_gem_request_unreference__unlocked(req[i]); 3198 } 3199 return ret; 3200 3201 out: 3202 drm_gem_object_unreference(&obj->base); 3203 mutex_unlock(&dev->struct_mutex); 3204 return ret; 3205 } 3206 3207 static int 3208 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3209 struct intel_engine_cs *to, 3210 struct drm_i915_gem_request *from_req, 3211 struct drm_i915_gem_request **to_req) 3212 { 3213 struct intel_engine_cs *from; 3214 int ret; 3215 3216 from = i915_gem_request_get_engine(from_req); 3217 if (to == from) 3218 return 0; 3219 3220 if (i915_gem_request_completed(from_req, true)) 3221 return 0; 3222 3223 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3224 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3225 ret = __i915_wait_request(from_req, 3226 atomic_read(&i915->gpu_error.reset_counter), 3227 i915->mm.interruptible, 3228 NULL, 3229 &i915->rps.semaphores); 3230 if (ret) 3231 return ret; 3232 3233 i915_gem_object_retire_request(obj, from_req); 3234 } else { 3235 int idx = intel_ring_sync_index(from, to); 3236 u32 seqno = i915_gem_request_get_seqno(from_req); 3237 3238 WARN_ON(!to_req); 3239 3240 if (seqno <= from->semaphore.sync_seqno[idx]) 3241 return 0; 3242 3243 if (*to_req == NULL) { 3244 struct drm_i915_gem_request *req; 3245 3246 req = i915_gem_request_alloc(to, NULL); 3247 if (IS_ERR(req)) 3248 return PTR_ERR(req); 3249 3250 *to_req = req; 3251 } 3252 3253 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3254 ret = to->semaphore.sync_to(*to_req, from, seqno); 3255 if (ret) 3256 return ret; 3257 3258 /* We use last_read_req because sync_to() 3259 * might have just caused seqno wrap under 3260 * the radar. 3261 */ 3262 from->semaphore.sync_seqno[idx] = 3263 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3264 } 3265 3266 return 0; 3267 } 3268 3269 /** 3270 * i915_gem_object_sync - sync an object to a ring. 3271 * 3272 * @obj: object which may be in use on another ring. 3273 * @to: ring we wish to use the object on. May be NULL. 3274 * @to_req: request we wish to use the object for. See below. 3275 * This will be allocated and returned if a request is 3276 * required but not passed in. 3277 * 3278 * This code is meant to abstract object synchronization with the GPU. 3279 * Calling with NULL implies synchronizing the object with the CPU 3280 * rather than a particular GPU ring. Conceptually we serialise writes 3281 * between engines inside the GPU. We only allow one engine to write 3282 * into a buffer at any time, but multiple readers. To ensure each has 3283 * a coherent view of memory, we must: 3284 * 3285 * - If there is an outstanding write request to the object, the new 3286 * request must wait for it to complete (either CPU or in hw, requests 3287 * on the same ring will be naturally ordered). 3288 * 3289 * - If we are a write request (pending_write_domain is set), the new 3290 * request must wait for outstanding read requests to complete. 3291 * 3292 * For CPU synchronisation (NULL to) no request is required. For syncing with 3293 * rings to_req must be non-NULL. However, a request does not have to be 3294 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3295 * request will be allocated automatically and returned through *to_req. Note 3296 * that it is not guaranteed that commands will be emitted (because the system 3297 * might already be idle). Hence there is no need to create a request that 3298 * might never have any work submitted. 
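 * An illustrative caller (sketch only, submission shown with
 * i915_add_request() for brevity) might therefore look like:
 *
 *	struct drm_i915_gem_request *to_req = NULL;
 *
 *	ret = i915_gem_object_sync(obj, engine, &to_req);
 *	if (ret == 0 && to_req)
 *		i915_add_request(to_req);
 *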
Note further that if a request is 3299 * returned in *to_req, it is the responsibility of the caller to submit 3300 * that request (after potentially adding more work to it). 3301 * 3302 * Returns 0 if successful, else propagates up the lower layer error. 3303 */ 3304 int 3305 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3306 struct intel_engine_cs *to, 3307 struct drm_i915_gem_request **to_req) 3308 { 3309 const bool readonly = obj->base.pending_write_domain == 0; 3310 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3311 int ret, i, n; 3312 3313 if (!obj->active) 3314 return 0; 3315 3316 if (to == NULL) 3317 return i915_gem_object_wait_rendering(obj, readonly); 3318 3319 n = 0; 3320 if (readonly) { 3321 if (obj->last_write_req) 3322 req[n++] = obj->last_write_req; 3323 } else { 3324 for (i = 0; i < I915_NUM_ENGINES; i++) 3325 if (obj->last_read_req[i]) 3326 req[n++] = obj->last_read_req[i]; 3327 } 3328 for (i = 0; i < n; i++) { 3329 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3330 if (ret) 3331 return ret; 3332 } 3333 3334 return 0; 3335 } 3336 3337 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3338 { 3339 u32 old_write_domain, old_read_domains; 3340 3341 /* Force a pagefault for domain tracking on next user access */ 3342 i915_gem_release_mmap(obj); 3343 3344 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3345 return; 3346 3347 /* Wait for any direct GTT access to complete */ 3348 mb(); 3349 3350 old_read_domains = obj->base.read_domains; 3351 old_write_domain = obj->base.write_domain; 3352 3353 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3354 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3355 3356 trace_i915_gem_object_change_domain(obj, 3357 old_read_domains, 3358 old_write_domain); 3359 } 3360 3361 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3362 { 3363 struct drm_i915_gem_object *obj = vma->obj; 3364 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3365 int ret; 3366 3367 if (list_empty(&vma->obj_link)) 3368 return 0; 3369 3370 if (!drm_mm_node_allocated(&vma->node)) { 3371 i915_gem_vma_destroy(vma); 3372 return 0; 3373 } 3374 3375 if (vma->pin_count) 3376 return -EBUSY; 3377 3378 BUG_ON(obj->pages == NULL); 3379 3380 if (wait) { 3381 ret = i915_gem_object_wait_rendering(obj, false); 3382 if (ret) 3383 return ret; 3384 } 3385 3386 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3387 i915_gem_object_finish_gtt(obj); 3388 3389 /* release the fence reg _after_ flushing */ 3390 ret = i915_gem_object_put_fence(obj); 3391 if (ret) 3392 return ret; 3393 } 3394 3395 trace_i915_vma_unbind(vma); 3396 3397 vma->vm->unbind_vma(vma); 3398 vma->bound = 0; 3399 3400 list_del_init(&vma->vm_link); 3401 if (vma->is_ggtt) { 3402 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3403 obj->map_and_fenceable = false; 3404 } else if (vma->ggtt_view.pages) { 3405 sg_free_table(vma->ggtt_view.pages); 3406 kfree(vma->ggtt_view.pages); 3407 } 3408 vma->ggtt_view.pages = NULL; 3409 } 3410 3411 drm_mm_remove_node(&vma->node); 3412 i915_gem_vma_destroy(vma); 3413 3414 /* Since the unbound list is global, only move to that list if 3415 * no more VMAs exist. */ 3416 if (list_empty(&obj->vma_list)) 3417 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3418 3419 /* And finally now the object is completely decoupled from this vma, 3420 * we can drop its hold on the backing storage and allow it to be 3421 * reaped by the shrinker. 
3422 */ 3423 i915_gem_object_unpin_pages(obj); 3424 3425 return 0; 3426 } 3427 3428 int i915_vma_unbind(struct i915_vma *vma) 3429 { 3430 return __i915_vma_unbind(vma, true); 3431 } 3432 3433 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3434 { 3435 return __i915_vma_unbind(vma, false); 3436 } 3437 3438 int i915_gpu_idle(struct drm_device *dev) 3439 { 3440 struct drm_i915_private *dev_priv = dev->dev_private; 3441 struct intel_engine_cs *engine; 3442 int ret; 3443 3444 /* Flush everything onto the inactive list. */ 3445 for_each_engine(engine, dev_priv) { 3446 if (!i915.enable_execlists) { 3447 struct drm_i915_gem_request *req; 3448 3449 req = i915_gem_request_alloc(engine, NULL); 3450 if (IS_ERR(req)) 3451 return PTR_ERR(req); 3452 3453 ret = i915_switch_context(req); 3454 if (ret) { 3455 i915_gem_request_cancel(req); 3456 return ret; 3457 } 3458 3459 i915_add_request_no_flush(req); 3460 } 3461 3462 ret = intel_engine_idle(engine); 3463 if (ret) 3464 return ret; 3465 } 3466 3467 WARN_ON(i915_verify_lists(dev)); 3468 return 0; 3469 } 3470 3471 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3472 unsigned long cache_level) 3473 { 3474 struct drm_mm_node *gtt_space = &vma->node; 3475 struct drm_mm_node *other; 3476 3477 /* 3478 * On some machines we have to be careful when putting differing types 3479 * of snoopable memory together to avoid the prefetcher crossing memory 3480 * domains and dying. During vm initialisation, we decide whether or not 3481 * these constraints apply and set the drm_mm.color_adjust 3482 * appropriately. 3483 */ 3484 if (vma->vm->mm.color_adjust == NULL) 3485 return true; 3486 3487 if (!drm_mm_node_allocated(gtt_space)) 3488 return true; 3489 3490 if (list_empty(&gtt_space->node_list)) 3491 return true; 3492 3493 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3494 if (other->allocated && !other->hole_follows && other->color != cache_level) 3495 return false; 3496 3497 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3498 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3499 return false; 3500 3501 return true; 3502 } 3503 3504 /** 3505 * Finds free space in the GTT aperture and binds the object or a view of it 3506 * there. 3507 */ 3508 static struct i915_vma * 3509 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3510 struct i915_address_space *vm, 3511 const struct i915_ggtt_view *ggtt_view, 3512 unsigned alignment, 3513 uint64_t flags) 3514 { 3515 struct drm_device *dev = obj->base.dev; 3516 struct drm_i915_private *dev_priv = to_i915(dev); 3517 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3518 u32 fence_alignment, unfenced_alignment; 3519 u32 search_flag, alloc_flag; 3520 u64 start, end; 3521 u64 size, fence_size; 3522 struct i915_vma *vma; 3523 int ret; 3524 3525 if (i915_is_ggtt(vm)) { 3526 u32 view_size; 3527 3528 if (WARN_ON(!ggtt_view)) 3529 return ERR_PTR(-EINVAL); 3530 3531 view_size = i915_ggtt_view_size(obj, ggtt_view); 3532 3533 fence_size = i915_gem_get_gtt_size(dev, 3534 view_size, 3535 obj->tiling_mode); 3536 fence_alignment = i915_gem_get_gtt_alignment(dev, 3537 view_size, 3538 obj->tiling_mode, 3539 true); 3540 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3541 view_size, 3542 obj->tiling_mode, 3543 false); 3544 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3545 } else { 3546 fence_size = i915_gem_get_gtt_size(dev, 3547 obj->base.size, 3548 obj->tiling_mode); 3549 fence_alignment = i915_gem_get_gtt_alignment(dev, 3550 obj->base.size, 3551 obj->tiling_mode, 3552 true); 3553 unfenced_alignment = 3554 i915_gem_get_gtt_alignment(dev, 3555 obj->base.size, 3556 obj->tiling_mode, 3557 false); 3558 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3559 } 3560 3561 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3562 end = vm->total; 3563 if (flags & PIN_MAPPABLE) 3564 end = min_t(u64, end, ggtt->mappable_end); 3565 if (flags & PIN_ZONE_4G) 3566 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3567 3568 if (alignment == 0) 3569 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3570 unfenced_alignment; 3571 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3572 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3573 ggtt_view ? ggtt_view->type : 0, 3574 alignment); 3575 return ERR_PTR(-EINVAL); 3576 } 3577 3578 /* If binding the object/GGTT view requires more space than the entire 3579 * aperture has, reject it early before evicting everything in a vain 3580 * attempt to find space. 3581 */ 3582 if (size > end) { 3583 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3584 ggtt_view ? ggtt_view->type : 0, 3585 size, 3586 flags & PIN_MAPPABLE ? "mappable" : "total", 3587 end); 3588 return ERR_PTR(-E2BIG); 3589 } 3590 3591 ret = i915_gem_object_get_pages(obj); 3592 if (ret) 3593 return ERR_PTR(ret); 3594 3595 i915_gem_object_pin_pages(obj); 3596 3597 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3598 i915_gem_obj_lookup_or_create_vma(obj, vm); 3599 3600 if (IS_ERR(vma)) 3601 goto err_unpin; 3602 3603 if (flags & PIN_OFFSET_FIXED) { 3604 uint64_t offset = flags & PIN_OFFSET_MASK; 3605 3606 if (offset & (alignment - 1) || offset + size > end) { 3607 ret = -EINVAL; 3608 goto err_free_vma; 3609 } 3610 vma->node.start = offset; 3611 vma->node.size = size; 3612 vma->node.color = obj->cache_level; 3613 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3614 if (ret) { 3615 ret = i915_gem_evict_for_vma(vma); 3616 if (ret == 0) 3617 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3618 } 3619 if (ret) 3620 goto err_free_vma; 3621 } else { 3622 if (flags & PIN_HIGH) { 3623 search_flag = DRM_MM_SEARCH_BELOW; 3624 alloc_flag = DRM_MM_CREATE_TOP; 3625 } else { 3626 search_flag = DRM_MM_SEARCH_DEFAULT; 3627 alloc_flag = DRM_MM_CREATE_DEFAULT; 3628 } 3629 3630 search_free: 3631 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3632 size, alignment, 3633 obj->cache_level, 3634 start, end, 3635 search_flag, 3636 alloc_flag); 3637 if (ret) { 3638 ret = i915_gem_evict_something(dev, vm, size, alignment, 3639 obj->cache_level, 3640 start, end, 3641 flags); 3642 if (ret == 0) 3643 goto search_free; 3644 3645 goto err_free_vma; 3646 } 3647 } 3648 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3649 ret = -EINVAL; 3650 goto err_remove_node; 3651 } 3652 3653 trace_i915_vma_bind(vma, flags); 3654 ret = i915_vma_bind(vma, obj->cache_level, flags); 3655 if (ret) 3656 goto err_remove_node; 3657 3658 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3659 list_add_tail(&vma->vm_link, &vm->inactive_list); 3660 3661 return vma; 3662 3663 err_remove_node: 3664 drm_mm_remove_node(&vma->node); 3665 err_free_vma: 3666 i915_gem_vma_destroy(vma); 3667 vma = ERR_PTR(ret); 3668 
err_unpin: 3669 i915_gem_object_unpin_pages(obj); 3670 return vma; 3671 } 3672 3673 bool 3674 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3675 bool force) 3676 { 3677 /* If we don't have a page list set up, then we're not pinned 3678 * to GPU, and we can ignore the cache flush because it'll happen 3679 * again at bind time. 3680 */ 3681 if (obj->pages == NULL) 3682 return false; 3683 3684 /* 3685 * Stolen memory is always coherent with the GPU as it is explicitly 3686 * marked as wc by the system, or the system is cache-coherent. 3687 */ 3688 if (obj->stolen || obj->phys_handle) 3689 return false; 3690 3691 /* If the GPU is snooping the contents of the CPU cache, 3692 * we do not need to manually clear the CPU cache lines. However, 3693 * the caches are only snooped when the render cache is 3694 * flushed/invalidated. As we always have to emit invalidations 3695 * and flushes when moving into and out of the RENDER domain, correct 3696 * snooping behaviour occurs naturally as the result of our domain 3697 * tracking. 3698 */ 3699 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3700 obj->cache_dirty = true; 3701 return false; 3702 } 3703 3704 trace_i915_gem_object_clflush(obj); 3705 drm_clflush_sg(obj->pages); 3706 obj->cache_dirty = false; 3707 3708 return true; 3709 } 3710 3711 /** Flushes the GTT write domain for the object if it's dirty. */ 3712 static void 3713 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3714 { 3715 uint32_t old_write_domain; 3716 3717 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3718 return; 3719 3720 /* No actual flushing is required for the GTT write domain. Writes 3721 * to it immediately go to main memory as far as we know, so there's 3722 * no chipset flush. It also doesn't land in render cache. 3723 * 3724 * However, we do have to enforce the order so that all writes through 3725 * the GTT land before any writes to the device, such as updates to 3726 * the GATT itself. 3727 */ 3728 wmb(); 3729 3730 old_write_domain = obj->base.write_domain; 3731 obj->base.write_domain = 0; 3732 3733 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3734 3735 trace_i915_gem_object_change_domain(obj, 3736 obj->base.read_domains, 3737 old_write_domain); 3738 } 3739 3740 /** Flushes the CPU write domain for the object if it's dirty. */ 3741 static void 3742 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3743 { 3744 uint32_t old_write_domain; 3745 3746 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3747 return; 3748 3749 if (i915_gem_clflush_object(obj, obj->pin_display)) 3750 i915_gem_chipset_flush(obj->base.dev); 3751 3752 old_write_domain = obj->base.write_domain; 3753 obj->base.write_domain = 0; 3754 3755 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3756 3757 trace_i915_gem_object_change_domain(obj, 3758 obj->base.read_domains, 3759 old_write_domain); 3760 } 3761 3762 /** 3763 * Moves a single object to the GTT read, and possibly write domain. 3764 * 3765 * This function returns when the move is complete, including waiting on 3766 * flushes to occur. 
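 * For reference, the GTT fault handler earlier in this file follows
 * exactly this sequence: pin the object into the GGTT
 * (i915_gem_object_ggtt_pin()), call this function, then insert the
 * resulting aperture PFNs into the faulting vma with vm_insert_pfn().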
3767 */ 3768 int 3769 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3770 { 3771 struct drm_device *dev = obj->base.dev; 3772 struct drm_i915_private *dev_priv = to_i915(dev); 3773 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3774 uint32_t old_write_domain, old_read_domains; 3775 struct i915_vma *vma; 3776 int ret; 3777 3778 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3779 return 0; 3780 3781 ret = i915_gem_object_wait_rendering(obj, !write); 3782 if (ret) 3783 return ret; 3784 3785 /* Flush and acquire obj->pages so that we are coherent through 3786 * direct access in memory with previous cached writes through 3787 * shmemfs and that our cache domain tracking remains valid. 3788 * For example, if the obj->filp was moved to swap without us 3789 * being notified and releasing the pages, we would mistakenly 3790 * continue to assume that the obj remained out of the CPU cached 3791 * domain. 3792 */ 3793 ret = i915_gem_object_get_pages(obj); 3794 if (ret) 3795 return ret; 3796 3797 i915_gem_object_flush_cpu_write_domain(obj); 3798 3799 /* Serialise direct access to this object with the barriers for 3800 * coherent writes from the GPU, by effectively invalidating the 3801 * GTT domain upon first access. 3802 */ 3803 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3804 mb(); 3805 3806 old_write_domain = obj->base.write_domain; 3807 old_read_domains = obj->base.read_domains; 3808 3809 /* It should now be out of any other write domains, and we can update 3810 * the domain values for our changes. 3811 */ 3812 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3813 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3814 if (write) { 3815 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3816 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3817 obj->dirty = 1; 3818 } 3819 3820 trace_i915_gem_object_change_domain(obj, 3821 old_read_domains, 3822 old_write_domain); 3823 3824 /* And bump the LRU for this access */ 3825 vma = i915_gem_obj_to_ggtt(obj); 3826 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3827 list_move_tail(&vma->vm_link, 3828 &ggtt->base.inactive_list); 3829 3830 return 0; 3831 } 3832 3833 /** 3834 * Changes the cache-level of an object across all VMA. 3835 * 3836 * After this function returns, the object will be in the new cache-level 3837 * across all GTT and the contents of the backing storage will be coherent, 3838 * with respect to the new cache-level. In order to keep the backing storage 3839 * coherent for all users, we only allow a single cache level to be set 3840 * globally on the object and prevent it from being changed whilst the 3841 * hardware is reading from the object. That is if the object is currently 3842 * on the scanout it will be set to uncached (or equivalent display 3843 * cache coherency) and all non-MOCS GPU access will also be uncached so 3844 * that all direct access to the scanout remains coherent. 3845 */ 3846 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3847 enum i915_cache_level cache_level) 3848 { 3849 struct drm_device *dev = obj->base.dev; 3850 struct i915_vma *vma, *next; 3851 bool bound = false; 3852 int ret = 0; 3853 3854 if (obj->cache_level == cache_level) 3855 goto out; 3856 3857 /* Inspect the list of currently bound VMA and unbind any that would 3858 * be invalid given the new cache-level. This is principally to 3859 * catch the issue of the CS prefetch crossing page boundaries and 3860 * reading an invalid PTE on older architectures. 
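 * The check itself is performed by i915_gem_valid_gtt_space() above,
 * which compares the cache colour of neighbouring drm_mm nodes whenever
 * the address space has a color_adjust callback installed.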
3861 */ 3862 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3863 if (!drm_mm_node_allocated(&vma->node)) 3864 continue; 3865 3866 if (vma->pin_count) { 3867 DRM_DEBUG("cannot change the cache level of pinned objects\n"); 3868 return -EBUSY; 3869 } 3870 3871 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3872 ret = i915_vma_unbind(vma); 3873 if (ret) 3874 return ret; 3875 } else 3876 bound = true; 3877 } 3878 3879 /* We can reuse the existing drm_mm nodes but need to change the 3880 * cache-level on the PTE. We could simply unbind them all and 3881 * rebind with the correct cache-level on next use. However since 3882 * we already have a valid slot, dma mapping, pages etc, we may as well 3883 * rewrite the PTE in the belief that doing so tramples upon less 3884 * state and so involves less work. 3885 */ 3886 if (bound) { 3887 /* Before we change the PTE, the GPU must not be accessing it. 3888 * If we wait upon the object, we know that all the bound 3889 * VMA are no longer active. 3890 */ 3891 ret = i915_gem_object_wait_rendering(obj, false); 3892 if (ret) 3893 return ret; 3894 3895 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3896 /* Access to snoopable pages through the GTT is 3897 * incoherent and on some machines causes a hard 3898 * lockup. Relinquish the CPU mmapping to force 3899 * userspace to refault in the pages so that we can 3900 * then double check if the GTT mapping is still 3901 * valid for that pointer access. 3902 */ 3903 i915_gem_release_mmap(obj); 3904 3905 /* As we no longer need a fence for GTT access, 3906 * we can relinquish it now (and so prevent having 3907 * to steal a fence from someone else on the next 3908 * fence request). Note GPU activity would have 3909 * dropped the fence as all snoopable access is 3910 * supposed to be linear. 3911 */ 3912 ret = i915_gem_object_put_fence(obj); 3913 if (ret) 3914 return ret; 3915 } else { 3916 /* We either have incoherent backing store and 3917 * so no GTT access or the architecture is fully 3918 * coherent. In such cases, existing GTT mmaps 3919 * ignore the cache bit in the PTE and we can 3920 * rewrite it without confusing the GPU or having 3921 * to force userspace to fault back in its mmaps. 3922 */ 3923 } 3924 3925 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3926 if (!drm_mm_node_allocated(&vma->node)) 3927 continue; 3928 3929 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3930 if (ret) 3931 return ret; 3932 } 3933 } 3934 3935 list_for_each_entry(vma, &obj->vma_list, obj_link) 3936 vma->node.color = cache_level; 3937 obj->cache_level = cache_level; 3938 3939 out: 3940 /* Flush the dirty CPU caches to the backing storage so that the 3941 * object is now coherent at its new cache level (with respect 3942 * to the access domain).
3943 */ 3944 if (obj->cache_dirty && 3945 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 3946 cpu_write_needs_clflush(obj)) { 3947 if (i915_gem_clflush_object(obj, true)) 3948 i915_gem_chipset_flush(obj->base.dev); 3949 } 3950 3951 return 0; 3952 } 3953 3954 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3955 struct drm_file *file) 3956 { 3957 struct drm_i915_gem_caching *args = data; 3958 struct drm_i915_gem_object *obj; 3959 3960 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3961 if (&obj->base == NULL) 3962 return -ENOENT; 3963 3964 switch (obj->cache_level) { 3965 case I915_CACHE_LLC: 3966 case I915_CACHE_L3_LLC: 3967 args->caching = I915_CACHING_CACHED; 3968 break; 3969 3970 case I915_CACHE_WT: 3971 args->caching = I915_CACHING_DISPLAY; 3972 break; 3973 3974 default: 3975 args->caching = I915_CACHING_NONE; 3976 break; 3977 } 3978 3979 drm_gem_object_unreference_unlocked(&obj->base); 3980 return 0; 3981 } 3982 3983 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3984 struct drm_file *file) 3985 { 3986 struct drm_i915_private *dev_priv = dev->dev_private; 3987 struct drm_i915_gem_caching *args = data; 3988 struct drm_i915_gem_object *obj; 3989 enum i915_cache_level level; 3990 int ret; 3991 3992 switch (args->caching) { 3993 case I915_CACHING_NONE: 3994 level = I915_CACHE_NONE; 3995 break; 3996 case I915_CACHING_CACHED: 3997 /* 3998 * Due to a HW issue on BXT A stepping, GPU stores via a 3999 * snooped mapping may leave stale data in a corresponding CPU 4000 * cacheline, whereas normally such cachelines would get 4001 * invalidated. 4002 */ 4003 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4004 return -ENODEV; 4005 4006 level = I915_CACHE_LLC; 4007 break; 4008 case I915_CACHING_DISPLAY: 4009 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4010 break; 4011 default: 4012 return -EINVAL; 4013 } 4014 4015 intel_runtime_pm_get(dev_priv); 4016 4017 ret = i915_mutex_lock_interruptible(dev); 4018 if (ret) 4019 goto rpm_put; 4020 4021 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4022 if (&obj->base == NULL) { 4023 ret = -ENOENT; 4024 goto unlock; 4025 } 4026 4027 ret = i915_gem_object_set_cache_level(obj, level); 4028 4029 drm_gem_object_unreference(&obj->base); 4030 unlock: 4031 mutex_unlock(&dev->struct_mutex); 4032 rpm_put: 4033 intel_runtime_pm_put(dev_priv); 4034 4035 return ret; 4036 } 4037 4038 /* 4039 * Prepare buffer for display plane (scanout, cursors, etc). 4040 * Can be called from an uninterruptible phase (modesetting) and allows 4041 * any flushes to be pipelined (for pageflips). 4042 */ 4043 int 4044 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4045 u32 alignment, 4046 const struct i915_ggtt_view *view) 4047 { 4048 u32 old_read_domains, old_write_domain; 4049 int ret; 4050 4051 /* Mark the pin_display early so that we account for the 4052 * display coherency whilst setting up the cache domains. 4053 */ 4054 obj->pin_display++; 4055 4056 /* The display engine is not coherent with the LLC cache on gen6. As 4057 * a result, we make sure that the pinning that is about to occur is 4058 * done with uncached PTEs. This is lowest common denominator for all 4059 * chipsets. 4060 * 4061 * However for gen6+, we could do better by using the GFDT bit instead 4062 * of uncaching, which would allow us to flush all the LLC-cached data 4063 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 
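	 *
	 * Concretely, the set_cache_level() call that follows requests
	 * I915_CACHE_WT on parts that support write-through caching of
	 * the display (HAS_WT) and plain uncached (I915_CACHE_NONE)
	 * everywhere else.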
4064 */ 4065 ret = i915_gem_object_set_cache_level(obj, 4066 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4067 if (ret) 4068 goto err_unpin_display; 4069 4070 /* As the user may map the buffer once pinned in the display plane 4071 * (e.g. libkms for the bootup splash), we have to ensure that we 4072 * always use map_and_fenceable for all scanout buffers. 4073 */ 4074 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4075 view->type == I915_GGTT_VIEW_NORMAL ? 4076 PIN_MAPPABLE : 0); 4077 if (ret) 4078 goto err_unpin_display; 4079 4080 i915_gem_object_flush_cpu_write_domain(obj); 4081 4082 old_write_domain = obj->base.write_domain; 4083 old_read_domains = obj->base.read_domains; 4084 4085 /* It should now be out of any other write domains, and we can update 4086 * the domain values for our changes. 4087 */ 4088 obj->base.write_domain = 0; 4089 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4090 4091 trace_i915_gem_object_change_domain(obj, 4092 old_read_domains, 4093 old_write_domain); 4094 4095 return 0; 4096 4097 err_unpin_display: 4098 obj->pin_display--; 4099 return ret; 4100 } 4101 4102 void 4103 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4104 const struct i915_ggtt_view *view) 4105 { 4106 if (WARN_ON(obj->pin_display == 0)) 4107 return; 4108 4109 i915_gem_object_ggtt_unpin_view(obj, view); 4110 4111 obj->pin_display--; 4112 } 4113 4114 /** 4115 * Moves a single object to the CPU read, and possibly write domain. 4116 * 4117 * This function returns when the move is complete, including waiting on 4118 * flushes to occur. 4119 */ 4120 int 4121 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4122 { 4123 uint32_t old_write_domain, old_read_domains; 4124 int ret; 4125 4126 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4127 return 0; 4128 4129 ret = i915_gem_object_wait_rendering(obj, !write); 4130 if (ret) 4131 return ret; 4132 4133 i915_gem_object_flush_gtt_write_domain(obj); 4134 4135 old_write_domain = obj->base.write_domain; 4136 old_read_domains = obj->base.read_domains; 4137 4138 /* Flush the CPU cache if it's still invalid. */ 4139 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4140 i915_gem_clflush_object(obj, false); 4141 4142 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4143 } 4144 4145 /* It should now be out of any other write domains, and we can update 4146 * the domain values for our changes. 4147 */ 4148 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4149 4150 /* If we're writing through the CPU, then the GPU read domains will 4151 * need to be invalidated at next use. 4152 */ 4153 if (write) { 4154 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4155 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4156 } 4157 4158 trace_i915_gem_object_change_domain(obj, 4159 old_read_domains, 4160 old_write_domain); 4161 4162 return 0; 4163 } 4164 4165 /* Throttle our rendering by waiting until the ring has completed our requests 4166 * emitted over 20 msec ago. 4167 * 4168 * Note that if we were to use the current jiffies each time around the loop, 4169 * we wouldn't escape the function with any frames outstanding if the time to 4170 * render a frame was over 20ms. 4171 * 4172 * This should get us reasonable parallelism between CPU and GPU but also 4173 * relatively low latency when blocking on a particular request to finish. 
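 *
 * A rough worked example of the policy above (illustrative only): if a
 * client has outstanding requests emitted 30ms, 25ms, 10ms and 2ms ago,
 * the scan below stops at the 10ms-old request and we block on the
 * 25ms-old one, i.e. on the newest request that already falls outside
 * the 20ms window. If every outstanding request is younger than 20ms,
 * no target is chosen and the client is not throttled at all.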
4174 */ 4175 static int 4176 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4177 { 4178 struct drm_i915_private *dev_priv = dev->dev_private; 4179 struct drm_i915_file_private *file_priv = file->driver_priv; 4180 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4181 struct drm_i915_gem_request *request, *target = NULL; 4182 unsigned reset_counter; 4183 int ret; 4184 4185 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4186 if (ret) 4187 return ret; 4188 4189 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4190 if (ret) 4191 return ret; 4192 4193 spin_lock(&file_priv->mm.lock); 4194 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4195 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4196 break; 4197 4198 /* 4199 * Note that the request might not have been submitted yet. 4200 * In which case emitted_jiffies will be zero. 4201 */ 4202 if (!request->emitted_jiffies) 4203 continue; 4204 4205 target = request; 4206 } 4207 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4208 if (target) 4209 i915_gem_request_reference(target); 4210 spin_unlock(&file_priv->mm.lock); 4211 4212 if (target == NULL) 4213 return 0; 4214 4215 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4216 if (ret == 0) 4217 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4218 4219 i915_gem_request_unreference__unlocked(target); 4220 4221 return ret; 4222 } 4223 4224 static bool 4225 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4226 { 4227 struct drm_i915_gem_object *obj = vma->obj; 4228 4229 if (alignment && 4230 vma->node.start & (alignment - 1)) 4231 return true; 4232 4233 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4234 return true; 4235 4236 if (flags & PIN_OFFSET_BIAS && 4237 vma->node.start < (flags & PIN_OFFSET_MASK)) 4238 return true; 4239 4240 if (flags & PIN_OFFSET_FIXED && 4241 vma->node.start != (flags & PIN_OFFSET_MASK)) 4242 return true; 4243 4244 return false; 4245 } 4246 4247 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4248 { 4249 struct drm_i915_gem_object *obj = vma->obj; 4250 bool mappable, fenceable; 4251 u32 fence_size, fence_alignment; 4252 4253 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4254 obj->base.size, 4255 obj->tiling_mode); 4256 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4257 obj->base.size, 4258 obj->tiling_mode, 4259 true); 4260 4261 fenceable = (vma->node.size == fence_size && 4262 (vma->node.start & (fence_alignment - 1)) == 0); 4263 4264 mappable = (vma->node.start + fence_size <= 4265 to_i915(obj->base.dev)->ggtt.mappable_end); 4266 4267 obj->map_and_fenceable = mappable && fenceable; 4268 } 4269 4270 static int 4271 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4272 struct i915_address_space *vm, 4273 const struct i915_ggtt_view *ggtt_view, 4274 uint32_t alignment, 4275 uint64_t flags) 4276 { 4277 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4278 struct i915_vma *vma; 4279 unsigned bound; 4280 int ret; 4281 4282 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4283 return -ENODEV; 4284 4285 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4286 return -EINVAL; 4287 4288 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4289 return -EINVAL; 4290 4291 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4292 return -EINVAL; 4293 4294 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4295 i915_gem_obj_to_vma(obj, vm); 4296 4297 if (vma) { 4298 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4299 return -EBUSY; 4300 4301 if (i915_vma_misplaced(vma, alignment, flags)) { 4302 WARN(vma->pin_count, 4303 "bo is already pinned in %s with incorrect alignment:" 4304 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4305 " obj->map_and_fenceable=%d\n", 4306 ggtt_view ? "ggtt" : "ppgtt", 4307 upper_32_bits(vma->node.start), 4308 lower_32_bits(vma->node.start), 4309 alignment, 4310 !!(flags & PIN_MAPPABLE), 4311 obj->map_and_fenceable); 4312 ret = i915_vma_unbind(vma); 4313 if (ret) 4314 return ret; 4315 4316 vma = NULL; 4317 } 4318 } 4319 4320 bound = vma ? vma->bound : 0; 4321 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4322 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4323 flags); 4324 if (IS_ERR(vma)) 4325 return PTR_ERR(vma); 4326 } else { 4327 ret = i915_vma_bind(vma, obj->cache_level, flags); 4328 if (ret) 4329 return ret; 4330 } 4331 4332 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4333 (bound ^ vma->bound) & GLOBAL_BIND) { 4334 __i915_vma_set_map_and_fenceable(vma); 4335 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4336 } 4337 4338 vma->pin_count++; 4339 return 0; 4340 } 4341 4342 int 4343 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4344 struct i915_address_space *vm, 4345 uint32_t alignment, 4346 uint64_t flags) 4347 { 4348 return i915_gem_object_do_pin(obj, vm, 4349 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4350 alignment, flags); 4351 } 4352 4353 int 4354 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4355 const struct i915_ggtt_view *view, 4356 uint32_t alignment, 4357 uint64_t flags) 4358 { 4359 struct drm_device *dev = obj->base.dev; 4360 struct drm_i915_private *dev_priv = to_i915(dev); 4361 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4362 4363 BUG_ON(!view); 4364 4365 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4366 alignment, flags | PIN_GLOBAL); 4367 } 4368 4369 void 4370 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4371 const struct i915_ggtt_view *view) 4372 { 4373 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4374 4375 BUG_ON(!vma); 4376 WARN_ON(vma->pin_count == 0); 4377 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4378 4379 --vma->pin_count; 4380 } 4381 4382 int 4383 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4384 struct drm_file *file) 4385 { 4386 struct drm_i915_gem_busy *args = data; 4387 struct drm_i915_gem_object *obj; 4388 int ret; 4389 4390 ret = i915_mutex_lock_interruptible(dev); 4391 if (ret) 4392 return ret; 4393 4394 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4395 if (&obj->base == NULL) { 4396 ret = -ENOENT; 4397 goto unlock; 4398 } 4399 4400 /* Count all active objects as busy, even if they are currently not used 4401 * by the gpu. Users of this interface expect objects to eventually 4402 * become non-busy without any further actions, therefore emit any 4403 * necessary flushes here. 
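	 *
	 * For reference, the encoding built below is: bit (16 + exec_id) is
	 * set for each engine that still has an outstanding read from the
	 * object, while the low 16 bits carry the exec_id of the engine
	 * with the outstanding write (zero when there is no writer). An
	 * illustrative userspace check for "safe to write" would therefore
	 * be args.busy == 0, while (args.busy & 0xffff) identifies the
	 * current writer.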
4404 */ 4405 ret = i915_gem_object_flush_active(obj); 4406 if (ret) 4407 goto unref; 4408 4409 args->busy = 0; 4410 if (obj->active) { 4411 int i; 4412 4413 for (i = 0; i < I915_NUM_ENGINES; i++) { 4414 struct drm_i915_gem_request *req; 4415 4416 req = obj->last_read_req[i]; 4417 if (req) 4418 args->busy |= 1 << (16 + req->engine->exec_id); 4419 } 4420 if (obj->last_write_req) 4421 args->busy |= obj->last_write_req->engine->exec_id; 4422 } 4423 4424 unref: 4425 drm_gem_object_unreference(&obj->base); 4426 unlock: 4427 mutex_unlock(&dev->struct_mutex); 4428 return ret; 4429 } 4430 4431 int 4432 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4433 struct drm_file *file_priv) 4434 { 4435 return i915_gem_ring_throttle(dev, file_priv); 4436 } 4437 4438 int 4439 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4440 struct drm_file *file_priv) 4441 { 4442 struct drm_i915_private *dev_priv = dev->dev_private; 4443 struct drm_i915_gem_madvise *args = data; 4444 struct drm_i915_gem_object *obj; 4445 int ret; 4446 4447 switch (args->madv) { 4448 case I915_MADV_DONTNEED: 4449 case I915_MADV_WILLNEED: 4450 break; 4451 default: 4452 return -EINVAL; 4453 } 4454 4455 ret = i915_mutex_lock_interruptible(dev); 4456 if (ret) 4457 return ret; 4458 4459 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4460 if (&obj->base == NULL) { 4461 ret = -ENOENT; 4462 goto unlock; 4463 } 4464 4465 if (i915_gem_obj_is_pinned(obj)) { 4466 ret = -EINVAL; 4467 goto out; 4468 } 4469 4470 if (obj->pages && 4471 obj->tiling_mode != I915_TILING_NONE && 4472 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4473 if (obj->madv == I915_MADV_WILLNEED) 4474 i915_gem_object_unpin_pages(obj); 4475 if (args->madv == I915_MADV_WILLNEED) 4476 i915_gem_object_pin_pages(obj); 4477 } 4478 4479 if (obj->madv != __I915_MADV_PURGED) 4480 obj->madv = args->madv; 4481 4482 /* if the object is no longer attached, discard its backing storage */ 4483 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4484 i915_gem_object_truncate(obj); 4485 4486 args->retained = obj->madv != __I915_MADV_PURGED; 4487 4488 out: 4489 drm_gem_object_unreference(&obj->base); 4490 unlock: 4491 mutex_unlock(&dev->struct_mutex); 4492 return ret; 4493 } 4494 4495 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4496 const struct drm_i915_gem_object_ops *ops) 4497 { 4498 int i; 4499 4500 INIT_LIST_HEAD(&obj->global_list); 4501 for (i = 0; i < I915_NUM_ENGINES; i++) 4502 INIT_LIST_HEAD(&obj->engine_list[i]); 4503 INIT_LIST_HEAD(&obj->obj_exec_link); 4504 INIT_LIST_HEAD(&obj->vma_list); 4505 INIT_LIST_HEAD(&obj->batch_pool_link); 4506 4507 obj->ops = ops; 4508 4509 obj->fence_reg = I915_FENCE_REG_NONE; 4510 obj->madv = I915_MADV_WILLNEED; 4511 4512 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4513 } 4514 4515 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4516 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4517 .get_pages = i915_gem_object_get_pages_gtt, 4518 .put_pages = i915_gem_object_put_pages_gtt, 4519 }; 4520 4521 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4522 size_t size) 4523 { 4524 struct drm_i915_gem_object *obj; 4525 struct address_space *mapping; 4526 gfp_t mask; 4527 4528 obj = i915_gem_object_alloc(dev); 4529 if (obj == NULL) 4530 return NULL; 4531 4532 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4533 i915_gem_object_free(obj); 4534 return NULL; 4535 } 4536 4537 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4538 if 
(IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4539 /* 965gm cannot relocate objects above 4GiB. */ 4540 mask &= ~__GFP_HIGHMEM; 4541 mask |= __GFP_DMA32; 4542 } 4543 4544 mapping = file_inode(obj->base.filp)->i_mapping; 4545 mapping_set_gfp_mask(mapping, mask); 4546 4547 i915_gem_object_init(obj, &i915_gem_object_ops); 4548 4549 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4550 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4551 4552 if (HAS_LLC(dev)) { 4553 /* On some devices, we can have the GPU use the LLC (the CPU 4554 * cache) for about a 10% performance improvement 4555 * compared to uncached. Graphics requests other than 4556 * display scanout are coherent with the CPU in 4557 * accessing this cache. This means in this mode we 4558 * don't need to clflush on the CPU side, and on the 4559 * GPU side we only need to flush internal caches to 4560 * get data visible to the CPU. 4561 * 4562 * However, we maintain the display planes as UC, and so 4563 * need to rebind when first used as such. 4564 */ 4565 obj->cache_level = I915_CACHE_LLC; 4566 } else 4567 obj->cache_level = I915_CACHE_NONE; 4568 4569 trace_i915_gem_object_create(obj); 4570 4571 return obj; 4572 } 4573 4574 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4575 { 4576 /* If we are the last user of the backing storage (be it shmemfs 4577 * pages or stolen etc), we know that the pages are going to be 4578 * immediately released. In this case, we can then skip copying 4579 * back the contents from the GPU. 4580 */ 4581 4582 if (obj->madv != I915_MADV_WILLNEED) 4583 return false; 4584 4585 if (obj->base.filp == NULL) 4586 return true; 4587 4588 /* At first glance, this looks racy, but then again so would be 4589 * userspace racing mmap against close. However, the first external 4590 * reference to the filp can only be obtained through the 4591 * i915_gem_mmap_ioctl() which safeguards us against the user 4592 * acquiring such a reference whilst we are in the middle of 4593 * freeing the object. 4594 */ 4595 return atomic_long_read(&obj->base.filp->f_count) == 1; 4596 } 4597 4598 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4599 { 4600 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4601 struct drm_device *dev = obj->base.dev; 4602 struct drm_i915_private *dev_priv = dev->dev_private; 4603 struct i915_vma *vma, *next; 4604 4605 intel_runtime_pm_get(dev_priv); 4606 4607 trace_i915_gem_object_destroy(obj); 4608 4609 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4610 int ret; 4611 4612 vma->pin_count = 0; 4613 ret = i915_vma_unbind(vma); 4614 if (WARN_ON(ret == -ERESTARTSYS)) { 4615 bool was_interruptible; 4616 4617 was_interruptible = dev_priv->mm.interruptible; 4618 dev_priv->mm.interruptible = false; 4619 4620 WARN_ON(i915_vma_unbind(vma)); 4621 4622 dev_priv->mm.interruptible = was_interruptible; 4623 } 4624 } 4625 4626 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4627 * before progressing. 
	 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	BUG_ON(obj->pages);

	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
		    vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_vma *vma;

	BUG_ON(!view);

	list_for_each_entry(vma, &obj->vma_list, obj_link)
		if (vma->vm == &ggtt->base &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	if (!vma->is_ggtt)
		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));

	list_del(&vma->obj_link);

	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}

static void
i915_gem_stop_engines(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv)
		dev_priv->gt.stop_engine(engine);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_engines(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
	flush_delayed_work(&dev_priv->mm.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
4739 */ 4740 WARN_ON(dev_priv->mm.busy); 4741 4742 return 0; 4743 4744 err: 4745 mutex_unlock(&dev->struct_mutex); 4746 return ret; 4747 } 4748 4749 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4750 { 4751 struct intel_engine_cs *engine = req->engine; 4752 struct drm_device *dev = engine->dev; 4753 struct drm_i915_private *dev_priv = dev->dev_private; 4754 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4755 int i, ret; 4756 4757 if (!HAS_L3_DPF(dev) || !remap_info) 4758 return 0; 4759 4760 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4761 if (ret) 4762 return ret; 4763 4764 /* 4765 * Note: We do not worry about the concurrent register cacheline hang 4766 * here because no other code should access these registers other than 4767 * at initialization time. 4768 */ 4769 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4770 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 4771 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 4772 intel_ring_emit(engine, remap_info[i]); 4773 } 4774 4775 intel_ring_advance(engine); 4776 4777 return ret; 4778 } 4779 4780 void i915_gem_init_swizzling(struct drm_device *dev) 4781 { 4782 struct drm_i915_private *dev_priv = dev->dev_private; 4783 4784 if (INTEL_INFO(dev)->gen < 5 || 4785 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4786 return; 4787 4788 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4789 DISP_TILE_SURFACE_SWIZZLING); 4790 4791 if (IS_GEN5(dev)) 4792 return; 4793 4794 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4795 if (IS_GEN6(dev)) 4796 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4797 else if (IS_GEN7(dev)) 4798 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4799 else if (IS_GEN8(dev)) 4800 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4801 else 4802 BUG(); 4803 } 4804 4805 static void init_unused_ring(struct drm_device *dev, u32 base) 4806 { 4807 struct drm_i915_private *dev_priv = dev->dev_private; 4808 4809 I915_WRITE(RING_CTL(base), 0); 4810 I915_WRITE(RING_HEAD(base), 0); 4811 I915_WRITE(RING_TAIL(base), 0); 4812 I915_WRITE(RING_START(base), 0); 4813 } 4814 4815 static void init_unused_rings(struct drm_device *dev) 4816 { 4817 if (IS_I830(dev)) { 4818 init_unused_ring(dev, PRB1_BASE); 4819 init_unused_ring(dev, SRB0_BASE); 4820 init_unused_ring(dev, SRB1_BASE); 4821 init_unused_ring(dev, SRB2_BASE); 4822 init_unused_ring(dev, SRB3_BASE); 4823 } else if (IS_GEN2(dev)) { 4824 init_unused_ring(dev, SRB0_BASE); 4825 init_unused_ring(dev, SRB1_BASE); 4826 } else if (IS_GEN3(dev)) { 4827 init_unused_ring(dev, PRB1_BASE); 4828 init_unused_ring(dev, PRB2_BASE); 4829 } 4830 } 4831 4832 int i915_gem_init_engines(struct drm_device *dev) 4833 { 4834 struct drm_i915_private *dev_priv = dev->dev_private; 4835 int ret; 4836 4837 ret = intel_init_render_ring_buffer(dev); 4838 if (ret) 4839 return ret; 4840 4841 if (HAS_BSD(dev)) { 4842 ret = intel_init_bsd_ring_buffer(dev); 4843 if (ret) 4844 goto cleanup_render_ring; 4845 } 4846 4847 if (HAS_BLT(dev)) { 4848 ret = intel_init_blt_ring_buffer(dev); 4849 if (ret) 4850 goto cleanup_bsd_ring; 4851 } 4852 4853 if (HAS_VEBOX(dev)) { 4854 ret = intel_init_vebox_ring_buffer(dev); 4855 if (ret) 4856 goto cleanup_blt_ring; 4857 } 4858 4859 if (HAS_BSD2(dev)) { 4860 ret = intel_init_bsd2_ring_buffer(dev); 4861 if (ret) 4862 goto cleanup_vebox_ring; 4863 } 4864 4865 return 0; 4866 4867 cleanup_vebox_ring: 4868 intel_cleanup_engine(&dev_priv->engine[VECS]); 4869 cleanup_blt_ring: 4870 
	intel_cleanup_engine(&dev_priv->engine[BCS]);
cleanup_bsd_ring:
	intel_cleanup_engine(&dev_priv->engine[VCS]);
cleanup_render_ring:
	intel_cleanup_engine(&dev_priv->engine[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;
	int ret, j;

	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume which
	 * will prevent C3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	BUG_ON(!dev_priv->kernel_context);

	ret = i915_ppgtt_init_hw(dev);
	if (ret) {
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
		goto out;
	}

	/* Need to do basic initialisation of all rings first: */
	for_each_engine(engine, dev_priv) {
		ret = engine->init_hw(engine);
		if (ret)
			goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	if (HAS_GUC_UCODE(dev)) {
		ret = intel_guc_ucode_load(dev);
		if (ret) {
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Increment the next seqno by 0x100 so we have a visible break
	 * on re-initialisation
	 */
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno + 0x100);
	if (ret)
		goto out;

	/* Now it is safe to go back round and do everything else: */
	for_each_engine(engine, dev_priv) {
		struct drm_i915_gem_request *req;

		req = i915_gem_request_alloc(engine, NULL);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			i915_gem_cleanup_engines(dev);
			goto out;
		}

		if (engine->id == RCS) {
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
				i915_gem_l3_remap(req, j);
		}

		ret = i915_ppgtt_init_ring(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("PPGTT enable %s failed %d\n",
				  engine->name, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_engines(dev);
			goto out;
		}

		ret = i915_gem_context_enable(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("Context enable %s failed %d\n",
				  engine->name, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_engines(dev);
			goto out;
		}

		i915_add_request_no_flush(req);
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;
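
	/*
	 * The assignments just below select the submission backend once, up
	 * front: either the legacy ringbuffer path or the execlists path
	 * fills in the dev_priv->gt function pointers, and the rest of this
	 * file dispatches through them without caring which mode is active.
	 * The existing pattern (see i915_gem_stop_engines() and
	 * i915_gem_cleanup_engines() elsewhere in this file) is simply:
	 *
	 *	for_each_engine(engine, dev_priv)
	 *		dev_priv->gt.stop_engine(engine);
	 */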

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (!i915.enable_execlists) {
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_engines = i915_gem_init_engines;
		dev_priv->gt.cleanup_engine = intel_cleanup_engine;
		dev_priv->gt.stop_engine = intel_stop_engine;
	} else {
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
		dev_priv->gt.init_engines = intel_logical_rings_init;
		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
		dev_priv->gt.stop_engine = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_ggtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_engines(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry;
		 * for all other failures, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_cleanup_engines(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv)
		dev_priv->gt.cleanup_engine(engine);

	if (i915.enable_execlists)
		/*
		 * Neither the BIOS, ourselves nor any other kernel
		 * expects the system to be in execlists mode on startup,
		 * so we need to reset the GPU back to legacy mode.
5072 */ 5073 intel_gpu_reset(dev, ALL_ENGINES); 5074 } 5075 5076 static void 5077 init_engine_lists(struct intel_engine_cs *engine) 5078 { 5079 INIT_LIST_HEAD(&engine->active_list); 5080 INIT_LIST_HEAD(&engine->request_list); 5081 } 5082 5083 void 5084 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5085 { 5086 struct drm_device *dev = dev_priv->dev; 5087 5088 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5089 !IS_CHERRYVIEW(dev_priv)) 5090 dev_priv->num_fence_regs = 32; 5091 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5092 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5093 dev_priv->num_fence_regs = 16; 5094 else 5095 dev_priv->num_fence_regs = 8; 5096 5097 if (intel_vgpu_active(dev)) 5098 dev_priv->num_fence_regs = 5099 I915_READ(vgtif_reg(avail_rs.fence_num)); 5100 5101 /* Initialize fence registers to zero */ 5102 i915_gem_restore_fences(dev); 5103 5104 i915_gem_detect_bit_6_swizzle(dev); 5105 } 5106 5107 void 5108 i915_gem_load_init(struct drm_device *dev) 5109 { 5110 struct drm_i915_private *dev_priv = dev->dev_private; 5111 int i; 5112 5113 dev_priv->objects = 5114 kmem_cache_create("i915_gem_object", 5115 sizeof(struct drm_i915_gem_object), 0, 5116 SLAB_HWCACHE_ALIGN, 5117 NULL); 5118 dev_priv->vmas = 5119 kmem_cache_create("i915_gem_vma", 5120 sizeof(struct i915_vma), 0, 5121 SLAB_HWCACHE_ALIGN, 5122 NULL); 5123 dev_priv->requests = 5124 kmem_cache_create("i915_gem_request", 5125 sizeof(struct drm_i915_gem_request), 0, 5126 SLAB_HWCACHE_ALIGN, 5127 NULL); 5128 5129 INIT_LIST_HEAD(&dev_priv->vm_list); 5130 INIT_LIST_HEAD(&dev_priv->context_list); 5131 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5132 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5133 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5134 for (i = 0; i < I915_NUM_ENGINES; i++) 5135 init_engine_lists(&dev_priv->engine[i]); 5136 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5137 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5138 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5139 i915_gem_retire_work_handler); 5140 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5141 i915_gem_idle_work_handler); 5142 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5143 5144 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5145 5146 /* 5147 * Set initial sequence number for requests. 5148 * Using this number allows the wraparound to happen early, 5149 * catching any obvious problems. 5150 */ 5151 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5152 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5153 5154 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5155 5156 init_waitqueue_head(&dev_priv->pending_flip_queue); 5157 5158 dev_priv->mm.interruptible = true; 5159 5160 mutex_init(&dev_priv->fb_tracking.lock); 5161 } 5162 5163 void i915_gem_load_cleanup(struct drm_device *dev) 5164 { 5165 struct drm_i915_private *dev_priv = to_i915(dev); 5166 5167 kmem_cache_destroy(dev_priv->requests); 5168 kmem_cache_destroy(dev_priv->vmas); 5169 kmem_cache_destroy(dev_priv->objects); 5170 } 5171 5172 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5173 { 5174 struct drm_i915_file_private *file_priv = file->driver_priv; 5175 5176 /* Clean up our request list when the client is going away, so that 5177 * later retire_requests won't dereference our soon-to-be-gone 5178 * file_priv. 
5179 */ 5180 spin_lock(&file_priv->mm.lock); 5181 while (!list_empty(&file_priv->mm.request_list)) { 5182 struct drm_i915_gem_request *request; 5183 5184 request = list_first_entry(&file_priv->mm.request_list, 5185 struct drm_i915_gem_request, 5186 client_list); 5187 list_del(&request->client_list); 5188 request->file_priv = NULL; 5189 } 5190 spin_unlock(&file_priv->mm.lock); 5191 5192 if (!list_empty(&file_priv->rps.link)) { 5193 spin_lock(&to_i915(dev)->rps.client_lock); 5194 list_del(&file_priv->rps.link); 5195 spin_unlock(&to_i915(dev)->rps.client_lock); 5196 } 5197 } 5198 5199 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5200 { 5201 struct drm_i915_file_private *file_priv; 5202 int ret; 5203 5204 DRM_DEBUG_DRIVER("\n"); 5205 5206 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5207 if (!file_priv) 5208 return -ENOMEM; 5209 5210 file->driver_priv = file_priv; 5211 file_priv->dev_priv = dev->dev_private; 5212 file_priv->file = file; 5213 INIT_LIST_HEAD(&file_priv->rps.link); 5214 5215 spin_lock_init(&file_priv->mm.lock); 5216 INIT_LIST_HEAD(&file_priv->mm.request_list); 5217 5218 file_priv->bsd_ring = -1; 5219 5220 ret = i915_gem_context_open(dev, file); 5221 if (ret) 5222 kfree(file_priv); 5223 5224 return ret; 5225 } 5226 5227 /** 5228 * i915_gem_track_fb - update frontbuffer tracking 5229 * @old: current GEM buffer for the frontbuffer slots 5230 * @new: new GEM buffer for the frontbuffer slots 5231 * @frontbuffer_bits: bitmask of frontbuffer slots 5232 * 5233 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5234 * from @old and setting them in @new. Both @old and @new can be NULL. 5235 */ 5236 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5237 struct drm_i915_gem_object *new, 5238 unsigned frontbuffer_bits) 5239 { 5240 if (old) { 5241 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5242 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5243 old->frontbuffer_bits &= ~frontbuffer_bits; 5244 } 5245 5246 if (new) { 5247 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5248 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5249 new->frontbuffer_bits |= frontbuffer_bits; 5250 } 5251 } 5252 5253 /* All the new VM stuff */ 5254 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5255 struct i915_address_space *vm) 5256 { 5257 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5258 struct i915_vma *vma; 5259 5260 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5261 5262 list_for_each_entry(vma, &o->vma_list, obj_link) { 5263 if (vma->is_ggtt && 5264 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5265 continue; 5266 if (vma->vm == vm) 5267 return vma->node.start; 5268 } 5269 5270 WARN(1, "%s vma for this object not found.\n", 5271 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5272 return -1; 5273 } 5274 5275 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5276 const struct i915_ggtt_view *view) 5277 { 5278 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5279 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5280 struct i915_vma *vma; 5281 5282 list_for_each_entry(vma, &o->vma_list, obj_link) 5283 if (vma->vm == &ggtt->base && 5284 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5285 return vma->node.start; 5286 5287 WARN(1, "global vma for this object not found. 
(view=%u)\n", view->type); 5288 return -1; 5289 } 5290 5291 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5292 struct i915_address_space *vm) 5293 { 5294 struct i915_vma *vma; 5295 5296 list_for_each_entry(vma, &o->vma_list, obj_link) { 5297 if (vma->is_ggtt && 5298 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5299 continue; 5300 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5301 return true; 5302 } 5303 5304 return false; 5305 } 5306 5307 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5308 const struct i915_ggtt_view *view) 5309 { 5310 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5311 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5312 struct i915_vma *vma; 5313 5314 list_for_each_entry(vma, &o->vma_list, obj_link) 5315 if (vma->vm == &ggtt->base && 5316 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5317 drm_mm_node_allocated(&vma->node)) 5318 return true; 5319 5320 return false; 5321 } 5322 5323 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5324 { 5325 struct i915_vma *vma; 5326 5327 list_for_each_entry(vma, &o->vma_list, obj_link) 5328 if (drm_mm_node_allocated(&vma->node)) 5329 return true; 5330 5331 return false; 5332 } 5333 5334 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5335 struct i915_address_space *vm) 5336 { 5337 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5338 struct i915_vma *vma; 5339 5340 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5341 5342 BUG_ON(list_empty(&o->vma_list)); 5343 5344 list_for_each_entry(vma, &o->vma_list, obj_link) { 5345 if (vma->is_ggtt && 5346 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5347 continue; 5348 if (vma->vm == vm) 5349 return vma->node.size; 5350 } 5351 return 0; 5352 } 5353 5354 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5355 { 5356 struct i915_vma *vma; 5357 list_for_each_entry(vma, &obj->vma_list, obj_link) 5358 if (vma->pin_count > 0) 5359 return true; 5360 5361 return false; 5362 } 5363 5364 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5365 struct page * 5366 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5367 { 5368 struct page *page; 5369 5370 /* Only default objects have per-page dirty tracking */ 5371 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5372 return NULL; 5373 5374 page = i915_gem_object_get_page(obj, n); 5375 set_page_dirty(page); 5376 return page; 5377 } 5378 5379 /* Allocate a new GEM object and fill it with the supplied data */ 5380 struct drm_i915_gem_object * 5381 i915_gem_object_create_from_data(struct drm_device *dev, 5382 const void *data, size_t size) 5383 { 5384 struct drm_i915_gem_object *obj; 5385 struct sg_table *sg; 5386 size_t bytes; 5387 int ret; 5388 5389 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5390 if (IS_ERR_OR_NULL(obj)) 5391 return obj; 5392 5393 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5394 if (ret) 5395 goto fail; 5396 5397 ret = i915_gem_object_get_pages(obj); 5398 if (ret) 5399 goto fail; 5400 5401 i915_gem_object_pin_pages(obj); 5402 sg = obj->pages; 5403 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5404 obj->dirty = 1; /* Backing store is now out of date */ 5405 i915_gem_object_unpin_pages(obj); 5406 5407 if (WARN_ON(bytes != size)) { 5408 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5409 ret = -EFAULT; 5410 goto fail; 5411 } 5412 5413 return obj; 5414 5415 fail: 5416 drm_gem_object_unreference(&obj->base); 5417 return 
ERR_PTR(ret);
}
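
/*
 * Usage sketch (illustrative only; "fw" is assumed to be a struct firmware
 * obtained from request_firmware() and is not part of this file):
 * i915_gem_object_create_from_data() gives callers a GEM object that is
 * pre-populated from a kernel buffer, for instance a firmware blob:
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 * On success the object owns a copy of the data, rounded up to whole
 * pages, with its backing store already marked dirty.
 */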