1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 #include <linux/dma-buf.h> 40 41 #define RQ_BUG_ON(expr) 42 43 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 44 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 45 static void 46 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 47 static void 48 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 49 50 static bool cpu_cache_is_coherent(struct drm_device *dev, 51 enum i915_cache_level level) 52 { 53 return HAS_LLC(dev) || level != I915_CACHE_NONE; 54 } 55 56 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 57 { 58 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 59 return true; 60 61 return obj->pin_display; 62 } 63 64 /* some bookkeeping */ 65 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 66 size_t size) 67 { 68 spin_lock(&dev_priv->mm.object_stat_lock); 69 dev_priv->mm.object_count++; 70 dev_priv->mm.object_memory += size; 71 spin_unlock(&dev_priv->mm.object_stat_lock); 72 } 73 74 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 75 size_t size) 76 { 77 spin_lock(&dev_priv->mm.object_stat_lock); 78 dev_priv->mm.object_count--; 79 dev_priv->mm.object_memory -= size; 80 spin_unlock(&dev_priv->mm.object_stat_lock); 81 } 82 83 static int 84 i915_gem_wait_for_error(struct i915_gpu_error *error) 85 { 86 int ret; 87 88 #define EXIT_COND (!i915_reset_in_progress(error) || \ 89 i915_terminally_wedged(error)) 90 if (EXIT_COND) 91 return 0; 92 93 /* 94 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 95 * userspace. If it takes that long something really bad is going on and 96 * we should simply try to bail out and fail as gracefully as possible. 
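	 *
	 * Note that wait_event_interruptible_timeout() returns 0 on timeout,
	 * a negative error if it was interrupted by a signal, and the number
	 * of jiffies remaining otherwise, which is exactly what the checks
	 * below distinguish.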
97 */ 98 ret = wait_event_interruptible_timeout(error->reset_queue, 99 EXIT_COND, 100 10*HZ); 101 if (ret == 0) { 102 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 103 return -EIO; 104 } else if (ret < 0) { 105 return ret; 106 } 107 #undef EXIT_COND 108 109 return 0; 110 } 111 112 int i915_mutex_lock_interruptible(struct drm_device *dev) 113 { 114 struct drm_i915_private *dev_priv = dev->dev_private; 115 int ret; 116 117 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 118 if (ret) 119 return ret; 120 121 ret = mutex_lock_interruptible(&dev->struct_mutex); 122 if (ret) 123 return ret; 124 125 WARN_ON(i915_verify_lists(dev)); 126 return 0; 127 } 128 129 int 130 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 131 struct drm_file *file) 132 { 133 struct drm_i915_private *dev_priv = dev->dev_private; 134 struct drm_i915_gem_get_aperture *args = data; 135 struct i915_gtt *ggtt = &dev_priv->gtt; 136 struct i915_vma *vma; 137 size_t pinned; 138 139 pinned = 0; 140 mutex_lock(&dev->struct_mutex); 141 list_for_each_entry(vma, &ggtt->base.active_list, mm_list) 142 if (vma->pin_count) 143 pinned += vma->node.size; 144 list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) 145 if (vma->pin_count) 146 pinned += vma->node.size; 147 mutex_unlock(&dev->struct_mutex); 148 149 args->aper_size = dev_priv->gtt.base.total; 150 args->aper_available_size = args->aper_size - pinned; 151 152 return 0; 153 } 154 155 static int 156 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 157 { 158 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 159 char *vaddr = obj->phys_handle->vaddr; 160 struct sg_table *st; 161 struct scatterlist *sg; 162 int i; 163 164 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 165 return -EINVAL; 166 167 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 168 struct page *page; 169 char *src; 170 171 page = shmem_read_mapping_page(mapping, i); 172 if (IS_ERR(page)) 173 return PTR_ERR(page); 174 175 src = kmap_atomic(page); 176 memcpy(vaddr, src, PAGE_SIZE); 177 drm_clflush_virt_range(vaddr, PAGE_SIZE); 178 kunmap_atomic(src); 179 180 page_cache_release(page); 181 vaddr += PAGE_SIZE; 182 } 183 184 i915_gem_chipset_flush(obj->base.dev); 185 186 st = kmalloc(sizeof(*st), GFP_KERNEL); 187 if (st == NULL) 188 return -ENOMEM; 189 190 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 191 kfree(st); 192 return -ENOMEM; 193 } 194 195 sg = st->sgl; 196 sg->offset = 0; 197 sg->length = obj->base.size; 198 199 sg_dma_address(sg) = obj->phys_handle->busaddr; 200 sg_dma_len(sg) = obj->base.size; 201 202 obj->pages = st; 203 return 0; 204 } 205 206 static void 207 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 208 { 209 int ret; 210 211 BUG_ON(obj->madv == __I915_MADV_PURGED); 212 213 ret = i915_gem_object_set_to_cpu_domain(obj, true); 214 if (ret) { 215 /* In the event of a disaster, abandon all caches and 216 * hope for the best. 
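		 * The only failure we expect from the domain change at this
		 * point is -EIO from a wedged GPU, hence the WARN_ON below.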
217 */ 218 WARN_ON(ret != -EIO); 219 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 220 } 221 222 if (obj->madv == I915_MADV_DONTNEED) 223 obj->dirty = 0; 224 225 if (obj->dirty) { 226 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 227 char *vaddr = obj->phys_handle->vaddr; 228 int i; 229 230 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 231 struct page *page; 232 char *dst; 233 234 page = shmem_read_mapping_page(mapping, i); 235 if (IS_ERR(page)) 236 continue; 237 238 dst = kmap_atomic(page); 239 drm_clflush_virt_range(vaddr, PAGE_SIZE); 240 memcpy(dst, vaddr, PAGE_SIZE); 241 kunmap_atomic(dst); 242 243 set_page_dirty(page); 244 if (obj->madv == I915_MADV_WILLNEED) 245 mark_page_accessed(page); 246 page_cache_release(page); 247 vaddr += PAGE_SIZE; 248 } 249 obj->dirty = 0; 250 } 251 252 sg_free_table(obj->pages); 253 kfree(obj->pages); 254 } 255 256 static void 257 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 258 { 259 drm_pci_free(obj->base.dev, obj->phys_handle); 260 } 261 262 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 263 .get_pages = i915_gem_object_get_pages_phys, 264 .put_pages = i915_gem_object_put_pages_phys, 265 .release = i915_gem_object_release_phys, 266 }; 267 268 static int 269 drop_pages(struct drm_i915_gem_object *obj) 270 { 271 struct i915_vma *vma, *next; 272 int ret; 273 274 drm_gem_object_reference(&obj->base); 275 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 276 if (i915_vma_unbind(vma)) 277 break; 278 279 ret = i915_gem_object_put_pages(obj); 280 drm_gem_object_unreference(&obj->base); 281 282 return ret; 283 } 284 285 int 286 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 287 int align) 288 { 289 drm_dma_handle_t *phys; 290 int ret; 291 292 if (obj->phys_handle) { 293 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 294 return -EBUSY; 295 296 return 0; 297 } 298 299 if (obj->madv != I915_MADV_WILLNEED) 300 return -EFAULT; 301 302 if (obj->base.filp == NULL) 303 return -EINVAL; 304 305 ret = drop_pages(obj); 306 if (ret) 307 return ret; 308 309 /* create a new object */ 310 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 311 if (!phys) 312 return -ENOMEM; 313 314 obj->phys_handle = phys; 315 obj->ops = &i915_gem_phys_ops; 316 317 return i915_gem_object_get_pages(obj); 318 } 319 320 static int 321 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 322 struct drm_i915_gem_pwrite *args, 323 struct drm_file *file_priv) 324 { 325 struct drm_device *dev = obj->base.dev; 326 void *vaddr = obj->phys_handle->vaddr + args->offset; 327 char __user *user_data = to_user_ptr(args->data_ptr); 328 int ret = 0; 329 330 /* We manually control the domain here and pretend that it 331 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 332 */ 333 ret = i915_gem_object_wait_rendering(obj, false); 334 if (ret) 335 return ret; 336 337 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 338 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 339 unsigned long unwritten; 340 341 /* The physical object once assigned is fixed for the lifetime 342 * of the obj, so we can safely drop the lock and continue 343 * to access vaddr. 
344 */ 345 mutex_unlock(&dev->struct_mutex); 346 unwritten = copy_from_user(vaddr, user_data, args->size); 347 mutex_lock(&dev->struct_mutex); 348 if (unwritten) { 349 ret = -EFAULT; 350 goto out; 351 } 352 } 353 354 drm_clflush_virt_range(vaddr, args->size); 355 i915_gem_chipset_flush(dev); 356 357 out: 358 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 359 return ret; 360 } 361 362 void *i915_gem_object_alloc(struct drm_device *dev) 363 { 364 struct drm_i915_private *dev_priv = dev->dev_private; 365 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 366 } 367 368 void i915_gem_object_free(struct drm_i915_gem_object *obj) 369 { 370 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 371 kmem_cache_free(dev_priv->objects, obj); 372 } 373 374 static int 375 i915_gem_create(struct drm_file *file, 376 struct drm_device *dev, 377 uint64_t size, 378 uint32_t *handle_p) 379 { 380 struct drm_i915_gem_object *obj; 381 int ret; 382 u32 handle; 383 384 size = roundup(size, PAGE_SIZE); 385 if (size == 0) 386 return -EINVAL; 387 388 /* Allocate the new object */ 389 obj = i915_gem_alloc_object(dev, size); 390 if (obj == NULL) 391 return -ENOMEM; 392 393 ret = drm_gem_handle_create(file, &obj->base, &handle); 394 /* drop reference from allocate - handle holds it now */ 395 drm_gem_object_unreference_unlocked(&obj->base); 396 if (ret) 397 return ret; 398 399 *handle_p = handle; 400 return 0; 401 } 402 403 int 404 i915_gem_dumb_create(struct drm_file *file, 405 struct drm_device *dev, 406 struct drm_mode_create_dumb *args) 407 { 408 /* have to work out size/pitch and return them */ 409 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 410 args->size = args->pitch * args->height; 411 return i915_gem_create(file, dev, 412 args->size, &args->handle); 413 } 414 415 /** 416 * Creates a new mm object and returns a handle to it. 417 */ 418 int 419 i915_gem_create_ioctl(struct drm_device *dev, void *data, 420 struct drm_file *file) 421 { 422 struct drm_i915_gem_create *args = data; 423 424 return i915_gem_create(file, dev, 425 args->size, &args->handle); 426 } 427 428 static inline int 429 __copy_to_user_swizzled(char __user *cpu_vaddr, 430 const char *gpu_vaddr, int gpu_offset, 431 int length) 432 { 433 int ret, cpu_offset = 0; 434 435 while (length > 0) { 436 int cacheline_end = ALIGN(gpu_offset + 1, 64); 437 int this_length = min(cacheline_end - gpu_offset, length); 438 int swizzled_gpu_offset = gpu_offset ^ 64; 439 440 ret = __copy_to_user(cpu_vaddr + cpu_offset, 441 gpu_vaddr + swizzled_gpu_offset, 442 this_length); 443 if (ret) 444 return ret + length; 445 446 cpu_offset += this_length; 447 gpu_offset += this_length; 448 length -= this_length; 449 } 450 451 return 0; 452 } 453 454 static inline int 455 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 456 const char __user *cpu_vaddr, 457 int length) 458 { 459 int ret, cpu_offset = 0; 460 461 while (length > 0) { 462 int cacheline_end = ALIGN(gpu_offset + 1, 64); 463 int this_length = min(cacheline_end - gpu_offset, length); 464 int swizzled_gpu_offset = gpu_offset ^ 64; 465 466 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 467 cpu_vaddr + cpu_offset, 468 this_length); 469 if (ret) 470 return ret + length; 471 472 cpu_offset += this_length; 473 gpu_offset += this_length; 474 length -= this_length; 475 } 476 477 return 0; 478 } 479 480 /* 481 * Pins the specified object's pages and synchronizes the object with 482 * GPU accesses. 
Sets needs_clflush to non-zero if the caller should 483 * flush the object from the CPU cache. 484 */ 485 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 486 int *needs_clflush) 487 { 488 int ret; 489 490 *needs_clflush = 0; 491 492 if (!obj->base.filp) 493 return -EINVAL; 494 495 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 496 /* If we're not in the cpu read domain, set ourself into the gtt 497 * read domain and manually flush cachelines (if required). This 498 * optimizes for the case when the gpu will dirty the data 499 * anyway again before the next pread happens. */ 500 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 501 obj->cache_level); 502 ret = i915_gem_object_wait_rendering(obj, true); 503 if (ret) 504 return ret; 505 } 506 507 ret = i915_gem_object_get_pages(obj); 508 if (ret) 509 return ret; 510 511 i915_gem_object_pin_pages(obj); 512 513 return ret; 514 } 515 516 /* Per-page copy function for the shmem pread fastpath. 517 * Flushes invalid cachelines before reading the target if 518 * needs_clflush is set. */ 519 static int 520 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 521 char __user *user_data, 522 bool page_do_bit17_swizzling, bool needs_clflush) 523 { 524 char *vaddr; 525 int ret; 526 527 if (unlikely(page_do_bit17_swizzling)) 528 return -EINVAL; 529 530 vaddr = kmap_atomic(page); 531 if (needs_clflush) 532 drm_clflush_virt_range(vaddr + shmem_page_offset, 533 page_length); 534 ret = __copy_to_user_inatomic(user_data, 535 vaddr + shmem_page_offset, 536 page_length); 537 kunmap_atomic(vaddr); 538 539 return ret ? -EFAULT : 0; 540 } 541 542 static void 543 shmem_clflush_swizzled_range(char *addr, unsigned long length, 544 bool swizzled) 545 { 546 if (unlikely(swizzled)) { 547 unsigned long start = (unsigned long) addr; 548 unsigned long end = (unsigned long) addr + length; 549 550 /* For swizzling simply ensure that we always flush both 551 * channels. Lame, but simple and it works. Swizzled 552 * pwrite/pread is far from a hotpath - current userspace 553 * doesn't use it at all. */ 554 start = round_down(start, 128); 555 end = round_up(end, 128); 556 557 drm_clflush_virt_range((void *)start, end - start); 558 } else { 559 drm_clflush_virt_range(addr, length); 560 } 561 562 } 563 564 /* Only difference to the fast-path function is that this can handle bit17 565 * and uses non-atomic copy and kmap functions. */ 566 static int 567 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 568 char __user *user_data, 569 bool page_do_bit17_swizzling, bool needs_clflush) 570 { 571 char *vaddr; 572 int ret; 573 574 vaddr = kmap(page); 575 if (needs_clflush) 576 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 577 page_length, 578 page_do_bit17_swizzling); 579 580 if (page_do_bit17_swizzling) 581 ret = __copy_to_user_swizzled(user_data, 582 vaddr, shmem_page_offset, 583 page_length); 584 else 585 ret = __copy_to_user(user_data, 586 vaddr + shmem_page_offset, 587 page_length); 588 kunmap(page); 589 590 return ret ? 
- EFAULT : 0; 591 } 592 593 static int 594 i915_gem_shmem_pread(struct drm_device *dev, 595 struct drm_i915_gem_object *obj, 596 struct drm_i915_gem_pread *args, 597 struct drm_file *file) 598 { 599 char __user *user_data; 600 ssize_t remain; 601 loff_t offset; 602 int shmem_page_offset, page_length, ret = 0; 603 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 604 int prefaulted = 0; 605 int needs_clflush = 0; 606 struct sg_page_iter sg_iter; 607 608 user_data = to_user_ptr(args->data_ptr); 609 remain = args->size; 610 611 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 612 613 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 614 if (ret) 615 return ret; 616 617 offset = args->offset; 618 619 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 620 offset >> PAGE_SHIFT) { 621 struct page *page = sg_page_iter_page(&sg_iter); 622 623 if (remain <= 0) 624 break; 625 626 /* Operation in this page 627 * 628 * shmem_page_offset = offset within page in shmem file 629 * page_length = bytes to copy for this page 630 */ 631 shmem_page_offset = offset_in_page(offset); 632 page_length = remain; 633 if ((shmem_page_offset + page_length) > PAGE_SIZE) 634 page_length = PAGE_SIZE - shmem_page_offset; 635 636 page_do_bit17_swizzling = obj_do_bit17_swizzling && 637 (page_to_phys(page) & (1 << 17)) != 0; 638 639 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 640 user_data, page_do_bit17_swizzling, 641 needs_clflush); 642 if (ret == 0) 643 goto next_page; 644 645 mutex_unlock(&dev->struct_mutex); 646 647 if (likely(!i915.prefault_disable) && !prefaulted) { 648 ret = fault_in_multipages_writeable(user_data, remain); 649 /* Userspace is tricking us, but we've already clobbered 650 * its pages with the prefault and promised to write the 651 * data up to the first fault. Hence ignore any errors 652 * and just continue. */ 653 (void)ret; 654 prefaulted = 1; 655 } 656 657 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 658 user_data, page_do_bit17_swizzling, 659 needs_clflush); 660 661 mutex_lock(&dev->struct_mutex); 662 663 if (ret) 664 goto out; 665 666 next_page: 667 remain -= page_length; 668 user_data += page_length; 669 offset += page_length; 670 } 671 672 out: 673 i915_gem_object_unpin_pages(obj); 674 675 return ret; 676 } 677 678 /** 679 * Reads data from the object referenced by handle. 680 * 681 * On error, the contents of *data are undefined. 682 */ 683 int 684 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 685 struct drm_file *file) 686 { 687 struct drm_i915_gem_pread *args = data; 688 struct drm_i915_gem_object *obj; 689 int ret = 0; 690 691 if (args->size == 0) 692 return 0; 693 694 if (!access_ok(VERIFY_WRITE, 695 to_user_ptr(args->data_ptr), 696 args->size)) 697 return -EFAULT; 698 699 ret = i915_mutex_lock_interruptible(dev); 700 if (ret) 701 return ret; 702 703 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 704 if (&obj->base == NULL) { 705 ret = -ENOENT; 706 goto unlock; 707 } 708 709 /* Bounds check source. */ 710 if (args->offset > obj->base.size || 711 args->size > obj->base.size - args->offset) { 712 ret = -EINVAL; 713 goto out; 714 } 715 716 /* prime objects have no backing filp to GEM pread/pwrite 717 * pages from. 
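	 * Such objects have to be read through the dma-buf CPU access
	 * interfaces (or an mmap of the exporting device) instead.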
718 */ 719 if (!obj->base.filp) { 720 ret = -EINVAL; 721 goto out; 722 } 723 724 trace_i915_gem_object_pread(obj, args->offset, args->size); 725 726 ret = i915_gem_shmem_pread(dev, obj, args, file); 727 728 out: 729 drm_gem_object_unreference(&obj->base); 730 unlock: 731 mutex_unlock(&dev->struct_mutex); 732 return ret; 733 } 734 735 /* This is the fast write path which cannot handle 736 * page faults in the source data 737 */ 738 739 static inline int 740 fast_user_write(struct io_mapping *mapping, 741 loff_t page_base, int page_offset, 742 char __user *user_data, 743 int length) 744 { 745 void __iomem *vaddr_atomic; 746 void *vaddr; 747 unsigned long unwritten; 748 749 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 750 /* We can use the cpu mem copy function because this is X86. */ 751 vaddr = (void __force*)vaddr_atomic + page_offset; 752 unwritten = __copy_from_user_inatomic_nocache(vaddr, 753 user_data, length); 754 io_mapping_unmap_atomic(vaddr_atomic); 755 return unwritten; 756 } 757 758 /** 759 * This is the fast pwrite path, where we copy the data directly from the 760 * user into the GTT, uncached. 761 */ 762 static int 763 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 764 struct drm_i915_gem_object *obj, 765 struct drm_i915_gem_pwrite *args, 766 struct drm_file *file) 767 { 768 struct drm_i915_private *dev_priv = dev->dev_private; 769 ssize_t remain; 770 loff_t offset, page_base; 771 char __user *user_data; 772 int page_offset, page_length, ret; 773 774 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 775 if (ret) 776 goto out; 777 778 ret = i915_gem_object_set_to_gtt_domain(obj, true); 779 if (ret) 780 goto out_unpin; 781 782 ret = i915_gem_object_put_fence(obj); 783 if (ret) 784 goto out_unpin; 785 786 user_data = to_user_ptr(args->data_ptr); 787 remain = args->size; 788 789 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 790 791 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 792 793 while (remain > 0) { 794 /* Operation in this page 795 * 796 * page_base = page offset within aperture 797 * page_offset = offset within page 798 * page_length = bytes to copy for this page 799 */ 800 page_base = offset & PAGE_MASK; 801 page_offset = offset_in_page(offset); 802 page_length = remain; 803 if ((page_offset + remain) > PAGE_SIZE) 804 page_length = PAGE_SIZE - page_offset; 805 806 /* If we get a fault while copying data, then (presumably) our 807 * source page isn't available. Return the error and we'll 808 * retry in the slow path. 809 */ 810 if (fast_user_write(dev_priv->gtt.mappable, page_base, 811 page_offset, user_data, page_length)) { 812 ret = -EFAULT; 813 goto out_flush; 814 } 815 816 remain -= page_length; 817 user_data += page_length; 818 offset += page_length; 819 } 820 821 out_flush: 822 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 823 out_unpin: 824 i915_gem_object_ggtt_unpin(obj); 825 out: 826 return ret; 827 } 828 829 /* Per-page copy function for the shmem pwrite fastpath. 830 * Flushes invalid cachelines before writing to the target if 831 * needs_clflush_before is set and flushes out any written cachelines after 832 * writing if needs_clflush is set. 
*/ 833 static int 834 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 835 char __user *user_data, 836 bool page_do_bit17_swizzling, 837 bool needs_clflush_before, 838 bool needs_clflush_after) 839 { 840 char *vaddr; 841 int ret; 842 843 if (unlikely(page_do_bit17_swizzling)) 844 return -EINVAL; 845 846 vaddr = kmap_atomic(page); 847 if (needs_clflush_before) 848 drm_clflush_virt_range(vaddr + shmem_page_offset, 849 page_length); 850 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 851 user_data, page_length); 852 if (needs_clflush_after) 853 drm_clflush_virt_range(vaddr + shmem_page_offset, 854 page_length); 855 kunmap_atomic(vaddr); 856 857 return ret ? -EFAULT : 0; 858 } 859 860 /* Only difference to the fast-path function is that this can handle bit17 861 * and uses non-atomic copy and kmap functions. */ 862 static int 863 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 864 char __user *user_data, 865 bool page_do_bit17_swizzling, 866 bool needs_clflush_before, 867 bool needs_clflush_after) 868 { 869 char *vaddr; 870 int ret; 871 872 vaddr = kmap(page); 873 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 874 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 875 page_length, 876 page_do_bit17_swizzling); 877 if (page_do_bit17_swizzling) 878 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 879 user_data, 880 page_length); 881 else 882 ret = __copy_from_user(vaddr + shmem_page_offset, 883 user_data, 884 page_length); 885 if (needs_clflush_after) 886 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 887 page_length, 888 page_do_bit17_swizzling); 889 kunmap(page); 890 891 return ret ? -EFAULT : 0; 892 } 893 894 static int 895 i915_gem_shmem_pwrite(struct drm_device *dev, 896 struct drm_i915_gem_object *obj, 897 struct drm_i915_gem_pwrite *args, 898 struct drm_file *file) 899 { 900 ssize_t remain; 901 loff_t offset; 902 char __user *user_data; 903 int shmem_page_offset, page_length, ret = 0; 904 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 905 int hit_slowpath = 0; 906 int needs_clflush_after = 0; 907 int needs_clflush_before = 0; 908 struct sg_page_iter sg_iter; 909 910 user_data = to_user_ptr(args->data_ptr); 911 remain = args->size; 912 913 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 914 915 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 916 /* If we're not in the cpu write domain, set ourself into the gtt 917 * write domain and manually flush cachelines (if required). This 918 * optimizes for the case when the gpu will use the data 919 * right away and we therefore have to clflush anyway. */ 920 needs_clflush_after = cpu_write_needs_clflush(obj); 921 ret = i915_gem_object_wait_rendering(obj, false); 922 if (ret) 923 return ret; 924 } 925 /* Same trick applies to invalidate partially written cachelines read 926 * before writing. 
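	 * Otherwise a write covering only part of a cacheline could merge the
	 * new bytes with stale data from the CPU cache and later write that
	 * stale data back over memory the GPU has updated in the meantime.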
*/ 927 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 928 needs_clflush_before = 929 !cpu_cache_is_coherent(dev, obj->cache_level); 930 931 ret = i915_gem_object_get_pages(obj); 932 if (ret) 933 return ret; 934 935 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 936 937 i915_gem_object_pin_pages(obj); 938 939 offset = args->offset; 940 obj->dirty = 1; 941 942 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 943 offset >> PAGE_SHIFT) { 944 struct page *page = sg_page_iter_page(&sg_iter); 945 int partial_cacheline_write; 946 947 if (remain <= 0) 948 break; 949 950 /* Operation in this page 951 * 952 * shmem_page_offset = offset within page in shmem file 953 * page_length = bytes to copy for this page 954 */ 955 shmem_page_offset = offset_in_page(offset); 956 957 page_length = remain; 958 if ((shmem_page_offset + page_length) > PAGE_SIZE) 959 page_length = PAGE_SIZE - shmem_page_offset; 960 961 /* If we don't overwrite a cacheline completely we need to be 962 * careful to have up-to-date data by first clflushing. Don't 963 * overcomplicate things and flush the entire patch. */ 964 partial_cacheline_write = needs_clflush_before && 965 ((shmem_page_offset | page_length) 966 & (boot_cpu_data.x86_clflush_size - 1)); 967 968 page_do_bit17_swizzling = obj_do_bit17_swizzling && 969 (page_to_phys(page) & (1 << 17)) != 0; 970 971 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 972 user_data, page_do_bit17_swizzling, 973 partial_cacheline_write, 974 needs_clflush_after); 975 if (ret == 0) 976 goto next_page; 977 978 hit_slowpath = 1; 979 mutex_unlock(&dev->struct_mutex); 980 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 981 user_data, page_do_bit17_swizzling, 982 partial_cacheline_write, 983 needs_clflush_after); 984 985 mutex_lock(&dev->struct_mutex); 986 987 if (ret) 988 goto out; 989 990 next_page: 991 remain -= page_length; 992 user_data += page_length; 993 offset += page_length; 994 } 995 996 out: 997 i915_gem_object_unpin_pages(obj); 998 999 if (hit_slowpath) { 1000 /* 1001 * Fixup: Flush cpu caches in case we didn't flush the dirty 1002 * cachelines in-line while writing and the object moved 1003 * out of the cpu write domain while we've dropped the lock. 1004 */ 1005 if (!needs_clflush_after && 1006 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1007 if (i915_gem_clflush_object(obj, obj->pin_display)) 1008 needs_clflush_after = true; 1009 } 1010 } 1011 1012 if (needs_clflush_after) 1013 i915_gem_chipset_flush(dev); 1014 else 1015 obj->cache_dirty = true; 1016 1017 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1018 return ret; 1019 } 1020 1021 /** 1022 * Writes data to the object referenced by handle. 1023 * 1024 * On error, the contents of the buffer that were to be modified are undefined. 
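 *
 * For reference, userspace drives this ioctl roughly as follows (an
 * illustrative sketch using libdrm, not code taken from this file; error
 * handling omitted):
 *
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle   = handle,           /* GEM handle from GEM_CREATE */
 *		.offset   = 0,                /* byte offset into the object */
 *		.size     = len,              /* number of bytes to write */
 *		.data_ptr = (uintptr_t)data,  /* user pointer to the source */
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);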
1025 */ 1026 int 1027 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1028 struct drm_file *file) 1029 { 1030 struct drm_i915_private *dev_priv = dev->dev_private; 1031 struct drm_i915_gem_pwrite *args = data; 1032 struct drm_i915_gem_object *obj; 1033 int ret; 1034 1035 if (args->size == 0) 1036 return 0; 1037 1038 if (!access_ok(VERIFY_READ, 1039 to_user_ptr(args->data_ptr), 1040 args->size)) 1041 return -EFAULT; 1042 1043 if (likely(!i915.prefault_disable)) { 1044 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1045 args->size); 1046 if (ret) 1047 return -EFAULT; 1048 } 1049 1050 intel_runtime_pm_get(dev_priv); 1051 1052 ret = i915_mutex_lock_interruptible(dev); 1053 if (ret) 1054 goto put_rpm; 1055 1056 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1057 if (&obj->base == NULL) { 1058 ret = -ENOENT; 1059 goto unlock; 1060 } 1061 1062 /* Bounds check destination. */ 1063 if (args->offset > obj->base.size || 1064 args->size > obj->base.size - args->offset) { 1065 ret = -EINVAL; 1066 goto out; 1067 } 1068 1069 /* prime objects have no backing filp to GEM pread/pwrite 1070 * pages from. 1071 */ 1072 if (!obj->base.filp) { 1073 ret = -EINVAL; 1074 goto out; 1075 } 1076 1077 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1078 1079 ret = -EFAULT; 1080 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1081 * it would end up going through the fenced access, and we'll get 1082 * different detiling behavior between reading and writing. 1083 * pread/pwrite currently are reading and writing from the CPU 1084 * perspective, requiring manual detiling by the client. 1085 */ 1086 if (obj->tiling_mode == I915_TILING_NONE && 1087 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1088 cpu_write_needs_clflush(obj)) { 1089 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1090 /* Note that the gtt paths might fail with non-page-backed user 1091 * pointers (e.g. gtt mappings when moving data between 1092 * textures). Fallback to the shmem path in that case. */ 1093 } 1094 1095 if (ret == -EFAULT || ret == -ENOSPC) { 1096 if (obj->phys_handle) 1097 ret = i915_gem_phys_pwrite(obj, args, file); 1098 else 1099 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1100 } 1101 1102 out: 1103 drm_gem_object_unreference(&obj->base); 1104 unlock: 1105 mutex_unlock(&dev->struct_mutex); 1106 put_rpm: 1107 intel_runtime_pm_put(dev_priv); 1108 1109 return ret; 1110 } 1111 1112 int 1113 i915_gem_check_wedge(struct i915_gpu_error *error, 1114 bool interruptible) 1115 { 1116 if (i915_reset_in_progress(error)) { 1117 /* Non-interruptible callers can't handle -EAGAIN, hence return 1118 * -EIO unconditionally for these. */ 1119 if (!interruptible) 1120 return -EIO; 1121 1122 /* Recovery complete, but the reset failed ... */ 1123 if (i915_terminally_wedged(error)) 1124 return -EIO; 1125 1126 /* 1127 * Check if GPU Reset is in progress - we need intel_ring_begin 1128 * to work properly to reinit the hw state while the gpu is 1129 * still marked as reset-in-progress. Handle this with a flag. 
1130 */ 1131 if (!error->reload_in_reset) 1132 return -EAGAIN; 1133 } 1134 1135 return 0; 1136 } 1137 1138 static void fake_irq(unsigned long data) 1139 { 1140 wake_up_process((struct task_struct *)data); 1141 } 1142 1143 static bool missed_irq(struct drm_i915_private *dev_priv, 1144 struct intel_engine_cs *ring) 1145 { 1146 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1147 } 1148 1149 static int __i915_spin_request(struct drm_i915_gem_request *req) 1150 { 1151 unsigned long timeout; 1152 1153 if (i915_gem_request_get_ring(req)->irq_refcount) 1154 return -EBUSY; 1155 1156 timeout = jiffies + 1; 1157 while (!need_resched()) { 1158 if (i915_gem_request_completed(req, true)) 1159 return 0; 1160 1161 if (time_after_eq(jiffies, timeout)) 1162 break; 1163 1164 cpu_relax_lowlatency(); 1165 } 1166 if (i915_gem_request_completed(req, false)) 1167 return 0; 1168 1169 return -EAGAIN; 1170 } 1171 1172 /** 1173 * __i915_wait_request - wait until execution of request has finished 1174 * @req: duh! 1175 * @reset_counter: reset sequence associated with the given request 1176 * @interruptible: do an interruptible wait (normally yes) 1177 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1178 * 1179 * Note: It is of utmost importance that the passed in seqno and reset_counter 1180 * values have been read by the caller in an smp safe manner. Where read-side 1181 * locks are involved, it is sufficient to read the reset_counter before 1182 * unlocking the lock that protects the seqno. For lockless tricks, the 1183 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1184 * inserted. 1185 * 1186 * Returns 0 if the request was found within the alloted time. Else returns the 1187 * errno with remaining time filled in timeout argument. 1188 */ 1189 int __i915_wait_request(struct drm_i915_gem_request *req, 1190 unsigned reset_counter, 1191 bool interruptible, 1192 s64 *timeout, 1193 struct intel_rps_client *rps) 1194 { 1195 struct intel_engine_cs *ring = i915_gem_request_get_ring(req); 1196 struct drm_device *dev = ring->dev; 1197 struct drm_i915_private *dev_priv = dev->dev_private; 1198 const bool irq_test_in_progress = 1199 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); 1200 DEFINE_WAIT(wait); 1201 unsigned long timeout_expire; 1202 s64 before, now; 1203 int ret; 1204 1205 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1206 1207 if (list_empty(&req->list)) 1208 return 0; 1209 1210 if (i915_gem_request_completed(req, true)) 1211 return 0; 1212 1213 timeout_expire = timeout ? 1214 jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; 1215 1216 if (INTEL_INFO(dev_priv)->gen >= 6) 1217 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1218 1219 /* Record current time in case interrupted by signal, or wedged */ 1220 trace_i915_gem_request_wait_begin(req); 1221 before = ktime_get_raw_ns(); 1222 1223 /* Optimistic spin for the next jiffie before touching IRQs */ 1224 ret = __i915_spin_request(req); 1225 if (ret == 0) 1226 goto out; 1227 1228 if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) { 1229 ret = -ENODEV; 1230 goto out; 1231 } 1232 1233 for (;;) { 1234 struct timer_list timer; 1235 1236 prepare_to_wait(&ring->irq_queue, &wait, 1237 interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 1238 1239 /* We need to check whether any gpu reset happened in between 1240 * the caller grabbing the seqno and now ... 
		 */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, ring)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
			mod_timer(&timer, expire);
		}

		io_schedule();

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}
	}
	if (!irq_test_in_progress)
		ring->irq_put(ring);

	finish_wait(&ring->irq_queue, &wait);

out:
	now = ktime_get_raw_ns();
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (now - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->ring->dev->dev_private;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	req->pid = get_pid(task_pid(current));

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

	put_pid(request->pid);
	request->pid = NULL;
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of the tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
1364 */ 1365 request->ringbuf->last_retired_head = request->postfix; 1366 1367 list_del_init(&request->list); 1368 i915_gem_request_remove_from_client(request); 1369 1370 i915_gem_request_unreference(request); 1371 } 1372 1373 static void 1374 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1375 { 1376 struct intel_engine_cs *engine = req->ring; 1377 struct drm_i915_gem_request *tmp; 1378 1379 lockdep_assert_held(&engine->dev->struct_mutex); 1380 1381 if (list_empty(&req->list)) 1382 return; 1383 1384 do { 1385 tmp = list_first_entry(&engine->request_list, 1386 typeof(*tmp), list); 1387 1388 i915_gem_request_retire(tmp); 1389 } while (tmp != req); 1390 1391 WARN_ON(i915_verify_lists(engine->dev)); 1392 } 1393 1394 /** 1395 * Waits for a request to be signaled, and cleans up the 1396 * request and object lists appropriately for that event. 1397 */ 1398 int 1399 i915_wait_request(struct drm_i915_gem_request *req) 1400 { 1401 struct drm_device *dev; 1402 struct drm_i915_private *dev_priv; 1403 bool interruptible; 1404 int ret; 1405 1406 BUG_ON(req == NULL); 1407 1408 dev = req->ring->dev; 1409 dev_priv = dev->dev_private; 1410 interruptible = dev_priv->mm.interruptible; 1411 1412 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1413 1414 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1415 if (ret) 1416 return ret; 1417 1418 ret = __i915_wait_request(req, 1419 atomic_read(&dev_priv->gpu_error.reset_counter), 1420 interruptible, NULL, NULL); 1421 if (ret) 1422 return ret; 1423 1424 __i915_gem_request_retire__upto(req); 1425 return 0; 1426 } 1427 1428 /** 1429 * Ensures that all rendering to the object has completed and the object is 1430 * safe to unbind from the GTT or access from the CPU. 1431 */ 1432 int 1433 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1434 bool readonly) 1435 { 1436 int ret, i; 1437 1438 if (!obj->active) 1439 return 0; 1440 1441 if (readonly) { 1442 if (obj->last_write_req != NULL) { 1443 ret = i915_wait_request(obj->last_write_req); 1444 if (ret) 1445 return ret; 1446 1447 i = obj->last_write_req->ring->id; 1448 if (obj->last_read_req[i] == obj->last_write_req) 1449 i915_gem_object_retire__read(obj, i); 1450 else 1451 i915_gem_object_retire__write(obj); 1452 } 1453 } else { 1454 for (i = 0; i < I915_NUM_RINGS; i++) { 1455 if (obj->last_read_req[i] == NULL) 1456 continue; 1457 1458 ret = i915_wait_request(obj->last_read_req[i]); 1459 if (ret) 1460 return ret; 1461 1462 i915_gem_object_retire__read(obj, i); 1463 } 1464 RQ_BUG_ON(obj->active); 1465 } 1466 1467 return 0; 1468 } 1469 1470 static void 1471 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1472 struct drm_i915_gem_request *req) 1473 { 1474 int ring = req->ring->id; 1475 1476 if (obj->last_read_req[ring] == req) 1477 i915_gem_object_retire__read(obj, ring); 1478 else if (obj->last_write_req == req) 1479 i915_gem_object_retire__write(obj); 1480 1481 __i915_gem_request_retire__upto(req); 1482 } 1483 1484 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1485 * as the object state may change during this call. 
1486 */ 1487 static __must_check int 1488 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1489 struct intel_rps_client *rps, 1490 bool readonly) 1491 { 1492 struct drm_device *dev = obj->base.dev; 1493 struct drm_i915_private *dev_priv = dev->dev_private; 1494 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1495 unsigned reset_counter; 1496 int ret, i, n = 0; 1497 1498 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1499 BUG_ON(!dev_priv->mm.interruptible); 1500 1501 if (!obj->active) 1502 return 0; 1503 1504 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1505 if (ret) 1506 return ret; 1507 1508 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1509 1510 if (readonly) { 1511 struct drm_i915_gem_request *req; 1512 1513 req = obj->last_write_req; 1514 if (req == NULL) 1515 return 0; 1516 1517 requests[n++] = i915_gem_request_reference(req); 1518 } else { 1519 for (i = 0; i < I915_NUM_RINGS; i++) { 1520 struct drm_i915_gem_request *req; 1521 1522 req = obj->last_read_req[i]; 1523 if (req == NULL) 1524 continue; 1525 1526 requests[n++] = i915_gem_request_reference(req); 1527 } 1528 } 1529 1530 mutex_unlock(&dev->struct_mutex); 1531 for (i = 0; ret == 0 && i < n; i++) 1532 ret = __i915_wait_request(requests[i], reset_counter, true, 1533 NULL, rps); 1534 mutex_lock(&dev->struct_mutex); 1535 1536 for (i = 0; i < n; i++) { 1537 if (ret == 0) 1538 i915_gem_object_retire_request(obj, requests[i]); 1539 i915_gem_request_unreference(requests[i]); 1540 } 1541 1542 return ret; 1543 } 1544 1545 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1546 { 1547 struct drm_i915_file_private *fpriv = file->driver_priv; 1548 return &fpriv->rps; 1549 } 1550 1551 /** 1552 * Called when user space prepares to use an object with the CPU, either 1553 * through the mmap ioctl's mapping or a GTT mapping. 1554 */ 1555 int 1556 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1557 struct drm_file *file) 1558 { 1559 struct drm_i915_gem_set_domain *args = data; 1560 struct drm_i915_gem_object *obj; 1561 uint32_t read_domains = args->read_domains; 1562 uint32_t write_domain = args->write_domain; 1563 int ret; 1564 1565 /* Only handle setting domains to types used by the CPU. */ 1566 if (write_domain & I915_GEM_GPU_DOMAINS) 1567 return -EINVAL; 1568 1569 if (read_domains & I915_GEM_GPU_DOMAINS) 1570 return -EINVAL; 1571 1572 /* Having something in the write domain implies it's in the read 1573 * domain, and only that read domain. Enforce that in the request. 1574 */ 1575 if (write_domain != 0 && read_domains != write_domain) 1576 return -EINVAL; 1577 1578 ret = i915_mutex_lock_interruptible(dev); 1579 if (ret) 1580 return ret; 1581 1582 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1583 if (&obj->base == NULL) { 1584 ret = -ENOENT; 1585 goto unlock; 1586 } 1587 1588 /* Try to flush the object off the GPU without holding the lock. 1589 * We will repeat the flush holding the lock in the normal manner 1590 * to catch cases where we are gazumped. 1591 */ 1592 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1593 to_rps_client(file), 1594 !write_domain); 1595 if (ret) 1596 goto unref; 1597 1598 if (read_domains & I915_GEM_DOMAIN_GTT) 1599 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1600 else 1601 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1602 1603 if (write_domain != 0) 1604 intel_fb_obj_invalidate(obj, 1605 write_domain == I915_GEM_DOMAIN_GTT ? 
1606 ORIGIN_GTT : ORIGIN_CPU); 1607 1608 unref: 1609 drm_gem_object_unreference(&obj->base); 1610 unlock: 1611 mutex_unlock(&dev->struct_mutex); 1612 return ret; 1613 } 1614 1615 /** 1616 * Called when user space has done writes to this buffer 1617 */ 1618 int 1619 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1620 struct drm_file *file) 1621 { 1622 struct drm_i915_gem_sw_finish *args = data; 1623 struct drm_i915_gem_object *obj; 1624 int ret = 0; 1625 1626 ret = i915_mutex_lock_interruptible(dev); 1627 if (ret) 1628 return ret; 1629 1630 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1631 if (&obj->base == NULL) { 1632 ret = -ENOENT; 1633 goto unlock; 1634 } 1635 1636 /* Pinned buffers may be scanout, so flush the cache */ 1637 if (obj->pin_display) 1638 i915_gem_object_flush_cpu_write_domain(obj); 1639 1640 drm_gem_object_unreference(&obj->base); 1641 unlock: 1642 mutex_unlock(&dev->struct_mutex); 1643 return ret; 1644 } 1645 1646 /** 1647 * Maps the contents of an object, returning the address it is mapped 1648 * into. 1649 * 1650 * While the mapping holds a reference on the contents of the object, it doesn't 1651 * imply a ref on the object itself. 1652 * 1653 * IMPORTANT: 1654 * 1655 * DRM driver writers who look a this function as an example for how to do GEM 1656 * mmap support, please don't implement mmap support like here. The modern way 1657 * to implement DRM mmap support is with an mmap offset ioctl (like 1658 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1659 * That way debug tooling like valgrind will understand what's going on, hiding 1660 * the mmap call in a driver private ioctl will break that. The i915 driver only 1661 * does cpu mmaps this way because we didn't know better. 1662 */ 1663 int 1664 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1665 struct drm_file *file) 1666 { 1667 struct drm_i915_gem_mmap *args = data; 1668 struct drm_gem_object *obj; 1669 unsigned long addr; 1670 1671 if (args->flags & ~(I915_MMAP_WC)) 1672 return -EINVAL; 1673 1674 if (args->flags & I915_MMAP_WC && !cpu_has_pat) 1675 return -ENODEV; 1676 1677 obj = drm_gem_object_lookup(dev, file, args->handle); 1678 if (obj == NULL) 1679 return -ENOENT; 1680 1681 /* prime objects have no backing filp to GEM mmap 1682 * pages from. 1683 */ 1684 if (!obj->filp) { 1685 drm_gem_object_unreference_unlocked(obj); 1686 return -EINVAL; 1687 } 1688 1689 addr = vm_mmap(obj->filp, 0, args->size, 1690 PROT_READ | PROT_WRITE, MAP_SHARED, 1691 args->offset); 1692 if (args->flags & I915_MMAP_WC) { 1693 struct mm_struct *mm = current->mm; 1694 struct vm_area_struct *vma; 1695 1696 down_write(&mm->mmap_sem); 1697 vma = find_vma(mm, addr); 1698 if (vma) 1699 vma->vm_page_prot = 1700 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1701 else 1702 addr = -ENOMEM; 1703 up_write(&mm->mmap_sem); 1704 } 1705 drm_gem_object_unreference_unlocked(obj); 1706 if (IS_ERR((void *)addr)) 1707 return addr; 1708 1709 args->addr_ptr = (uint64_t) addr; 1710 1711 return 0; 1712 } 1713 1714 /** 1715 * i915_gem_fault - fault a page into the GTT 1716 * @vma: VMA in question 1717 * @vmf: fault info 1718 * 1719 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1720 * from userspace. 
The fault handler takes care of binding the object to 1721 * the GTT (if needed), allocating and programming a fence register (again, 1722 * only if needed based on whether the old reg is still valid or the object 1723 * is tiled) and inserting a new PTE into the faulting process. 1724 * 1725 * Note that the faulting process may involve evicting existing objects 1726 * from the GTT and/or fence registers to make room. So performance may 1727 * suffer if the GTT working set is large or there are few fence registers 1728 * left. 1729 */ 1730 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1731 { 1732 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1733 struct drm_device *dev = obj->base.dev; 1734 struct drm_i915_private *dev_priv = dev->dev_private; 1735 struct i915_ggtt_view view = i915_ggtt_view_normal; 1736 pgoff_t page_offset; 1737 unsigned long pfn; 1738 int ret = 0; 1739 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1740 1741 intel_runtime_pm_get(dev_priv); 1742 1743 /* We don't use vmf->pgoff since that has the fake offset */ 1744 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1745 PAGE_SHIFT; 1746 1747 ret = i915_mutex_lock_interruptible(dev); 1748 if (ret) 1749 goto out; 1750 1751 trace_i915_gem_object_fault(obj, page_offset, true, write); 1752 1753 /* Try to flush the object off the GPU first without holding the lock. 1754 * Upon reacquiring the lock, we will perform our sanity checks and then 1755 * repeat the flush holding the lock in the normal manner to catch cases 1756 * where we are gazumped. 1757 */ 1758 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1759 if (ret) 1760 goto unlock; 1761 1762 /* Access to snoopable pages through the GTT is incoherent. */ 1763 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1764 ret = -EFAULT; 1765 goto unlock; 1766 } 1767 1768 /* Use a partial view if the object is bigger than the aperture. */ 1769 if (obj->base.size >= dev_priv->gtt.mappable_end && 1770 obj->tiling_mode == I915_TILING_NONE) { 1771 static const unsigned int chunk_size = 256; // 1 MiB 1772 1773 memset(&view, 0, sizeof(view)); 1774 view.type = I915_GGTT_VIEW_PARTIAL; 1775 view.params.partial.offset = rounddown(page_offset, chunk_size); 1776 view.params.partial.size = 1777 min_t(unsigned int, 1778 chunk_size, 1779 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1780 view.params.partial.offset); 1781 } 1782 1783 /* Now pin it into the GTT if needed */ 1784 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1785 if (ret) 1786 goto unlock; 1787 1788 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1789 if (ret) 1790 goto unpin; 1791 1792 ret = i915_gem_object_get_fence(obj); 1793 if (ret) 1794 goto unpin; 1795 1796 /* Finally, remap it using the new GTT offset */ 1797 pfn = dev_priv->gtt.mappable_base + 1798 i915_gem_obj_ggtt_offset_view(obj, &view); 1799 pfn >>= PAGE_SHIFT; 1800 1801 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1802 /* Overriding existing pages in partial view does not cause 1803 * us any trouble as TLBs are still valid because the fault 1804 * is due to userspace losing part of the mapping or never 1805 * having accessed it before (at this partials' range). 
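		 * Note that the loop below pre-populates PTEs for the whole
		 * partial view rather than just the faulting page, so
		 * neighbouring accesses within the view do not fault again.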
1806 */ 1807 unsigned long base = vma->vm_start + 1808 (view.params.partial.offset << PAGE_SHIFT); 1809 unsigned int i; 1810 1811 for (i = 0; i < view.params.partial.size; i++) { 1812 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1813 if (ret) 1814 break; 1815 } 1816 1817 obj->fault_mappable = true; 1818 } else { 1819 if (!obj->fault_mappable) { 1820 unsigned long size = min_t(unsigned long, 1821 vma->vm_end - vma->vm_start, 1822 obj->base.size); 1823 int i; 1824 1825 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1826 ret = vm_insert_pfn(vma, 1827 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1828 pfn + i); 1829 if (ret) 1830 break; 1831 } 1832 1833 obj->fault_mappable = true; 1834 } else 1835 ret = vm_insert_pfn(vma, 1836 (unsigned long)vmf->virtual_address, 1837 pfn + page_offset); 1838 } 1839 unpin: 1840 i915_gem_object_ggtt_unpin_view(obj, &view); 1841 unlock: 1842 mutex_unlock(&dev->struct_mutex); 1843 out: 1844 switch (ret) { 1845 case -EIO: 1846 /* 1847 * We eat errors when the gpu is terminally wedged to avoid 1848 * userspace unduly crashing (gl has no provisions for mmaps to 1849 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1850 * and so needs to be reported. 1851 */ 1852 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1853 ret = VM_FAULT_SIGBUS; 1854 break; 1855 } 1856 case -EAGAIN: 1857 /* 1858 * EAGAIN means the gpu is hung and we'll wait for the error 1859 * handler to reset everything when re-faulting in 1860 * i915_mutex_lock_interruptible. 1861 */ 1862 case 0: 1863 case -ERESTARTSYS: 1864 case -EINTR: 1865 case -EBUSY: 1866 /* 1867 * EBUSY is ok: this just means that another thread 1868 * already did the job. 1869 */ 1870 ret = VM_FAULT_NOPAGE; 1871 break; 1872 case -ENOMEM: 1873 ret = VM_FAULT_OOM; 1874 break; 1875 case -ENOSPC: 1876 case -EFAULT: 1877 ret = VM_FAULT_SIGBUS; 1878 break; 1879 default: 1880 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1881 ret = VM_FAULT_SIGBUS; 1882 break; 1883 } 1884 1885 intel_runtime_pm_put(dev_priv); 1886 return ret; 1887 } 1888 1889 /** 1890 * i915_gem_release_mmap - remove physical page mappings 1891 * @obj: obj in question 1892 * 1893 * Preserve the reservation of the mmapping with the DRM core code, but 1894 * relinquish ownership of the pages back to the system. 1895 * 1896 * It is vital that we remove the page mapping if we have mapped a tiled 1897 * object through the GTT and then lose the fence register due to 1898 * resource pressure. Similarly if the object has been moved out of the 1899 * aperture, than pages mapped into userspace must be revoked. Removing the 1900 * mapping will then trigger a page fault on the next user access, allowing 1901 * fixup by i915_gem_fault(). 
1902 */ 1903 void 1904 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1905 { 1906 if (!obj->fault_mappable) 1907 return; 1908 1909 drm_vma_node_unmap(&obj->base.vma_node, 1910 obj->base.dev->anon_inode->i_mapping); 1911 obj->fault_mappable = false; 1912 } 1913 1914 void 1915 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1916 { 1917 struct drm_i915_gem_object *obj; 1918 1919 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1920 i915_gem_release_mmap(obj); 1921 } 1922 1923 uint32_t 1924 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1925 { 1926 uint32_t gtt_size; 1927 1928 if (INTEL_INFO(dev)->gen >= 4 || 1929 tiling_mode == I915_TILING_NONE) 1930 return size; 1931 1932 /* Previous chips need a power-of-two fence region when tiling */ 1933 if (INTEL_INFO(dev)->gen == 3) 1934 gtt_size = 1024*1024; 1935 else 1936 gtt_size = 512*1024; 1937 1938 while (gtt_size < size) 1939 gtt_size <<= 1; 1940 1941 return gtt_size; 1942 } 1943 1944 /** 1945 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1946 * @obj: object to check 1947 * 1948 * Return the required GTT alignment for an object, taking into account 1949 * potential fence register mapping. 1950 */ 1951 uint32_t 1952 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1953 int tiling_mode, bool fenced) 1954 { 1955 /* 1956 * Minimum alignment is 4k (GTT page size), but might be greater 1957 * if a fence register is needed for the object. 1958 */ 1959 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1960 tiling_mode == I915_TILING_NONE) 1961 return 4096; 1962 1963 /* 1964 * Previous chips need to be aligned to the size of the smallest 1965 * fence register that can contain the object. 1966 */ 1967 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1968 } 1969 1970 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1971 { 1972 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1973 int ret; 1974 1975 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1976 return 0; 1977 1978 dev_priv->mm.shrinker_no_lock_stealing = true; 1979 1980 ret = drm_gem_create_mmap_offset(&obj->base); 1981 if (ret != -ENOSPC) 1982 goto out; 1983 1984 /* Badly fragmented mmap space? The only way we can recover 1985 * space is by destroying unwanted objects. We can't randomly release 1986 * mmap_offsets as userspace expects them to be persistent for the 1987 * lifetime of the objects. The closest we can is to release the 1988 * offsets on purgeable objects by truncating it and marking it purged, 1989 * which prevents userspace from ever using that object again. 
1990 */ 1991 i915_gem_shrink(dev_priv, 1992 obj->base.size >> PAGE_SHIFT, 1993 I915_SHRINK_BOUND | 1994 I915_SHRINK_UNBOUND | 1995 I915_SHRINK_PURGEABLE); 1996 ret = drm_gem_create_mmap_offset(&obj->base); 1997 if (ret != -ENOSPC) 1998 goto out; 1999 2000 i915_gem_shrink_all(dev_priv); 2001 ret = drm_gem_create_mmap_offset(&obj->base); 2002 out: 2003 dev_priv->mm.shrinker_no_lock_stealing = false; 2004 2005 return ret; 2006 } 2007 2008 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2009 { 2010 drm_gem_free_mmap_offset(&obj->base); 2011 } 2012 2013 int 2014 i915_gem_mmap_gtt(struct drm_file *file, 2015 struct drm_device *dev, 2016 uint32_t handle, 2017 uint64_t *offset) 2018 { 2019 struct drm_i915_gem_object *obj; 2020 int ret; 2021 2022 ret = i915_mutex_lock_interruptible(dev); 2023 if (ret) 2024 return ret; 2025 2026 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2027 if (&obj->base == NULL) { 2028 ret = -ENOENT; 2029 goto unlock; 2030 } 2031 2032 if (obj->madv != I915_MADV_WILLNEED) { 2033 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2034 ret = -EFAULT; 2035 goto out; 2036 } 2037 2038 ret = i915_gem_object_create_mmap_offset(obj); 2039 if (ret) 2040 goto out; 2041 2042 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2043 2044 out: 2045 drm_gem_object_unreference(&obj->base); 2046 unlock: 2047 mutex_unlock(&dev->struct_mutex); 2048 return ret; 2049 } 2050 2051 /** 2052 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2053 * @dev: DRM device 2054 * @data: GTT mapping ioctl data 2055 * @file: GEM object info 2056 * 2057 * Simply returns the fake offset to userspace so it can mmap it. 2058 * The mmap call will end up in drm_gem_mmap(), which will set things 2059 * up so we can get faults in the handler above. 2060 * 2061 * The fault handler will take care of binding the object into the GTT 2062 * (since it may have been evicted to make room for something), allocating 2063 * a fence register, and mapping the appropriate aperture address into 2064 * userspace. 2065 */ 2066 int 2067 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2068 struct drm_file *file) 2069 { 2070 struct drm_i915_gem_mmap_gtt *args = data; 2071 2072 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2073 } 2074 2075 /* Immediately discard the backing storage */ 2076 static void 2077 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2078 { 2079 i915_gem_object_free_mmap_offset(obj); 2080 2081 if (obj->base.filp == NULL) 2082 return; 2083 2084 /* Our goal here is to return as much of the memory as 2085 * is possible back to the system as we are called from OOM. 2086 * To do this we must instruct the shmfs to drop all of its 2087 * backing pages, *now*. 
2088 */ 2089 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2090 obj->madv = __I915_MADV_PURGED; 2091 } 2092 2093 /* Try to discard unwanted pages */ 2094 static void 2095 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2096 { 2097 struct address_space *mapping; 2098 2099 switch (obj->madv) { 2100 case I915_MADV_DONTNEED: 2101 i915_gem_object_truncate(obj); 2102 case __I915_MADV_PURGED: 2103 return; 2104 } 2105 2106 if (obj->base.filp == NULL) 2107 return; 2108 2109 mapping = file_inode(obj->base.filp)->i_mapping, 2110 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2111 } 2112 2113 static void 2114 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2115 { 2116 struct sg_page_iter sg_iter; 2117 int ret; 2118 2119 BUG_ON(obj->madv == __I915_MADV_PURGED); 2120 2121 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2122 if (ret) { 2123 /* In the event of a disaster, abandon all caches and 2124 * hope for the best. 2125 */ 2126 WARN_ON(ret != -EIO); 2127 i915_gem_clflush_object(obj, true); 2128 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2129 } 2130 2131 i915_gem_gtt_finish_object(obj); 2132 2133 if (i915_gem_object_needs_bit17_swizzle(obj)) 2134 i915_gem_object_save_bit_17_swizzle(obj); 2135 2136 if (obj->madv == I915_MADV_DONTNEED) 2137 obj->dirty = 0; 2138 2139 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2140 struct page *page = sg_page_iter_page(&sg_iter); 2141 2142 if (obj->dirty) 2143 set_page_dirty(page); 2144 2145 if (obj->madv == I915_MADV_WILLNEED) 2146 mark_page_accessed(page); 2147 2148 page_cache_release(page); 2149 } 2150 obj->dirty = 0; 2151 2152 sg_free_table(obj->pages); 2153 kfree(obj->pages); 2154 } 2155 2156 int 2157 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2158 { 2159 const struct drm_i915_gem_object_ops *ops = obj->ops; 2160 2161 if (obj->pages == NULL) 2162 return 0; 2163 2164 if (obj->pages_pin_count) 2165 return -EBUSY; 2166 2167 BUG_ON(i915_gem_obj_bound_any(obj)); 2168 2169 /* ->put_pages might need to allocate memory for the bit17 swizzle 2170 * array, hence protect them from being reaped by removing them from gtt 2171 * lists early. */ 2172 list_del(&obj->global_list); 2173 2174 ops->put_pages(obj); 2175 obj->pages = NULL; 2176 2177 i915_gem_object_invalidate(obj); 2178 2179 return 0; 2180 } 2181 2182 static int 2183 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2184 { 2185 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2186 int page_count, i; 2187 struct address_space *mapping; 2188 struct sg_table *st; 2189 struct scatterlist *sg; 2190 struct sg_page_iter sg_iter; 2191 struct page *page; 2192 unsigned long last_pfn = 0; /* suppress gcc warning */ 2193 int ret; 2194 gfp_t gfp; 2195 2196 /* Assert that the object is not currently in any GPU domain. As it 2197 * wasn't in the GTT, there shouldn't be any way it could have been in 2198 * a GPU cache 2199 */ 2200 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2201 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2202 2203 st = kmalloc(sizeof(*st), GFP_KERNEL); 2204 if (st == NULL) 2205 return -ENOMEM; 2206 2207 page_count = obj->base.size / PAGE_SIZE; 2208 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2209 kfree(st); 2210 return -ENOMEM; 2211 } 2212 2213 /* Get the list of pages out of our struct file. They'll be pinned 2214 * at this point until we release them. 
2215 * 2216 * Fail silently without starting the shrinker 2217 */ 2218 mapping = file_inode(obj->base.filp)->i_mapping; 2219 gfp = mapping_gfp_mask(mapping); 2220 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2221 gfp &= ~(__GFP_IO | __GFP_WAIT); 2222 sg = st->sgl; 2223 st->nents = 0; 2224 for (i = 0; i < page_count; i++) { 2225 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2226 if (IS_ERR(page)) { 2227 i915_gem_shrink(dev_priv, 2228 page_count, 2229 I915_SHRINK_BOUND | 2230 I915_SHRINK_UNBOUND | 2231 I915_SHRINK_PURGEABLE); 2232 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2233 } 2234 if (IS_ERR(page)) { 2235 /* We've tried hard to allocate the memory by reaping 2236 * our own buffer, now let the real VM do its job and 2237 * go down in flames if truly OOM. 2238 */ 2239 i915_gem_shrink_all(dev_priv); 2240 page = shmem_read_mapping_page(mapping, i); 2241 if (IS_ERR(page)) { 2242 ret = PTR_ERR(page); 2243 goto err_pages; 2244 } 2245 } 2246 #ifdef CONFIG_SWIOTLB 2247 if (swiotlb_nr_tbl()) { 2248 st->nents++; 2249 sg_set_page(sg, page, PAGE_SIZE, 0); 2250 sg = sg_next(sg); 2251 continue; 2252 } 2253 #endif 2254 if (!i || page_to_pfn(page) != last_pfn + 1) { 2255 if (i) 2256 sg = sg_next(sg); 2257 st->nents++; 2258 sg_set_page(sg, page, PAGE_SIZE, 0); 2259 } else { 2260 sg->length += PAGE_SIZE; 2261 } 2262 last_pfn = page_to_pfn(page); 2263 2264 /* Check that the i965g/gm workaround works. */ 2265 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2266 } 2267 #ifdef CONFIG_SWIOTLB 2268 if (!swiotlb_nr_tbl()) 2269 #endif 2270 sg_mark_end(sg); 2271 obj->pages = st; 2272 2273 ret = i915_gem_gtt_prepare_object(obj); 2274 if (ret) 2275 goto err_pages; 2276 2277 if (i915_gem_object_needs_bit17_swizzle(obj)) 2278 i915_gem_object_do_bit_17_swizzle(obj); 2279 2280 if (obj->tiling_mode != I915_TILING_NONE && 2281 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2282 i915_gem_object_pin_pages(obj); 2283 2284 return 0; 2285 2286 err_pages: 2287 sg_mark_end(sg); 2288 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2289 page_cache_release(sg_page_iter_page(&sg_iter)); 2290 sg_free_table(st); 2291 kfree(st); 2292 2293 /* shmemfs first checks if there is enough memory to allocate the page 2294 * and reports ENOSPC should there be insufficient, along with the usual 2295 * ENOMEM for a genuine allocation failure. 2296 * 2297 * We use ENOSPC in our driver to mean that we have run out of aperture 2298 * space and so want to translate the error from shmemfs back to our 2299 * usual understanding of ENOMEM. 2300 */ 2301 if (ret == -ENOSPC) 2302 ret = -ENOMEM; 2303 2304 return ret; 2305 } 2306 2307 /* Ensure that the associated pages are gathered from the backing storage 2308 * and pinned into our object. i915_gem_object_get_pages() may be called 2309 * multiple times before they are released by a single call to 2310 * i915_gem_object_put_pages() - once the pages are no longer referenced 2311 * either as a result of memory pressure (reaping pages under the shrinker) 2312 * or as the object is itself released. 
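 *
 * As an aside, the shmem path above folds physically contiguous pages
 * into a single scatterlist entry rather than using one entry per page.
 * A simplified, self-contained sketch of that decision (plain C, for
 * illustration only; the real loop works on struct page and scatterlist):
 *
 *      static int count_sg_segments(const unsigned long *pfn, int n)
 *      {
 *              int i, segments = 0;
 *
 *              for (i = 0; i < n; i++)
 *                      if (i == 0 || pfn[i] != pfn[i - 1] + 1)
 *                              segments++;
 *
 *              return segments;
 *      }
 *
 * Each run of consecutive page frame numbers becomes one entry whose
 * length grows by PAGE_SIZE per page, so st->nents can end up much
 * smaller than page_count (except under SWIOTLB, where every page gets
 * its own entry).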
2313 */ 2314 int 2315 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2316 { 2317 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2318 const struct drm_i915_gem_object_ops *ops = obj->ops; 2319 int ret; 2320 2321 if (obj->pages) 2322 return 0; 2323 2324 if (obj->madv != I915_MADV_WILLNEED) { 2325 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2326 return -EFAULT; 2327 } 2328 2329 BUG_ON(obj->pages_pin_count); 2330 2331 ret = ops->get_pages(obj); 2332 if (ret) 2333 return ret; 2334 2335 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2336 2337 obj->get_page.sg = obj->pages->sgl; 2338 obj->get_page.last = 0; 2339 2340 return 0; 2341 } 2342 2343 void i915_vma_move_to_active(struct i915_vma *vma, 2344 struct drm_i915_gem_request *req) 2345 { 2346 struct drm_i915_gem_object *obj = vma->obj; 2347 struct intel_engine_cs *ring; 2348 2349 ring = i915_gem_request_get_ring(req); 2350 2351 /* Add a reference if we're newly entering the active list. */ 2352 if (obj->active == 0) 2353 drm_gem_object_reference(&obj->base); 2354 obj->active |= intel_ring_flag(ring); 2355 2356 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2357 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2358 2359 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2360 } 2361 2362 static void 2363 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2364 { 2365 RQ_BUG_ON(obj->last_write_req == NULL); 2366 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2367 2368 i915_gem_request_assign(&obj->last_write_req, NULL); 2369 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2370 } 2371 2372 static void 2373 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2374 { 2375 struct i915_vma *vma; 2376 2377 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2378 RQ_BUG_ON(!(obj->active & (1 << ring))); 2379 2380 list_del_init(&obj->ring_list[ring]); 2381 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2382 2383 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2384 i915_gem_object_retire__write(obj); 2385 2386 obj->active &= ~(1 << ring); 2387 if (obj->active) 2388 return; 2389 2390 /* Bump our place on the bound list to keep it roughly in LRU order 2391 * so that we don't steal from recently used but inactive objects 2392 * (unless we are forced to ofc!) 
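 *
 * (For clarity, obj->active is a bitmask with one bit per ring:
 * i915_vma_move_to_active() sets the bit and takes a reference the first
 * time the mask becomes non-zero, and this function clears it again. A
 * minimal sketch of the invariant, using illustrative names only:
 *
 *      static bool last_ring_retired(unsigned *active, int ring_id)
 *      {
 *              *active &= ~(1u << ring_id);
 *              return *active == 0;
 *      }
 *
 * Only when last_ring_retired() would report true does the object drop
 * back onto the inactive lists and release that reference, as done below.)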
2393 */ 2394 list_move_tail(&obj->global_list, 2395 &to_i915(obj->base.dev)->mm.bound_list); 2396 2397 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2398 if (!list_empty(&vma->mm_list)) 2399 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2400 } 2401 2402 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2403 drm_gem_object_unreference(&obj->base); 2404 } 2405 2406 static int 2407 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2408 { 2409 struct drm_i915_private *dev_priv = dev->dev_private; 2410 struct intel_engine_cs *ring; 2411 int ret, i, j; 2412 2413 /* Carefully retire all requests without writing to the rings */ 2414 for_each_ring(ring, dev_priv, i) { 2415 ret = intel_ring_idle(ring); 2416 if (ret) 2417 return ret; 2418 } 2419 i915_gem_retire_requests(dev); 2420 2421 /* Finally reset hw state */ 2422 for_each_ring(ring, dev_priv, i) { 2423 intel_ring_init_seqno(ring, seqno); 2424 2425 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2426 ring->semaphore.sync_seqno[j] = 0; 2427 } 2428 2429 return 0; 2430 } 2431 2432 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2433 { 2434 struct drm_i915_private *dev_priv = dev->dev_private; 2435 int ret; 2436 2437 if (seqno == 0) 2438 return -EINVAL; 2439 2440 /* HWS page needs to be set less than what we 2441 * will inject to ring 2442 */ 2443 ret = i915_gem_init_seqno(dev, seqno - 1); 2444 if (ret) 2445 return ret; 2446 2447 /* Carefully set the last_seqno value so that wrap 2448 * detection still works 2449 */ 2450 dev_priv->next_seqno = seqno; 2451 dev_priv->last_seqno = seqno - 1; 2452 if (dev_priv->last_seqno == 0) 2453 dev_priv->last_seqno--; 2454 2455 return 0; 2456 } 2457 2458 int 2459 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2460 { 2461 struct drm_i915_private *dev_priv = dev->dev_private; 2462 2463 /* reserve 0 for non-seqno */ 2464 if (dev_priv->next_seqno == 0) { 2465 int ret = i915_gem_init_seqno(dev, 0); 2466 if (ret) 2467 return ret; 2468 2469 dev_priv->next_seqno = 1; 2470 } 2471 2472 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2473 return 0; 2474 } 2475 2476 /* 2477 * NB: This function is not allowed to fail. Doing so would mean the the 2478 * request is not being tracked for completion but the work itself is 2479 * going to happen on the hardware. This would be a Bad Thing(tm). 2480 */ 2481 void __i915_add_request(struct drm_i915_gem_request *request, 2482 struct drm_i915_gem_object *obj, 2483 bool flush_caches) 2484 { 2485 struct intel_engine_cs *ring; 2486 struct drm_i915_private *dev_priv; 2487 struct intel_ringbuffer *ringbuf; 2488 u32 request_start; 2489 int ret; 2490 2491 if (WARN_ON(request == NULL)) 2492 return; 2493 2494 ring = request->ring; 2495 dev_priv = ring->dev->dev_private; 2496 ringbuf = request->ringbuf; 2497 2498 /* 2499 * To ensure that this call will not fail, space for its emissions 2500 * should already have been reserved in the ring buffer. Let the ring 2501 * know that it is time to use that space up. 2502 */ 2503 intel_ring_reserved_space_use(ringbuf); 2504 2505 request_start = intel_ring_get_tail(ringbuf); 2506 /* 2507 * Emit any outstanding flushes - execbuf can fail to emit the flush 2508 * after having emitted the batchbuffer command. Hence we need to fix 2509 * things up similar to emitting the lazy request. The difference here 2510 * is that the flush _must_ happen before the next request, no matter 2511 * what. 
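 *
 * (An aside on the seqno bookkeeping above: seqnos are compared with
 * wrap-safe 32-bit arithmetic, which is why 0 is reserved and why
 * last_seqno is kept one behind the value that will be injected into the
 * ring. A minimal sketch of the comparison, in the spirit of the
 * driver's i915_seqno_passed() helper:
 *
 *      static bool seqno_passed(u32 seq1, u32 seq2)
 *      {
 *              return (s32)(seq1 - seq2) >= 0;
 *      }
 *
 * For example seqno_passed(0x00000002, 0xfffffffe) is true, so a request
 * emitted just after the counter wraps still compares as newer than one
 * emitted just before it.)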
2512 */ 2513 if (flush_caches) { 2514 if (i915.enable_execlists) 2515 ret = logical_ring_flush_all_caches(request); 2516 else 2517 ret = intel_ring_flush_all_caches(request); 2518 /* Not allowed to fail! */ 2519 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2520 } 2521 2522 /* Record the position of the start of the request so that 2523 * should we detect the updated seqno part-way through the 2524 * GPU processing the request, we never over-estimate the 2525 * position of the head. 2526 */ 2527 request->postfix = intel_ring_get_tail(ringbuf); 2528 2529 if (i915.enable_execlists) 2530 ret = ring->emit_request(request); 2531 else { 2532 ret = ring->add_request(request); 2533 2534 request->tail = intel_ring_get_tail(ringbuf); 2535 } 2536 /* Not allowed to fail! */ 2537 WARN(ret, "emit|add_request failed: %d!\n", ret); 2538 2539 request->head = request_start; 2540 2541 /* Whilst this request exists, batch_obj will be on the 2542 * active_list, and so will hold the active reference. Only when this 2543 * request is retired will the the batch_obj be moved onto the 2544 * inactive_list and lose its active reference. Hence we do not need 2545 * to explicitly hold another reference here. 2546 */ 2547 request->batch_obj = obj; 2548 2549 request->emitted_jiffies = jiffies; 2550 ring->last_submitted_seqno = request->seqno; 2551 list_add_tail(&request->list, &ring->request_list); 2552 2553 trace_i915_gem_request_add(request); 2554 2555 i915_queue_hangcheck(ring->dev); 2556 2557 queue_delayed_work(dev_priv->wq, 2558 &dev_priv->mm.retire_work, 2559 round_jiffies_up_relative(HZ)); 2560 intel_mark_busy(dev_priv->dev); 2561 2562 /* Sanity check that the reserved size was large enough. */ 2563 intel_ring_reserved_space_end(ringbuf); 2564 } 2565 2566 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2567 const struct intel_context *ctx) 2568 { 2569 unsigned long elapsed; 2570 2571 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2572 2573 if (ctx->hang_stats.banned) 2574 return true; 2575 2576 if (ctx->hang_stats.ban_period_seconds && 2577 elapsed <= ctx->hang_stats.ban_period_seconds) { 2578 if (!i915_gem_context_is_default(ctx)) { 2579 DRM_DEBUG("context hanging too fast, banning!\n"); 2580 return true; 2581 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2582 if (i915_stop_ring_allow_warn(dev_priv)) 2583 DRM_ERROR("gpu hanging too fast, banning!\n"); 2584 return true; 2585 } 2586 } 2587 2588 return false; 2589 } 2590 2591 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2592 struct intel_context *ctx, 2593 const bool guilty) 2594 { 2595 struct i915_ctx_hang_stats *hs; 2596 2597 if (WARN_ON(!ctx)) 2598 return; 2599 2600 hs = &ctx->hang_stats; 2601 2602 if (guilty) { 2603 hs->banned = i915_context_is_banned(dev_priv, ctx); 2604 hs->batch_active++; 2605 hs->guilty_ts = get_seconds(); 2606 } else { 2607 hs->batch_pending++; 2608 } 2609 } 2610 2611 void i915_gem_request_free(struct kref *req_ref) 2612 { 2613 struct drm_i915_gem_request *req = container_of(req_ref, 2614 typeof(*req), ref); 2615 struct intel_context *ctx = req->ctx; 2616 2617 if (req->file_priv) 2618 i915_gem_request_remove_from_client(req); 2619 2620 if (ctx) { 2621 if (i915.enable_execlists) { 2622 if (ctx != req->ring->default_context) 2623 intel_lr_context_unpin(req); 2624 } 2625 2626 i915_gem_context_unreference(ctx); 2627 } 2628 2629 kmem_cache_free(req->i915->requests, req); 2630 } 2631 2632 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2633 struct intel_context *ctx, 2634 
struct drm_i915_gem_request **req_out) 2635 { 2636 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2637 struct drm_i915_gem_request *req; 2638 int ret; 2639 2640 if (!req_out) 2641 return -EINVAL; 2642 2643 *req_out = NULL; 2644 2645 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 2646 if (req == NULL) 2647 return -ENOMEM; 2648 2649 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2650 if (ret) 2651 goto err; 2652 2653 kref_init(&req->ref); 2654 req->i915 = dev_priv; 2655 req->ring = ring; 2656 req->ctx = ctx; 2657 i915_gem_context_reference(req->ctx); 2658 2659 if (i915.enable_execlists) 2660 ret = intel_logical_ring_alloc_request_extras(req); 2661 else 2662 ret = intel_ring_alloc_request_extras(req); 2663 if (ret) { 2664 i915_gem_context_unreference(req->ctx); 2665 goto err; 2666 } 2667 2668 /* 2669 * Reserve space in the ring buffer for all the commands required to 2670 * eventually emit this request. This is to guarantee that the 2671 * i915_add_request() call can't fail. Note that the reserve may need 2672 * to be redone if the request is not actually submitted straight 2673 * away, e.g. because a GPU scheduler has deferred it. 2674 */ 2675 if (i915.enable_execlists) 2676 ret = intel_logical_ring_reserve_space(req); 2677 else 2678 ret = intel_ring_reserve_space(req); 2679 if (ret) { 2680 /* 2681 * At this point, the request is fully allocated even if not 2682 * fully prepared. Thus it can be cleaned up using the proper 2683 * free code. 2684 */ 2685 i915_gem_request_cancel(req); 2686 return ret; 2687 } 2688 2689 *req_out = req; 2690 return 0; 2691 2692 err: 2693 kmem_cache_free(dev_priv->requests, req); 2694 return ret; 2695 } 2696 2697 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2698 { 2699 intel_ring_reserved_space_cancel(req->ringbuf); 2700 2701 i915_gem_request_unreference(req); 2702 } 2703 2704 struct drm_i915_gem_request * 2705 i915_gem_find_active_request(struct intel_engine_cs *ring) 2706 { 2707 struct drm_i915_gem_request *request; 2708 2709 list_for_each_entry(request, &ring->request_list, list) { 2710 if (i915_gem_request_completed(request, false)) 2711 continue; 2712 2713 return request; 2714 } 2715 2716 return NULL; 2717 } 2718 2719 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2720 struct intel_engine_cs *ring) 2721 { 2722 struct drm_i915_gem_request *request; 2723 bool ring_hung; 2724 2725 request = i915_gem_find_active_request(ring); 2726 2727 if (request == NULL) 2728 return; 2729 2730 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2731 2732 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2733 2734 list_for_each_entry_continue(request, &ring->request_list, list) 2735 i915_set_reset_status(dev_priv, request->ctx, false); 2736 } 2737 2738 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2739 struct intel_engine_cs *ring) 2740 { 2741 while (!list_empty(&ring->active_list)) { 2742 struct drm_i915_gem_object *obj; 2743 2744 obj = list_first_entry(&ring->active_list, 2745 struct drm_i915_gem_object, 2746 ring_list[ring->id]); 2747 2748 i915_gem_object_retire__read(obj, ring->id); 2749 } 2750 2751 /* 2752 * Clear the execlists queue up before freeing the requests, as those 2753 * are the ones that keep the context and ringbuffer backing objects 2754 * pinned in place. 
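 *
 * (Stepping back to i915_gem_request_alloc() above: the ring-space
 * reservation it performs gives callers a simple shape in which
 * submission cannot fail half-way. A sketch only, mirroring
 * i915_gpu_idle() later in this file, with do_some_work() standing in
 * for emitting commands against the reserved space:
 *
 *      struct drm_i915_gem_request *req;
 *      int ret;
 *
 *      ret = i915_gem_request_alloc(ring, ring->default_context, &req);
 *      if (ret)
 *              return ret;
 *
 *      ret = do_some_work(req);
 *      if (ret) {
 *              i915_gem_request_cancel(req);
 *              return ret;
 *      }
 *
 *      i915_add_request_no_flush(req);
 *
 * Either the request is cancelled, handing back the reserved space, or
 * it is added; it is never leaked.)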
2755 */ 2756 while (!list_empty(&ring->execlist_queue)) { 2757 struct drm_i915_gem_request *submit_req; 2758 2759 submit_req = list_first_entry(&ring->execlist_queue, 2760 struct drm_i915_gem_request, 2761 execlist_link); 2762 list_del(&submit_req->execlist_link); 2763 2764 if (submit_req->ctx != ring->default_context) 2765 intel_lr_context_unpin(submit_req); 2766 2767 i915_gem_request_unreference(submit_req); 2768 } 2769 2770 /* 2771 * We must free the requests after all the corresponding objects have 2772 * been moved off active lists. Which is the same order as the normal 2773 * retire_requests function does. This is important if object hold 2774 * implicit references on things like e.g. ppgtt address spaces through 2775 * the request. 2776 */ 2777 while (!list_empty(&ring->request_list)) { 2778 struct drm_i915_gem_request *request; 2779 2780 request = list_first_entry(&ring->request_list, 2781 struct drm_i915_gem_request, 2782 list); 2783 2784 i915_gem_request_retire(request); 2785 } 2786 } 2787 2788 void i915_gem_reset(struct drm_device *dev) 2789 { 2790 struct drm_i915_private *dev_priv = dev->dev_private; 2791 struct intel_engine_cs *ring; 2792 int i; 2793 2794 /* 2795 * Before we free the objects from the requests, we need to inspect 2796 * them for finding the guilty party. As the requests only borrow 2797 * their reference to the objects, the inspection must be done first. 2798 */ 2799 for_each_ring(ring, dev_priv, i) 2800 i915_gem_reset_ring_status(dev_priv, ring); 2801 2802 for_each_ring(ring, dev_priv, i) 2803 i915_gem_reset_ring_cleanup(dev_priv, ring); 2804 2805 i915_gem_context_reset(dev); 2806 2807 i915_gem_restore_fences(dev); 2808 2809 WARN_ON(i915_verify_lists(dev)); 2810 } 2811 2812 /** 2813 * This function clears the request list as sequence numbers are passed. 2814 */ 2815 void 2816 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2817 { 2818 WARN_ON(i915_verify_lists(ring->dev)); 2819 2820 /* Retire requests first as we use it above for the early return. 2821 * If we retire requests last, we may use a later seqno and so clear 2822 * the requests lists without clearing the active list, leading to 2823 * confusion. 2824 */ 2825 while (!list_empty(&ring->request_list)) { 2826 struct drm_i915_gem_request *request; 2827 2828 request = list_first_entry(&ring->request_list, 2829 struct drm_i915_gem_request, 2830 list); 2831 2832 if (!i915_gem_request_completed(request, true)) 2833 break; 2834 2835 i915_gem_request_retire(request); 2836 } 2837 2838 /* Move any buffers on the active list that are no longer referenced 2839 * by the ringbuffer to the flushing/inactive lists as appropriate, 2840 * before we free the context associated with the requests. 
2841 */ 2842 while (!list_empty(&ring->active_list)) { 2843 struct drm_i915_gem_object *obj; 2844 2845 obj = list_first_entry(&ring->active_list, 2846 struct drm_i915_gem_object, 2847 ring_list[ring->id]); 2848 2849 if (!list_empty(&obj->last_read_req[ring->id]->list)) 2850 break; 2851 2852 i915_gem_object_retire__read(obj, ring->id); 2853 } 2854 2855 if (unlikely(ring->trace_irq_req && 2856 i915_gem_request_completed(ring->trace_irq_req, true))) { 2857 ring->irq_put(ring); 2858 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2859 } 2860 2861 WARN_ON(i915_verify_lists(ring->dev)); 2862 } 2863 2864 bool 2865 i915_gem_retire_requests(struct drm_device *dev) 2866 { 2867 struct drm_i915_private *dev_priv = dev->dev_private; 2868 struct intel_engine_cs *ring; 2869 bool idle = true; 2870 int i; 2871 2872 for_each_ring(ring, dev_priv, i) { 2873 i915_gem_retire_requests_ring(ring); 2874 idle &= list_empty(&ring->request_list); 2875 if (i915.enable_execlists) { 2876 unsigned long flags; 2877 2878 spin_lock_irqsave(&ring->execlist_lock, flags); 2879 idle &= list_empty(&ring->execlist_queue); 2880 spin_unlock_irqrestore(&ring->execlist_lock, flags); 2881 2882 intel_execlists_retire_requests(ring); 2883 } 2884 } 2885 2886 if (idle) 2887 mod_delayed_work(dev_priv->wq, 2888 &dev_priv->mm.idle_work, 2889 msecs_to_jiffies(100)); 2890 2891 return idle; 2892 } 2893 2894 static void 2895 i915_gem_retire_work_handler(struct work_struct *work) 2896 { 2897 struct drm_i915_private *dev_priv = 2898 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2899 struct drm_device *dev = dev_priv->dev; 2900 bool idle; 2901 2902 /* Come back later if the device is busy... */ 2903 idle = false; 2904 if (mutex_trylock(&dev->struct_mutex)) { 2905 idle = i915_gem_retire_requests(dev); 2906 mutex_unlock(&dev->struct_mutex); 2907 } 2908 if (!idle) 2909 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2910 round_jiffies_up_relative(HZ)); 2911 } 2912 2913 static void 2914 i915_gem_idle_work_handler(struct work_struct *work) 2915 { 2916 struct drm_i915_private *dev_priv = 2917 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2918 struct drm_device *dev = dev_priv->dev; 2919 struct intel_engine_cs *ring; 2920 int i; 2921 2922 for_each_ring(ring, dev_priv, i) 2923 if (!list_empty(&ring->request_list)) 2924 return; 2925 2926 intel_mark_idle(dev); 2927 2928 if (mutex_trylock(&dev->struct_mutex)) { 2929 struct intel_engine_cs *ring; 2930 int i; 2931 2932 for_each_ring(ring, dev_priv, i) 2933 i915_gem_batch_pool_fini(&ring->batch_pool); 2934 2935 mutex_unlock(&dev->struct_mutex); 2936 } 2937 } 2938 2939 /** 2940 * Ensures that an object will eventually get non-busy by flushing any required 2941 * write domains, emitting any outstanding lazy request and retiring and 2942 * completed requests. 
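 *
 * This is used by the wait ioctl handled just below, among others. For
 * reference, a minimal userspace sketch of that interface (uapi from
 * <drm/i915_drm.h>, drmIoctl() from libdrm, error handling trimmed):
 *
 *      struct drm_i915_gem_wait wait = {
 *              .bo_handle = handle,
 *              .timeout_ns = 500 * 1000 * 1000,
 *      };
 *      int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * ret == 0 means the object went idle within half a second; failure with
 * errno == ETIME means it is still busy, with the remaining time written
 * back to wait.timeout_ns as described below. A timeout of 0 turns this
 * into a non-blocking busy check.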
2943 */ 2944 static int 2945 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2946 { 2947 int i; 2948 2949 if (!obj->active) 2950 return 0; 2951 2952 for (i = 0; i < I915_NUM_RINGS; i++) { 2953 struct drm_i915_gem_request *req; 2954 2955 req = obj->last_read_req[i]; 2956 if (req == NULL) 2957 continue; 2958 2959 if (list_empty(&req->list)) 2960 goto retire; 2961 2962 if (i915_gem_request_completed(req, true)) { 2963 __i915_gem_request_retire__upto(req); 2964 retire: 2965 i915_gem_object_retire__read(obj, i); 2966 } 2967 } 2968 2969 return 0; 2970 } 2971 2972 /** 2973 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2974 * @DRM_IOCTL_ARGS: standard ioctl arguments 2975 * 2976 * Returns 0 if successful, else an error is returned with the remaining time in 2977 * the timeout parameter. 2978 * -ETIME: object is still busy after timeout 2979 * -ERESTARTSYS: signal interrupted the wait 2980 * -ENOENT: object doesn't exist 2981 * Also possible, but rare: 2982 * -EAGAIN: GPU wedged 2983 * -ENOMEM: damn 2984 * -ENODEV: Internal IRQ fail 2985 * -E?: The add request failed 2986 * 2987 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2988 * non-zero timeout parameter the wait ioctl will wait for the given number of 2989 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2990 * without holding struct_mutex the object may become re-busied before this 2991 * function completes. A similar but shorter race condition exists in the busy 2992 * ioctl. 2993 */ 2994 int 2995 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2996 { 2997 struct drm_i915_private *dev_priv = dev->dev_private; 2998 struct drm_i915_gem_wait *args = data; 2999 struct drm_i915_gem_object *obj; 3000 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3001 unsigned reset_counter; 3002 int i, n = 0; 3003 int ret; 3004 3005 if (args->flags != 0) 3006 return -EINVAL; 3007 3008 ret = i915_mutex_lock_interruptible(dev); 3009 if (ret) 3010 return ret; 3011 3012 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3013 if (&obj->base == NULL) { 3014 mutex_unlock(&dev->struct_mutex); 3015 return -ENOENT; 3016 } 3017 3018 /* Need to make sure the object gets inactive eventually. */ 3019 ret = i915_gem_object_flush_active(obj); 3020 if (ret) 3021 goto out; 3022 3023 if (!obj->active) 3024 goto out; 3025 3026 /* Do this after OLR check to make sure we make forward progress polling 3027 * on this IOCTL with a timeout == 0 (like busy ioctl) 3028 */ 3029 if (args->timeout_ns == 0) { 3030 ret = -ETIME; 3031 goto out; 3032 } 3033 3034 drm_gem_object_unreference(&obj->base); 3035 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3036 3037 for (i = 0; i < I915_NUM_RINGS; i++) { 3038 if (obj->last_read_req[i] == NULL) 3039 continue; 3040 3041 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3042 } 3043 3044 mutex_unlock(&dev->struct_mutex); 3045 3046 for (i = 0; i < n; i++) { 3047 if (ret == 0) 3048 ret = __i915_wait_request(req[i], reset_counter, true, 3049 args->timeout_ns > 0 ?
&args->timeout_ns : NULL, 3050 file->driver_priv); 3051 i915_gem_request_unreference__unlocked(req[i]); 3052 } 3053 return ret; 3054 3055 out: 3056 drm_gem_object_unreference(&obj->base); 3057 mutex_unlock(&dev->struct_mutex); 3058 return ret; 3059 } 3060 3061 static int 3062 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3063 struct intel_engine_cs *to, 3064 struct drm_i915_gem_request *from_req, 3065 struct drm_i915_gem_request **to_req) 3066 { 3067 struct intel_engine_cs *from; 3068 int ret; 3069 3070 from = i915_gem_request_get_ring(from_req); 3071 if (to == from) 3072 return 0; 3073 3074 if (i915_gem_request_completed(from_req, true)) 3075 return 0; 3076 3077 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3078 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3079 ret = __i915_wait_request(from_req, 3080 atomic_read(&i915->gpu_error.reset_counter), 3081 i915->mm.interruptible, 3082 NULL, 3083 &i915->rps.semaphores); 3084 if (ret) 3085 return ret; 3086 3087 i915_gem_object_retire_request(obj, from_req); 3088 } else { 3089 int idx = intel_ring_sync_index(from, to); 3090 u32 seqno = i915_gem_request_get_seqno(from_req); 3091 3092 WARN_ON(!to_req); 3093 3094 if (seqno <= from->semaphore.sync_seqno[idx]) 3095 return 0; 3096 3097 if (*to_req == NULL) { 3098 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3099 if (ret) 3100 return ret; 3101 } 3102 3103 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3104 ret = to->semaphore.sync_to(*to_req, from, seqno); 3105 if (ret) 3106 return ret; 3107 3108 /* We use last_read_req because sync_to() 3109 * might have just caused seqno wrap under 3110 * the radar. 3111 */ 3112 from->semaphore.sync_seqno[idx] = 3113 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3114 } 3115 3116 return 0; 3117 } 3118 3119 /** 3120 * i915_gem_object_sync - sync an object to a ring. 3121 * 3122 * @obj: object which may be in use on another ring. 3123 * @to: ring we wish to use the object on. May be NULL. 3124 * @to_req: request we wish to use the object for. See below. 3125 * This will be allocated and returned if a request is 3126 * required but not passed in. 3127 * 3128 * This code is meant to abstract object synchronization with the GPU. 3129 * Calling with NULL implies synchronizing the object with the CPU 3130 * rather than a particular GPU ring. Conceptually we serialise writes 3131 * between engines inside the GPU. We only allow one engine to write 3132 * into a buffer at any time, but multiple readers. To ensure each has 3133 * a coherent view of memory, we must: 3134 * 3135 * - If there is an outstanding write request to the object, the new 3136 * request must wait for it to complete (either CPU or in hw, requests 3137 * on the same ring will be naturally ordered). 3138 * 3139 * - If we are a write request (pending_write_domain is set), the new 3140 * request must wait for outstanding read requests to complete. 3141 * 3142 * For CPU synchronisation (NULL to) no request is required. For syncing with 3143 * rings to_req must be non-NULL. However, a request does not have to be 3144 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3145 * request will be allocated automatically and returned through *to_req. Note 3146 * that it is not guaranteed that commands will be emitted (because the system 3147 * might already be idle). Hence there is no need to create a request that 3148 * might never have any work submitted. 
Note further that if a request is 3149 * returned in *to_req, it is the responsibility of the caller to submit 3150 * that request (after potentially adding more work to it). 3151 * 3152 * Returns 0 if successful, else propagates up the lower layer error. 3153 */ 3154 int 3155 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3156 struct intel_engine_cs *to, 3157 struct drm_i915_gem_request **to_req) 3158 { 3159 const bool readonly = obj->base.pending_write_domain == 0; 3160 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3161 int ret, i, n; 3162 3163 if (!obj->active) 3164 return 0; 3165 3166 if (to == NULL) 3167 return i915_gem_object_wait_rendering(obj, readonly); 3168 3169 n = 0; 3170 if (readonly) { 3171 if (obj->last_write_req) 3172 req[n++] = obj->last_write_req; 3173 } else { 3174 for (i = 0; i < I915_NUM_RINGS; i++) 3175 if (obj->last_read_req[i]) 3176 req[n++] = obj->last_read_req[i]; 3177 } 3178 for (i = 0; i < n; i++) { 3179 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3180 if (ret) 3181 return ret; 3182 } 3183 3184 return 0; 3185 } 3186 3187 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3188 { 3189 u32 old_write_domain, old_read_domains; 3190 3191 /* Force a pagefault for domain tracking on next user access */ 3192 i915_gem_release_mmap(obj); 3193 3194 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3195 return; 3196 3197 /* Wait for any direct GTT access to complete */ 3198 mb(); 3199 3200 old_read_domains = obj->base.read_domains; 3201 old_write_domain = obj->base.write_domain; 3202 3203 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3204 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3205 3206 trace_i915_gem_object_change_domain(obj, 3207 old_read_domains, 3208 old_write_domain); 3209 } 3210 3211 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3212 { 3213 struct drm_i915_gem_object *obj = vma->obj; 3214 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3215 int ret; 3216 3217 if (list_empty(&vma->vma_link)) 3218 return 0; 3219 3220 if (!drm_mm_node_allocated(&vma->node)) { 3221 i915_gem_vma_destroy(vma); 3222 return 0; 3223 } 3224 3225 if (vma->pin_count) 3226 return -EBUSY; 3227 3228 BUG_ON(obj->pages == NULL); 3229 3230 if (wait) { 3231 ret = i915_gem_object_wait_rendering(obj, false); 3232 if (ret) 3233 return ret; 3234 } 3235 3236 if (i915_is_ggtt(vma->vm) && 3237 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3238 i915_gem_object_finish_gtt(obj); 3239 3240 /* release the fence reg _after_ flushing */ 3241 ret = i915_gem_object_put_fence(obj); 3242 if (ret) 3243 return ret; 3244 } 3245 3246 trace_i915_vma_unbind(vma); 3247 3248 vma->vm->unbind_vma(vma); 3249 vma->bound = 0; 3250 3251 list_del_init(&vma->mm_list); 3252 if (i915_is_ggtt(vma->vm)) { 3253 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3254 obj->map_and_fenceable = false; 3255 } else if (vma->ggtt_view.pages) { 3256 sg_free_table(vma->ggtt_view.pages); 3257 kfree(vma->ggtt_view.pages); 3258 } 3259 vma->ggtt_view.pages = NULL; 3260 } 3261 3262 drm_mm_remove_node(&vma->node); 3263 i915_gem_vma_destroy(vma); 3264 3265 /* Since the unbound list is global, only move to that list if 3266 * no more VMAs exist. */ 3267 if (list_empty(&obj->vma_list)) 3268 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3269 3270 /* And finally now the object is completely decoupled from this vma, 3271 * we can drop its hold on the backing storage and allow it to be 3272 * reaped by the shrinker. 
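 *
 * (Returning to the i915_gem_object_sync() contract documented above: a
 * caller that passes in a NULL *to_req typically ends up with this
 * shape, shown as a sketch only and submitting straight away for
 * simplicity even though the caller is free to add more work to the
 * request first:
 *
 *      struct drm_i915_gem_request *to_req = NULL;
 *      int ret;
 *
 *      ret = i915_gem_object_sync(obj, ring, &to_req);
 *      if (ret)
 *              return ret;
 *      if (to_req)
 *              i915_add_request_no_flush(to_req);
 *
 * i.e. a request only exists if semaphore commands were actually
 * emitted, and submitting it is then the caller's responsibility.)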
3273 */ 3274 i915_gem_object_unpin_pages(obj); 3275 3276 return 0; 3277 } 3278 3279 int i915_vma_unbind(struct i915_vma *vma) 3280 { 3281 return __i915_vma_unbind(vma, true); 3282 } 3283 3284 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3285 { 3286 return __i915_vma_unbind(vma, false); 3287 } 3288 3289 int i915_gpu_idle(struct drm_device *dev) 3290 { 3291 struct drm_i915_private *dev_priv = dev->dev_private; 3292 struct intel_engine_cs *ring; 3293 int ret, i; 3294 3295 /* Flush everything onto the inactive list. */ 3296 for_each_ring(ring, dev_priv, i) { 3297 if (!i915.enable_execlists) { 3298 struct drm_i915_gem_request *req; 3299 3300 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3301 if (ret) 3302 return ret; 3303 3304 ret = i915_switch_context(req); 3305 if (ret) { 3306 i915_gem_request_cancel(req); 3307 return ret; 3308 } 3309 3310 i915_add_request_no_flush(req); 3311 } 3312 3313 ret = intel_ring_idle(ring); 3314 if (ret) 3315 return ret; 3316 } 3317 3318 WARN_ON(i915_verify_lists(dev)); 3319 return 0; 3320 } 3321 3322 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3323 unsigned long cache_level) 3324 { 3325 struct drm_mm_node *gtt_space = &vma->node; 3326 struct drm_mm_node *other; 3327 3328 /* 3329 * On some machines we have to be careful when putting differing types 3330 * of snoopable memory together to avoid the prefetcher crossing memory 3331 * domains and dying. During vm initialisation, we decide whether or not 3332 * these constraints apply and set the drm_mm.color_adjust 3333 * appropriately. 3334 */ 3335 if (vma->vm->mm.color_adjust == NULL) 3336 return true; 3337 3338 if (!drm_mm_node_allocated(gtt_space)) 3339 return true; 3340 3341 if (list_empty(&gtt_space->node_list)) 3342 return true; 3343 3344 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3345 if (other->allocated && !other->hole_follows && other->color != cache_level) 3346 return false; 3347 3348 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3349 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3350 return false; 3351 3352 return true; 3353 } 3354 3355 /** 3356 * Finds free space in the GTT aperture and binds the object or a view of it 3357 * there. 3358 */ 3359 static struct i915_vma * 3360 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3361 struct i915_address_space *vm, 3362 const struct i915_ggtt_view *ggtt_view, 3363 unsigned alignment, 3364 uint64_t flags) 3365 { 3366 struct drm_device *dev = obj->base.dev; 3367 struct drm_i915_private *dev_priv = dev->dev_private; 3368 u32 fence_alignment, unfenced_alignment; 3369 u32 search_flag, alloc_flag; 3370 u64 start, end; 3371 u64 size, fence_size; 3372 struct i915_vma *vma; 3373 int ret; 3374 3375 if (i915_is_ggtt(vm)) { 3376 u32 view_size; 3377 3378 if (WARN_ON(!ggtt_view)) 3379 return ERR_PTR(-EINVAL); 3380 3381 view_size = i915_ggtt_view_size(obj, ggtt_view); 3382 3383 fence_size = i915_gem_get_gtt_size(dev, 3384 view_size, 3385 obj->tiling_mode); 3386 fence_alignment = i915_gem_get_gtt_alignment(dev, 3387 view_size, 3388 obj->tiling_mode, 3389 true); 3390 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3391 view_size, 3392 obj->tiling_mode, 3393 false); 3394 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3395 } else { 3396 fence_size = i915_gem_get_gtt_size(dev, 3397 obj->base.size, 3398 obj->tiling_mode); 3399 fence_alignment = i915_gem_get_gtt_alignment(dev, 3400 obj->base.size, 3401 obj->tiling_mode, 3402 true); 3403 unfenced_alignment = 3404 i915_gem_get_gtt_alignment(dev, 3405 obj->base.size, 3406 obj->tiling_mode, 3407 false); 3408 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3409 } 3410 3411 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3412 end = vm->total; 3413 if (flags & PIN_MAPPABLE) 3414 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3415 if (flags & PIN_ZONE_4G) 3416 end = min_t(u64, end, (1ULL << 32)); 3417 3418 if (alignment == 0) 3419 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3420 unfenced_alignment; 3421 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3422 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3423 ggtt_view ? ggtt_view->type : 0, 3424 alignment); 3425 return ERR_PTR(-EINVAL); 3426 } 3427 3428 /* If binding the object/GGTT view requires more space than the entire 3429 * aperture has, reject it early before evicting everything in a vain 3430 * attempt to find space. 3431 */ 3432 if (size > end) { 3433 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3434 ggtt_view ? ggtt_view->type : 0, 3435 size, 3436 flags & PIN_MAPPABLE ? "mappable" : "total", 3437 end); 3438 return ERR_PTR(-E2BIG); 3439 } 3440 3441 ret = i915_gem_object_get_pages(obj); 3442 if (ret) 3443 return ERR_PTR(ret); 3444 3445 i915_gem_object_pin_pages(obj); 3446 3447 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3448 i915_gem_obj_lookup_or_create_vma(obj, vm); 3449 3450 if (IS_ERR(vma)) 3451 goto err_unpin; 3452 3453 if (flags & PIN_HIGH) { 3454 search_flag = DRM_MM_SEARCH_BELOW; 3455 alloc_flag = DRM_MM_CREATE_TOP; 3456 } else { 3457 search_flag = DRM_MM_SEARCH_DEFAULT; 3458 alloc_flag = DRM_MM_CREATE_DEFAULT; 3459 } 3460 3461 search_free: 3462 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3463 size, alignment, 3464 obj->cache_level, 3465 start, end, 3466 search_flag, 3467 alloc_flag); 3468 if (ret) { 3469 ret = i915_gem_evict_something(dev, vm, size, alignment, 3470 obj->cache_level, 3471 start, end, 3472 flags); 3473 if (ret == 0) 3474 goto search_free; 3475 3476 goto err_free_vma; 3477 } 3478 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3479 ret = -EINVAL; 3480 goto err_remove_node; 3481 } 3482 3483 trace_i915_vma_bind(vma, flags); 3484 ret = i915_vma_bind(vma, obj->cache_level, flags); 3485 if (ret) 3486 goto err_remove_node; 3487 3488 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3489 list_add_tail(&vma->mm_list, &vm->inactive_list); 3490 3491 return vma; 3492 3493 err_remove_node: 3494 drm_mm_remove_node(&vma->node); 3495 err_free_vma: 3496 i915_gem_vma_destroy(vma); 3497 vma = ERR_PTR(ret); 3498 err_unpin: 3499 i915_gem_object_unpin_pages(obj); 3500 return vma; 3501 } 3502 3503 bool 3504 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3505 bool force) 3506 { 3507 /* If we don't have a page list set up, then we're not pinned 3508 * to GPU, and we can ignore the cache flush because it'll happen 3509 * again at bind time. 3510 */ 3511 if (obj->pages == NULL) 3512 return false; 3513 3514 /* 3515 * Stolen memory is always coherent with the GPU as it is explicitly 3516 * marked as wc by the system, or the system is cache-coherent. 
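 *
 * Taking the early returns in this function together, the flush is only
 * issued when it can matter; roughly (an illustrative predicate, not a
 * helper that exists in the driver):
 *
 *      static bool needs_clflush(bool has_pages, bool stolen_or_phys,
 *                                bool llc_coherent, bool force)
 *      {
 *              if (!has_pages || stolen_or_phys)
 *                      return false;
 *              return force || !llc_coherent;
 *      }
 *
 * where llc_coherent corresponds to cpu_cache_is_coherent() near the top
 * of this file.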
3517 */ 3518 if (obj->stolen || obj->phys_handle) 3519 return false; 3520 3521 /* If the GPU is snooping the contents of the CPU cache, 3522 * we do not need to manually clear the CPU cache lines. However, 3523 * the caches are only snooped when the render cache is 3524 * flushed/invalidated. As we always have to emit invalidations 3525 * and flushes when moving into and out of the RENDER domain, correct 3526 * snooping behaviour occurs naturally as the result of our domain 3527 * tracking. 3528 */ 3529 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3530 obj->cache_dirty = true; 3531 return false; 3532 } 3533 3534 trace_i915_gem_object_clflush(obj); 3535 drm_clflush_sg(obj->pages); 3536 obj->cache_dirty = false; 3537 3538 return true; 3539 } 3540 3541 /** Flushes the GTT write domain for the object if it's dirty. */ 3542 static void 3543 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3544 { 3545 uint32_t old_write_domain; 3546 3547 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3548 return; 3549 3550 /* No actual flushing is required for the GTT write domain. Writes 3551 * to it immediately go to main memory as far as we know, so there's 3552 * no chipset flush. It also doesn't land in render cache. 3553 * 3554 * However, we do have to enforce the order so that all writes through 3555 * the GTT land before any writes to the device, such as updates to 3556 * the GATT itself. 3557 */ 3558 wmb(); 3559 3560 old_write_domain = obj->base.write_domain; 3561 obj->base.write_domain = 0; 3562 3563 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3564 3565 trace_i915_gem_object_change_domain(obj, 3566 obj->base.read_domains, 3567 old_write_domain); 3568 } 3569 3570 /** Flushes the CPU write domain for the object if it's dirty. */ 3571 static void 3572 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3573 { 3574 uint32_t old_write_domain; 3575 3576 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3577 return; 3578 3579 if (i915_gem_clflush_object(obj, obj->pin_display)) 3580 i915_gem_chipset_flush(obj->base.dev); 3581 3582 old_write_domain = obj->base.write_domain; 3583 obj->base.write_domain = 0; 3584 3585 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3586 3587 trace_i915_gem_object_change_domain(obj, 3588 obj->base.read_domains, 3589 old_write_domain); 3590 } 3591 3592 /** 3593 * Moves a single object to the GTT read, and possibly write domain. 3594 * 3595 * This function returns when the move is complete, including waiting on 3596 * flushes to occur. 3597 */ 3598 int 3599 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3600 { 3601 uint32_t old_write_domain, old_read_domains; 3602 struct i915_vma *vma; 3603 int ret; 3604 3605 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3606 return 0; 3607 3608 ret = i915_gem_object_wait_rendering(obj, !write); 3609 if (ret) 3610 return ret; 3611 3612 /* Flush and acquire obj->pages so that we are coherent through 3613 * direct access in memory with previous cached writes through 3614 * shmemfs and that our cache domain tracking remains valid. 3615 * For example, if the obj->filp was moved to swap without us 3616 * being notified and releasing the pages, we would mistakenly 3617 * continue to assume that the obj remained out of the CPU cached 3618 * domain. 
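 *
 * (Usage note, as a sketch: callers bracket direct access with these
 * domain helpers, for example
 *
 *      ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *
 * before writing through a GTT mapping, and
 *
 *      ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *
 * before reading the pages back through the CPU, so that the domain
 * tracking knows which caches, if any, must be flushed or invalidated
 * in between.)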
3619 */ 3620 ret = i915_gem_object_get_pages(obj); 3621 if (ret) 3622 return ret; 3623 3624 i915_gem_object_flush_cpu_write_domain(obj); 3625 3626 /* Serialise direct access to this object with the barriers for 3627 * coherent writes from the GPU, by effectively invalidating the 3628 * GTT domain upon first access. 3629 */ 3630 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3631 mb(); 3632 3633 old_write_domain = obj->base.write_domain; 3634 old_read_domains = obj->base.read_domains; 3635 3636 /* It should now be out of any other write domains, and we can update 3637 * the domain values for our changes. 3638 */ 3639 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3640 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3641 if (write) { 3642 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3643 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3644 obj->dirty = 1; 3645 } 3646 3647 trace_i915_gem_object_change_domain(obj, 3648 old_read_domains, 3649 old_write_domain); 3650 3651 /* And bump the LRU for this access */ 3652 vma = i915_gem_obj_to_ggtt(obj); 3653 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3654 list_move_tail(&vma->mm_list, 3655 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3656 3657 return 0; 3658 } 3659 3660 /** 3661 * Changes the cache-level of an object across all VMA. 3662 * 3663 * After this function returns, the object will be in the new cache-level 3664 * across all GTT and the contents of the backing storage will be coherent, 3665 * with respect to the new cache-level. In order to keep the backing storage 3666 * coherent for all users, we only allow a single cache level to be set 3667 * globally on the object and prevent it from being changed whilst the 3668 * hardware is reading from the object. That is if the object is currently 3669 * on the scanout it will be set to uncached (or equivalent display 3670 * cache coherency) and all non-MOCS GPU access will also be uncached so 3671 * that all direct access to the scanout remains coherent. 3672 */ 3673 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3674 enum i915_cache_level cache_level) 3675 { 3676 struct drm_device *dev = obj->base.dev; 3677 struct i915_vma *vma, *next; 3678 bool bound = false; 3679 int ret = 0; 3680 3681 if (obj->cache_level == cache_level) 3682 goto out; 3683 3684 /* Inspect the list of currently bound VMA and unbind any that would 3685 * be invalid given the new cache-level. This is principally to 3686 * catch the issue of the CS prefetch crossing page boundaries and 3687 * reading an invalid PTE on older architectures. 3688 */ 3689 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3690 if (!drm_mm_node_allocated(&vma->node)) 3691 continue; 3692 3693 if (vma->pin_count) { 3694 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3695 return -EBUSY; 3696 } 3697 3698 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3699 ret = i915_vma_unbind(vma); 3700 if (ret) 3701 return ret; 3702 } else 3703 bound = true; 3704 } 3705 3706 /* We can reuse the existing drm_mm nodes but need to change the 3707 * cache-level on the PTE. We could simply unbind them all and 3708 * rebind with the correct cache-level on next use. However since 3709 * we already have a valid slot, dma mapping, pages etc, we may as 3710 * rewrite the PTE in the belief that doing so tramples upon less 3711 * state and so involves less work. 3712 */ 3713 if (bound) { 3714 /* Before we change the PTE, the GPU must not be accessing it. 
3715 * If we wait upon the object, we know that all the bound 3716 * VMA are no longer active. 3717 */ 3718 ret = i915_gem_object_wait_rendering(obj, false); 3719 if (ret) 3720 return ret; 3721 3722 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3723 /* Access to snoopable pages through the GTT is 3724 * incoherent and on some machines causes a hard 3725 * lockup. Relinquish the CPU mmaping to force 3726 * userspace to refault in the pages and we can 3727 * then double check if the GTT mapping is still 3728 * valid for that pointer access. 3729 */ 3730 i915_gem_release_mmap(obj); 3731 3732 /* As we no longer need a fence for GTT access, 3733 * we can relinquish it now (and so prevent having 3734 * to steal a fence from someone else on the next 3735 * fence request). Note GPU activity would have 3736 * dropped the fence as all snoopable access is 3737 * supposed to be linear. 3738 */ 3739 ret = i915_gem_object_put_fence(obj); 3740 if (ret) 3741 return ret; 3742 } else { 3743 /* We either have incoherent backing store and 3744 * so no GTT access or the architecture is fully 3745 * coherent. In such cases, existing GTT mmaps 3746 * ignore the cache bit in the PTE and we can 3747 * rewrite it without confusing the GPU or having 3748 * to force userspace to fault back in its mmaps. 3749 */ 3750 } 3751 3752 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3753 if (!drm_mm_node_allocated(&vma->node)) 3754 continue; 3755 3756 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3757 if (ret) 3758 return ret; 3759 } 3760 } 3761 3762 list_for_each_entry(vma, &obj->vma_list, vma_link) 3763 vma->node.color = cache_level; 3764 obj->cache_level = cache_level; 3765 3766 out: 3767 /* Flush the dirty CPU caches to the backing storage so that the 3768 * object is now coherent at its new cache level (with respect 3769 * to the access domain). 3770 */ 3771 if (obj->cache_dirty && 3772 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 3773 cpu_write_needs_clflush(obj)) { 3774 if (i915_gem_clflush_object(obj, true)) 3775 i915_gem_chipset_flush(obj->base.dev); 3776 } 3777 3778 return 0; 3779 } 3780 3781 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3782 struct drm_file *file) 3783 { 3784 struct drm_i915_gem_caching *args = data; 3785 struct drm_i915_gem_object *obj; 3786 3787 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3788 if (&obj->base == NULL) 3789 return -ENOENT; 3790 3791 switch (obj->cache_level) { 3792 case I915_CACHE_LLC: 3793 case I915_CACHE_L3_LLC: 3794 args->caching = I915_CACHING_CACHED; 3795 break; 3796 3797 case I915_CACHE_WT: 3798 args->caching = I915_CACHING_DISPLAY; 3799 break; 3800 3801 default: 3802 args->caching = I915_CACHING_NONE; 3803 break; 3804 } 3805 3806 drm_gem_object_unreference_unlocked(&obj->base); 3807 return 0; 3808 } 3809 3810 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3811 struct drm_file *file) 3812 { 3813 struct drm_i915_gem_caching *args = data; 3814 struct drm_i915_gem_object *obj; 3815 enum i915_cache_level level; 3816 int ret; 3817 3818 switch (args->caching) { 3819 case I915_CACHING_NONE: 3820 level = I915_CACHE_NONE; 3821 break; 3822 case I915_CACHING_CACHED: 3823 /* 3824 * Due to a HW issue on BXT A stepping, GPU stores via a 3825 * snooped mapping may leave stale data in a corresponding CPU 3826 * cacheline, whereas normally such cachelines would get 3827 * invalidated. 
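 *
 * More generally, the userspace side of this ioctl is simply (uapi from
 * <drm/i915_drm.h>, drmIoctl() from libdrm):
 *
 *      struct drm_i915_gem_caching arg = {
 *              .handle = handle,
 *              .caching = I915_CACHING_CACHED,
 *      };
 *      drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * with I915_CACHING_NONE, I915_CACHING_CACHED and I915_CACHING_DISPLAY
 * mapping onto the cache levels handled by this switch.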
3828 */ 3829 if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) 3830 return -ENODEV; 3831 3832 level = I915_CACHE_LLC; 3833 break; 3834 case I915_CACHING_DISPLAY: 3835 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3836 break; 3837 default: 3838 return -EINVAL; 3839 } 3840 3841 ret = i915_mutex_lock_interruptible(dev); 3842 if (ret) 3843 return ret; 3844 3845 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3846 if (&obj->base == NULL) { 3847 ret = -ENOENT; 3848 goto unlock; 3849 } 3850 3851 ret = i915_gem_object_set_cache_level(obj, level); 3852 3853 drm_gem_object_unreference(&obj->base); 3854 unlock: 3855 mutex_unlock(&dev->struct_mutex); 3856 return ret; 3857 } 3858 3859 /* 3860 * Prepare buffer for display plane (scanout, cursors, etc). 3861 * Can be called from an uninterruptible phase (modesetting) and allows 3862 * any flushes to be pipelined (for pageflips). 3863 */ 3864 int 3865 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3866 u32 alignment, 3867 struct intel_engine_cs *pipelined, 3868 struct drm_i915_gem_request **pipelined_request, 3869 const struct i915_ggtt_view *view) 3870 { 3871 u32 old_read_domains, old_write_domain; 3872 int ret; 3873 3874 ret = i915_gem_object_sync(obj, pipelined, pipelined_request); 3875 if (ret) 3876 return ret; 3877 3878 /* Mark the pin_display early so that we account for the 3879 * display coherency whilst setting up the cache domains. 3880 */ 3881 obj->pin_display++; 3882 3883 /* The display engine is not coherent with the LLC cache on gen6. As 3884 * a result, we make sure that the pinning that is about to occur is 3885 * done with uncached PTEs. This is lowest common denominator for all 3886 * chipsets. 3887 * 3888 * However for gen6+, we could do better by using the GFDT bit instead 3889 * of uncaching, which would allow us to flush all the LLC-cached data 3890 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3891 */ 3892 ret = i915_gem_object_set_cache_level(obj, 3893 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3894 if (ret) 3895 goto err_unpin_display; 3896 3897 /* As the user may map the buffer once pinned in the display plane 3898 * (e.g. libkms for the bootup splash), we have to ensure that we 3899 * always use map_and_fenceable for all scanout buffers. 3900 */ 3901 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 3902 view->type == I915_GGTT_VIEW_NORMAL ? 3903 PIN_MAPPABLE : 0); 3904 if (ret) 3905 goto err_unpin_display; 3906 3907 i915_gem_object_flush_cpu_write_domain(obj); 3908 3909 old_write_domain = obj->base.write_domain; 3910 old_read_domains = obj->base.read_domains; 3911 3912 /* It should now be out of any other write domains, and we can update 3913 * the domain values for our changes. 3914 */ 3915 obj->base.write_domain = 0; 3916 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3917 3918 trace_i915_gem_object_change_domain(obj, 3919 old_read_domains, 3920 old_write_domain); 3921 3922 return 0; 3923 3924 err_unpin_display: 3925 obj->pin_display--; 3926 return ret; 3927 } 3928 3929 void 3930 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3931 const struct i915_ggtt_view *view) 3932 { 3933 if (WARN_ON(obj->pin_display == 0)) 3934 return; 3935 3936 i915_gem_object_ggtt_unpin_view(obj, view); 3937 3938 obj->pin_display--; 3939 } 3940 3941 /** 3942 * Moves a single object to the CPU read, and possibly write domain. 
3943 * 3944 * This function returns when the move is complete, including waiting on 3945 * flushes to occur. 3946 */ 3947 int 3948 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3949 { 3950 uint32_t old_write_domain, old_read_domains; 3951 int ret; 3952 3953 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3954 return 0; 3955 3956 ret = i915_gem_object_wait_rendering(obj, !write); 3957 if (ret) 3958 return ret; 3959 3960 i915_gem_object_flush_gtt_write_domain(obj); 3961 3962 old_write_domain = obj->base.write_domain; 3963 old_read_domains = obj->base.read_domains; 3964 3965 /* Flush the CPU cache if it's still invalid. */ 3966 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3967 i915_gem_clflush_object(obj, false); 3968 3969 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3970 } 3971 3972 /* It should now be out of any other write domains, and we can update 3973 * the domain values for our changes. 3974 */ 3975 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3976 3977 /* If we're writing through the CPU, then the GPU read domains will 3978 * need to be invalidated at next use. 3979 */ 3980 if (write) { 3981 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3982 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3983 } 3984 3985 trace_i915_gem_object_change_domain(obj, 3986 old_read_domains, 3987 old_write_domain); 3988 3989 return 0; 3990 } 3991 3992 /* Throttle our rendering by waiting until the ring has completed our requests 3993 * emitted over 20 msec ago. 3994 * 3995 * Note that if we were to use the current jiffies each time around the loop, 3996 * we wouldn't escape the function with any frames outstanding if the time to 3997 * render a frame was over 20ms. 3998 * 3999 * This should get us reasonable parallelism between CPU and GPU but also 4000 * relatively low latency when blocking on a particular request to finish. 4001 */ 4002 static int 4003 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4004 { 4005 struct drm_i915_private *dev_priv = dev->dev_private; 4006 struct drm_i915_file_private *file_priv = file->driver_priv; 4007 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4008 struct drm_i915_gem_request *request, *target = NULL; 4009 unsigned reset_counter; 4010 int ret; 4011 4012 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4013 if (ret) 4014 return ret; 4015 4016 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4017 if (ret) 4018 return ret; 4019 4020 spin_lock(&file_priv->mm.lock); 4021 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4022 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4023 break; 4024 4025 /* 4026 * Note that the request might not have been submitted yet. 4027 * In which case emitted_jiffies will be zero. 
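 *
 * (Worked example of the throttle window: with requests on this client's
 * list emitted 40ms, 25ms, 15ms and 5ms ago and a 20ms window, this loop
 * stops at the 15ms entry and leaves target pointing at the 25ms-old
 * request, the most recent one outside the window, so the caller blocks
 * until that request completes and the CPU stays roughly one window
 * ahead of the GPU.)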
4028 */ 4029 if (!request->emitted_jiffies) 4030 continue; 4031 4032 target = request; 4033 } 4034 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4035 if (target) 4036 i915_gem_request_reference(target); 4037 spin_unlock(&file_priv->mm.lock); 4038 4039 if (target == NULL) 4040 return 0; 4041 4042 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4043 if (ret == 0) 4044 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4045 4046 i915_gem_request_unreference__unlocked(target); 4047 4048 return ret; 4049 } 4050 4051 static bool 4052 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4053 { 4054 struct drm_i915_gem_object *obj = vma->obj; 4055 4056 if (alignment && 4057 vma->node.start & (alignment - 1)) 4058 return true; 4059 4060 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4061 return true; 4062 4063 if (flags & PIN_OFFSET_BIAS && 4064 vma->node.start < (flags & PIN_OFFSET_MASK)) 4065 return true; 4066 4067 return false; 4068 } 4069 4070 static int 4071 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4072 struct i915_address_space *vm, 4073 const struct i915_ggtt_view *ggtt_view, 4074 uint32_t alignment, 4075 uint64_t flags) 4076 { 4077 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4078 struct i915_vma *vma; 4079 unsigned bound; 4080 int ret; 4081 4082 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4083 return -ENODEV; 4084 4085 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4086 return -EINVAL; 4087 4088 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4089 return -EINVAL; 4090 4091 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4092 return -EINVAL; 4093 4094 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4095 i915_gem_obj_to_vma(obj, vm); 4096 4097 if (IS_ERR(vma)) 4098 return PTR_ERR(vma); 4099 4100 if (vma) { 4101 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4102 return -EBUSY; 4103 4104 if (i915_vma_misplaced(vma, alignment, flags)) { 4105 WARN(vma->pin_count, 4106 "bo is already pinned in %s with incorrect alignment:" 4107 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4108 " obj->map_and_fenceable=%d\n", 4109 ggtt_view ? "ggtt" : "ppgtt", 4110 upper_32_bits(vma->node.start), 4111 lower_32_bits(vma->node.start), 4112 alignment, 4113 !!(flags & PIN_MAPPABLE), 4114 obj->map_and_fenceable); 4115 ret = i915_vma_unbind(vma); 4116 if (ret) 4117 return ret; 4118 4119 vma = NULL; 4120 } 4121 } 4122 4123 bound = vma ? 
vma->bound : 0; 4124 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4125 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4126 flags); 4127 if (IS_ERR(vma)) 4128 return PTR_ERR(vma); 4129 } else { 4130 ret = i915_vma_bind(vma, obj->cache_level, flags); 4131 if (ret) 4132 return ret; 4133 } 4134 4135 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4136 (bound ^ vma->bound) & GLOBAL_BIND) { 4137 bool mappable, fenceable; 4138 u32 fence_size, fence_alignment; 4139 4140 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4141 obj->base.size, 4142 obj->tiling_mode); 4143 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4144 obj->base.size, 4145 obj->tiling_mode, 4146 true); 4147 4148 fenceable = (vma->node.size == fence_size && 4149 (vma->node.start & (fence_alignment - 1)) == 0); 4150 4151 mappable = (vma->node.start + fence_size <= 4152 dev_priv->gtt.mappable_end); 4153 4154 obj->map_and_fenceable = mappable && fenceable; 4155 4156 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4157 } 4158 4159 vma->pin_count++; 4160 return 0; 4161 } 4162 4163 int 4164 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4165 struct i915_address_space *vm, 4166 uint32_t alignment, 4167 uint64_t flags) 4168 { 4169 return i915_gem_object_do_pin(obj, vm, 4170 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4171 alignment, flags); 4172 } 4173 4174 int 4175 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4176 const struct i915_ggtt_view *view, 4177 uint32_t alignment, 4178 uint64_t flags) 4179 { 4180 if (WARN_ONCE(!view, "no view specified")) 4181 return -EINVAL; 4182 4183 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4184 alignment, flags | PIN_GLOBAL); 4185 } 4186 4187 void 4188 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4189 const struct i915_ggtt_view *view) 4190 { 4191 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4192 4193 BUG_ON(!vma); 4194 WARN_ON(vma->pin_count == 0); 4195 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4196 4197 --vma->pin_count; 4198 } 4199 4200 int 4201 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4202 struct drm_file *file) 4203 { 4204 struct drm_i915_gem_busy *args = data; 4205 struct drm_i915_gem_object *obj; 4206 int ret; 4207 4208 ret = i915_mutex_lock_interruptible(dev); 4209 if (ret) 4210 return ret; 4211 4212 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4213 if (&obj->base == NULL) { 4214 ret = -ENOENT; 4215 goto unlock; 4216 } 4217 4218 /* Count all active objects as busy, even if they are currently not used 4219 * by the gpu. Users of this interface expect objects to eventually 4220 * become non-busy without any further actions, therefore emit any 4221 * necessary flushes here. 
4222 */ 4223 ret = i915_gem_object_flush_active(obj); 4224 if (ret) 4225 goto unref; 4226 4227 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4228 args->busy = obj->active << 16; 4229 if (obj->last_write_req) 4230 args->busy |= obj->last_write_req->ring->id; 4231 4232 unref: 4233 drm_gem_object_unreference(&obj->base); 4234 unlock: 4235 mutex_unlock(&dev->struct_mutex); 4236 return ret; 4237 } 4238 4239 int 4240 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4241 struct drm_file *file_priv) 4242 { 4243 return i915_gem_ring_throttle(dev, file_priv); 4244 } 4245 4246 int 4247 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4248 struct drm_file *file_priv) 4249 { 4250 struct drm_i915_private *dev_priv = dev->dev_private; 4251 struct drm_i915_gem_madvise *args = data; 4252 struct drm_i915_gem_object *obj; 4253 int ret; 4254 4255 switch (args->madv) { 4256 case I915_MADV_DONTNEED: 4257 case I915_MADV_WILLNEED: 4258 break; 4259 default: 4260 return -EINVAL; 4261 } 4262 4263 ret = i915_mutex_lock_interruptible(dev); 4264 if (ret) 4265 return ret; 4266 4267 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4268 if (&obj->base == NULL) { 4269 ret = -ENOENT; 4270 goto unlock; 4271 } 4272 4273 if (i915_gem_obj_is_pinned(obj)) { 4274 ret = -EINVAL; 4275 goto out; 4276 } 4277 4278 if (obj->pages && 4279 obj->tiling_mode != I915_TILING_NONE && 4280 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4281 if (obj->madv == I915_MADV_WILLNEED) 4282 i915_gem_object_unpin_pages(obj); 4283 if (args->madv == I915_MADV_WILLNEED) 4284 i915_gem_object_pin_pages(obj); 4285 } 4286 4287 if (obj->madv != __I915_MADV_PURGED) 4288 obj->madv = args->madv; 4289 4290 /* if the object is no longer attached, discard its backing storage */ 4291 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4292 i915_gem_object_truncate(obj); 4293 4294 args->retained = obj->madv != __I915_MADV_PURGED; 4295 4296 out: 4297 drm_gem_object_unreference(&obj->base); 4298 unlock: 4299 mutex_unlock(&dev->struct_mutex); 4300 return ret; 4301 } 4302 4303 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4304 const struct drm_i915_gem_object_ops *ops) 4305 { 4306 int i; 4307 4308 INIT_LIST_HEAD(&obj->global_list); 4309 for (i = 0; i < I915_NUM_RINGS; i++) 4310 INIT_LIST_HEAD(&obj->ring_list[i]); 4311 INIT_LIST_HEAD(&obj->obj_exec_link); 4312 INIT_LIST_HEAD(&obj->vma_list); 4313 INIT_LIST_HEAD(&obj->batch_pool_link); 4314 4315 obj->ops = ops; 4316 4317 obj->fence_reg = I915_FENCE_REG_NONE; 4318 obj->madv = I915_MADV_WILLNEED; 4319 4320 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4321 } 4322 4323 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4324 .get_pages = i915_gem_object_get_pages_gtt, 4325 .put_pages = i915_gem_object_put_pages_gtt, 4326 }; 4327 4328 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4329 size_t size) 4330 { 4331 struct drm_i915_gem_object *obj; 4332 struct address_space *mapping; 4333 gfp_t mask; 4334 4335 obj = i915_gem_object_alloc(dev); 4336 if (obj == NULL) 4337 return NULL; 4338 4339 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4340 i915_gem_object_free(obj); 4341 return NULL; 4342 } 4343 4344 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4345 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4346 /* 965gm cannot relocate objects above 4GiB. 
*/ 4347 mask &= ~__GFP_HIGHMEM; 4348 mask |= __GFP_DMA32; 4349 } 4350 4351 mapping = file_inode(obj->base.filp)->i_mapping; 4352 mapping_set_gfp_mask(mapping, mask); 4353 4354 i915_gem_object_init(obj, &i915_gem_object_ops); 4355 4356 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4357 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4358 4359 if (HAS_LLC(dev)) { 4360 /* On some devices, we can have the GPU use the LLC (the CPU 4361 * cache) for about a 10% performance improvement 4362 * compared to uncached. Graphics requests other than 4363 * display scanout are coherent with the CPU in 4364 * accessing this cache. This means in this mode we 4365 * don't need to clflush on the CPU side, and on the 4366 * GPU side we only need to flush internal caches to 4367 * get data visible to the CPU. 4368 * 4369 * However, we maintain the display planes as UC, and so 4370 * need to rebind when first used as such. 4371 */ 4372 obj->cache_level = I915_CACHE_LLC; 4373 } else 4374 obj->cache_level = I915_CACHE_NONE; 4375 4376 trace_i915_gem_object_create(obj); 4377 4378 return obj; 4379 } 4380 4381 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4382 { 4383 /* If we are the last user of the backing storage (be it shmemfs 4384 * pages or stolen etc), we know that the pages are going to be 4385 * immediately released. In this case, we can then skip copying 4386 * back the contents from the GPU. 4387 */ 4388 4389 if (obj->madv != I915_MADV_WILLNEED) 4390 return false; 4391 4392 if (obj->base.filp == NULL) 4393 return true; 4394 4395 /* At first glance, this looks racy, but then again so would be 4396 * userspace racing mmap against close. However, the first external 4397 * reference to the filp can only be obtained through the 4398 * i915_gem_mmap_ioctl() which safeguards us against the user 4399 * acquiring such a reference whilst we are in the middle of 4400 * freeing the object. 4401 */ 4402 return atomic_long_read(&obj->base.filp->f_count) == 1; 4403 } 4404 4405 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4406 { 4407 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4408 struct drm_device *dev = obj->base.dev; 4409 struct drm_i915_private *dev_priv = dev->dev_private; 4410 struct i915_vma *vma, *next; 4411 4412 intel_runtime_pm_get(dev_priv); 4413 4414 trace_i915_gem_object_destroy(obj); 4415 4416 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4417 int ret; 4418 4419 vma->pin_count = 0; 4420 ret = i915_vma_unbind(vma); 4421 if (WARN_ON(ret == -ERESTARTSYS)) { 4422 bool was_interruptible; 4423 4424 was_interruptible = dev_priv->mm.interruptible; 4425 dev_priv->mm.interruptible = false; 4426 4427 WARN_ON(i915_vma_unbind(vma)); 4428 4429 dev_priv->mm.interruptible = was_interruptible; 4430 } 4431 } 4432 4433 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4434 * before progressing. 
*/ 4435 if (obj->stolen) 4436 i915_gem_object_unpin_pages(obj); 4437 4438 WARN_ON(obj->frontbuffer_bits); 4439 4440 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4441 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4442 obj->tiling_mode != I915_TILING_NONE) 4443 i915_gem_object_unpin_pages(obj); 4444 4445 if (WARN_ON(obj->pages_pin_count)) 4446 obj->pages_pin_count = 0; 4447 if (discard_backing_storage(obj)) 4448 obj->madv = I915_MADV_DONTNEED; 4449 i915_gem_object_put_pages(obj); 4450 i915_gem_object_free_mmap_offset(obj); 4451 4452 BUG_ON(obj->pages); 4453 4454 if (obj->base.import_attach) 4455 drm_prime_gem_destroy(&obj->base, NULL); 4456 4457 if (obj->ops->release) 4458 obj->ops->release(obj); 4459 4460 drm_gem_object_release(&obj->base); 4461 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4462 4463 kfree(obj->bit_17); 4464 i915_gem_object_free(obj); 4465 4466 intel_runtime_pm_put(dev_priv); 4467 } 4468 4469 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4470 struct i915_address_space *vm) 4471 { 4472 struct i915_vma *vma; 4473 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4474 if (i915_is_ggtt(vma->vm) && 4475 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4476 continue; 4477 if (vma->vm == vm) 4478 return vma; 4479 } 4480 return NULL; 4481 } 4482 4483 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4484 const struct i915_ggtt_view *view) 4485 { 4486 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 4487 struct i915_vma *vma; 4488 4489 if (WARN_ONCE(!view, "no view specified")) 4490 return ERR_PTR(-EINVAL); 4491 4492 list_for_each_entry(vma, &obj->vma_list, vma_link) 4493 if (vma->vm == ggtt && 4494 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4495 return vma; 4496 return NULL; 4497 } 4498 4499 void i915_gem_vma_destroy(struct i915_vma *vma) 4500 { 4501 struct i915_address_space *vm = NULL; 4502 WARN_ON(vma->node.allocated); 4503 4504 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4505 if (!list_empty(&vma->exec_list)) 4506 return; 4507 4508 vm = vma->vm; 4509 4510 if (!i915_is_ggtt(vm)) 4511 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4512 4513 list_del(&vma->vma_link); 4514 4515 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 4516 } 4517 4518 static void 4519 i915_gem_stop_ringbuffers(struct drm_device *dev) 4520 { 4521 struct drm_i915_private *dev_priv = dev->dev_private; 4522 struct intel_engine_cs *ring; 4523 int i; 4524 4525 for_each_ring(ring, dev_priv, i) 4526 dev_priv->gt.stop_ring(ring); 4527 } 4528 4529 int 4530 i915_gem_suspend(struct drm_device *dev) 4531 { 4532 struct drm_i915_private *dev_priv = dev->dev_private; 4533 int ret = 0; 4534 4535 mutex_lock(&dev->struct_mutex); 4536 ret = i915_gpu_idle(dev); 4537 if (ret) 4538 goto err; 4539 4540 i915_gem_retire_requests(dev); 4541 4542 i915_gem_stop_ringbuffers(dev); 4543 mutex_unlock(&dev->struct_mutex); 4544 4545 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4546 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4547 flush_delayed_work(&dev_priv->mm.idle_work); 4548 4549 /* Assert that we sucessfully flushed all the work and 4550 * reset the GPU back to its idle, low power state. 
4551 */ 4552 WARN_ON(dev_priv->mm.busy); 4553 4554 return 0; 4555 4556 err: 4557 mutex_unlock(&dev->struct_mutex); 4558 return ret; 4559 } 4560 4561 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4562 { 4563 struct intel_engine_cs *ring = req->ring; 4564 struct drm_device *dev = ring->dev; 4565 struct drm_i915_private *dev_priv = dev->dev_private; 4566 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4567 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4568 int i, ret; 4569 4570 if (!HAS_L3_DPF(dev) || !remap_info) 4571 return 0; 4572 4573 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4574 if (ret) 4575 return ret; 4576 4577 /* 4578 * Note: We do not worry about the concurrent register cacheline hang 4579 * here because no other code should access these registers other than 4580 * at initialization time. 4581 */ 4582 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4583 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4584 intel_ring_emit(ring, reg_base + i); 4585 intel_ring_emit(ring, remap_info[i/4]); 4586 } 4587 4588 intel_ring_advance(ring); 4589 4590 return ret; 4591 } 4592 4593 void i915_gem_init_swizzling(struct drm_device *dev) 4594 { 4595 struct drm_i915_private *dev_priv = dev->dev_private; 4596 4597 if (INTEL_INFO(dev)->gen < 5 || 4598 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4599 return; 4600 4601 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4602 DISP_TILE_SURFACE_SWIZZLING); 4603 4604 if (IS_GEN5(dev)) 4605 return; 4606 4607 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4608 if (IS_GEN6(dev)) 4609 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4610 else if (IS_GEN7(dev)) 4611 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4612 else if (IS_GEN8(dev)) 4613 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4614 else 4615 BUG(); 4616 } 4617 4618 static void init_unused_ring(struct drm_device *dev, u32 base) 4619 { 4620 struct drm_i915_private *dev_priv = dev->dev_private; 4621 4622 I915_WRITE(RING_CTL(base), 0); 4623 I915_WRITE(RING_HEAD(base), 0); 4624 I915_WRITE(RING_TAIL(base), 0); 4625 I915_WRITE(RING_START(base), 0); 4626 } 4627 4628 static void init_unused_rings(struct drm_device *dev) 4629 { 4630 if (IS_I830(dev)) { 4631 init_unused_ring(dev, PRB1_BASE); 4632 init_unused_ring(dev, SRB0_BASE); 4633 init_unused_ring(dev, SRB1_BASE); 4634 init_unused_ring(dev, SRB2_BASE); 4635 init_unused_ring(dev, SRB3_BASE); 4636 } else if (IS_GEN2(dev)) { 4637 init_unused_ring(dev, SRB0_BASE); 4638 init_unused_ring(dev, SRB1_BASE); 4639 } else if (IS_GEN3(dev)) { 4640 init_unused_ring(dev, PRB1_BASE); 4641 init_unused_ring(dev, PRB2_BASE); 4642 } 4643 } 4644 4645 int i915_gem_init_rings(struct drm_device *dev) 4646 { 4647 struct drm_i915_private *dev_priv = dev->dev_private; 4648 int ret; 4649 4650 ret = intel_init_render_ring_buffer(dev); 4651 if (ret) 4652 return ret; 4653 4654 if (HAS_BSD(dev)) { 4655 ret = intel_init_bsd_ring_buffer(dev); 4656 if (ret) 4657 goto cleanup_render_ring; 4658 } 4659 4660 if (HAS_BLT(dev)) { 4661 ret = intel_init_blt_ring_buffer(dev); 4662 if (ret) 4663 goto cleanup_bsd_ring; 4664 } 4665 4666 if (HAS_VEBOX(dev)) { 4667 ret = intel_init_vebox_ring_buffer(dev); 4668 if (ret) 4669 goto cleanup_blt_ring; 4670 } 4671 4672 if (HAS_BSD2(dev)) { 4673 ret = intel_init_bsd2_ring_buffer(dev); 4674 if (ret) 4675 goto cleanup_vebox_ring; 4676 } 4677 4678 return 0; 4679 4680 cleanup_vebox_ring: 4681 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4682 
cleanup_blt_ring: 4683 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4684 cleanup_bsd_ring: 4685 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4686 cleanup_render_ring: 4687 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4688 4689 return ret; 4690 } 4691 4692 int 4693 i915_gem_init_hw(struct drm_device *dev) 4694 { 4695 struct drm_i915_private *dev_priv = dev->dev_private; 4696 struct intel_engine_cs *ring; 4697 int ret, i, j; 4698 4699 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4700 return -EIO; 4701 4702 /* Double layer security blanket, see i915_gem_init() */ 4703 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4704 4705 if (dev_priv->ellc_size) 4706 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4707 4708 if (IS_HASWELL(dev)) 4709 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 4710 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4711 4712 if (HAS_PCH_NOP(dev)) { 4713 if (IS_IVYBRIDGE(dev)) { 4714 u32 temp = I915_READ(GEN7_MSG_CTL); 4715 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4716 I915_WRITE(GEN7_MSG_CTL, temp); 4717 } else if (INTEL_INFO(dev)->gen >= 7) { 4718 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4719 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4720 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4721 } 4722 } 4723 4724 i915_gem_init_swizzling(dev); 4725 4726 /* 4727 * At least 830 can leave some of the unused rings 4728 * "active" (ie. head != tail) after resume which 4729 * will prevent c3 entry. Makes sure all unused rings 4730 * are totally idle. 4731 */ 4732 init_unused_rings(dev); 4733 4734 BUG_ON(!dev_priv->ring[RCS].default_context); 4735 4736 ret = i915_ppgtt_init_hw(dev); 4737 if (ret) { 4738 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4739 goto out; 4740 } 4741 4742 /* Need to do basic initialisation of all rings first: */ 4743 for_each_ring(ring, dev_priv, i) { 4744 ret = ring->init_hw(ring); 4745 if (ret) 4746 goto out; 4747 } 4748 4749 /* We can't enable contexts until all firmware is loaded */ 4750 if (HAS_GUC_UCODE(dev)) { 4751 ret = intel_guc_ucode_load(dev); 4752 if (ret) { 4753 /* 4754 * If we got an error and GuC submission is enabled, map 4755 * the error to -EIO so the GPU will be declared wedged. 4756 * OTOH, if we didn't intend to use the GuC anyway, just 4757 * discard the error and carry on. 4758 */ 4759 DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, 4760 i915.enable_guc_submission ? "" : 4761 " (ignored)"); 4762 ret = i915.enable_guc_submission ? 
-EIO : 0; 4763 if (ret) 4764 goto out; 4765 } 4766 } 4767 4768 /* 4769 * Increment the next seqno by 0x100 so we have a visible break 4770 * on re-initialisation 4771 */ 4772 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 4773 if (ret) 4774 goto out; 4775 4776 /* Now it is safe to go back round and do everything else: */ 4777 for_each_ring(ring, dev_priv, i) { 4778 struct drm_i915_gem_request *req; 4779 4780 WARN_ON(!ring->default_context); 4781 4782 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 4783 if (ret) { 4784 i915_gem_cleanup_ringbuffer(dev); 4785 goto out; 4786 } 4787 4788 if (ring->id == RCS) { 4789 for (j = 0; j < NUM_L3_SLICES(dev); j++) 4790 i915_gem_l3_remap(req, j); 4791 } 4792 4793 ret = i915_ppgtt_init_ring(req); 4794 if (ret && ret != -EIO) { 4795 DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); 4796 i915_gem_request_cancel(req); 4797 i915_gem_cleanup_ringbuffer(dev); 4798 goto out; 4799 } 4800 4801 ret = i915_gem_context_enable(req); 4802 if (ret && ret != -EIO) { 4803 DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 4804 i915_gem_request_cancel(req); 4805 i915_gem_cleanup_ringbuffer(dev); 4806 goto out; 4807 } 4808 4809 i915_add_request_no_flush(req); 4810 } 4811 4812 out: 4813 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4814 return ret; 4815 } 4816 4817 int i915_gem_init(struct drm_device *dev) 4818 { 4819 struct drm_i915_private *dev_priv = dev->dev_private; 4820 int ret; 4821 4822 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 4823 i915.enable_execlists); 4824 4825 mutex_lock(&dev->struct_mutex); 4826 4827 if (IS_VALLEYVIEW(dev)) { 4828 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4829 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4830 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4831 VLV_GTLC_ALLOWWAKEACK), 10)) 4832 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4833 } 4834 4835 if (!i915.enable_execlists) { 4836 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 4837 dev_priv->gt.init_rings = i915_gem_init_rings; 4838 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 4839 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 4840 } else { 4841 dev_priv->gt.execbuf_submit = intel_execlists_submission; 4842 dev_priv->gt.init_rings = intel_logical_rings_init; 4843 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 4844 dev_priv->gt.stop_ring = intel_logical_ring_stop; 4845 } 4846 4847 /* This is just a security blanket to placate dragons. 4848 * On some systems, we very sporadically observe that the first TLBs 4849 * used by the CS may be stale, despite us poking the TLB reset. If 4850 * we hold the forcewake during initialisation these problems 4851 * just magically go away. 4852 */ 4853 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4854 4855 ret = i915_gem_init_userptr(dev); 4856 if (ret) 4857 goto out_unlock; 4858 4859 i915_gem_init_global_gtt(dev); 4860 4861 ret = i915_gem_context_init(dev); 4862 if (ret) 4863 goto out_unlock; 4864 4865 ret = dev_priv->gt.init_rings(dev); 4866 if (ret) 4867 goto out_unlock; 4868 4869 ret = i915_gem_init_hw(dev); 4870 if (ret == -EIO) { 4871 /* Allow ring initialisation to fail by marking the GPU as 4872 * wedged. But we only want to do this where the GPU is angry, 4873 * for all other failure, such as an allocation failure, bail. 
4874 */ 4875 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4876 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4877 ret = 0; 4878 } 4879 4880 out_unlock: 4881 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4882 mutex_unlock(&dev->struct_mutex); 4883 4884 return ret; 4885 } 4886 4887 void 4888 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4889 { 4890 struct drm_i915_private *dev_priv = dev->dev_private; 4891 struct intel_engine_cs *ring; 4892 int i; 4893 4894 for_each_ring(ring, dev_priv, i) 4895 dev_priv->gt.cleanup_ring(ring); 4896 4897 if (i915.enable_execlists) 4898 /* 4899 * Neither the BIOS, ourselves or any other kernel 4900 * expects the system to be in execlists mode on startup, 4901 * so we need to reset the GPU back to legacy mode. 4902 */ 4903 intel_gpu_reset(dev); 4904 } 4905 4906 static void 4907 init_ring_lists(struct intel_engine_cs *ring) 4908 { 4909 INIT_LIST_HEAD(&ring->active_list); 4910 INIT_LIST_HEAD(&ring->request_list); 4911 } 4912 4913 void 4914 i915_gem_load(struct drm_device *dev) 4915 { 4916 struct drm_i915_private *dev_priv = dev->dev_private; 4917 int i; 4918 4919 dev_priv->objects = 4920 kmem_cache_create("i915_gem_object", 4921 sizeof(struct drm_i915_gem_object), 0, 4922 SLAB_HWCACHE_ALIGN, 4923 NULL); 4924 dev_priv->vmas = 4925 kmem_cache_create("i915_gem_vma", 4926 sizeof(struct i915_vma), 0, 4927 SLAB_HWCACHE_ALIGN, 4928 NULL); 4929 dev_priv->requests = 4930 kmem_cache_create("i915_gem_request", 4931 sizeof(struct drm_i915_gem_request), 0, 4932 SLAB_HWCACHE_ALIGN, 4933 NULL); 4934 4935 INIT_LIST_HEAD(&dev_priv->vm_list); 4936 INIT_LIST_HEAD(&dev_priv->context_list); 4937 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4938 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4939 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4940 for (i = 0; i < I915_NUM_RINGS; i++) 4941 init_ring_lists(&dev_priv->ring[i]); 4942 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4943 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4944 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4945 i915_gem_retire_work_handler); 4946 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 4947 i915_gem_idle_work_handler); 4948 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4949 4950 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4951 4952 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 4953 dev_priv->num_fence_regs = 32; 4954 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4955 dev_priv->num_fence_regs = 16; 4956 else 4957 dev_priv->num_fence_regs = 8; 4958 4959 if (intel_vgpu_active(dev)) 4960 dev_priv->num_fence_regs = 4961 I915_READ(vgtif_reg(avail_rs.fence_num)); 4962 4963 /* 4964 * Set initial sequence number for requests. 4965 * Using this number allows the wraparound to happen early, 4966 * catching any obvious problems. 
4967 */ 4968 dev_priv->next_seqno = ((u32)~0 - 0x1100); 4969 dev_priv->last_seqno = ((u32)~0 - 0x1101); 4970 4971 /* Initialize fence registers to zero */ 4972 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4973 i915_gem_restore_fences(dev); 4974 4975 i915_gem_detect_bit_6_swizzle(dev); 4976 init_waitqueue_head(&dev_priv->pending_flip_queue); 4977 4978 dev_priv->mm.interruptible = true; 4979 4980 i915_gem_shrinker_init(dev_priv); 4981 4982 mutex_init(&dev_priv->fb_tracking.lock); 4983 } 4984 4985 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4986 { 4987 struct drm_i915_file_private *file_priv = file->driver_priv; 4988 4989 /* Clean up our request list when the client is going away, so that 4990 * later retire_requests won't dereference our soon-to-be-gone 4991 * file_priv. 4992 */ 4993 spin_lock(&file_priv->mm.lock); 4994 while (!list_empty(&file_priv->mm.request_list)) { 4995 struct drm_i915_gem_request *request; 4996 4997 request = list_first_entry(&file_priv->mm.request_list, 4998 struct drm_i915_gem_request, 4999 client_list); 5000 list_del(&request->client_list); 5001 request->file_priv = NULL; 5002 } 5003 spin_unlock(&file_priv->mm.lock); 5004 5005 if (!list_empty(&file_priv->rps.link)) { 5006 spin_lock(&to_i915(dev)->rps.client_lock); 5007 list_del(&file_priv->rps.link); 5008 spin_unlock(&to_i915(dev)->rps.client_lock); 5009 } 5010 } 5011 5012 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5013 { 5014 struct drm_i915_file_private *file_priv; 5015 int ret; 5016 5017 DRM_DEBUG_DRIVER("\n"); 5018 5019 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5020 if (!file_priv) 5021 return -ENOMEM; 5022 5023 file->driver_priv = file_priv; 5024 file_priv->dev_priv = dev->dev_private; 5025 file_priv->file = file; 5026 INIT_LIST_HEAD(&file_priv->rps.link); 5027 5028 spin_lock_init(&file_priv->mm.lock); 5029 INIT_LIST_HEAD(&file_priv->mm.request_list); 5030 5031 ret = i915_gem_context_open(dev, file); 5032 if (ret) 5033 kfree(file_priv); 5034 5035 return ret; 5036 } 5037 5038 /** 5039 * i915_gem_track_fb - update frontbuffer tracking 5040 * @old: current GEM buffer for the frontbuffer slots 5041 * @new: new GEM buffer for the frontbuffer slots 5042 * @frontbuffer_bits: bitmask of frontbuffer slots 5043 * 5044 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5045 * from @old and setting them in @new. Both @old and @new can be NULL. 
5046 */ 5047 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5048 struct drm_i915_gem_object *new, 5049 unsigned frontbuffer_bits) 5050 { 5051 if (old) { 5052 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5053 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5054 old->frontbuffer_bits &= ~frontbuffer_bits; 5055 } 5056 5057 if (new) { 5058 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5059 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5060 new->frontbuffer_bits |= frontbuffer_bits; 5061 } 5062 } 5063 5064 /* All the new VM stuff */ 5065 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5066 struct i915_address_space *vm) 5067 { 5068 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5069 struct i915_vma *vma; 5070 5071 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5072 5073 list_for_each_entry(vma, &o->vma_list, vma_link) { 5074 if (i915_is_ggtt(vma->vm) && 5075 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5076 continue; 5077 if (vma->vm == vm) 5078 return vma->node.start; 5079 } 5080 5081 WARN(1, "%s vma for this object not found.\n", 5082 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5083 return -1; 5084 } 5085 5086 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5087 const struct i915_ggtt_view *view) 5088 { 5089 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5090 struct i915_vma *vma; 5091 5092 list_for_each_entry(vma, &o->vma_list, vma_link) 5093 if (vma->vm == ggtt && 5094 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5095 return vma->node.start; 5096 5097 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5098 return -1; 5099 } 5100 5101 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5102 struct i915_address_space *vm) 5103 { 5104 struct i915_vma *vma; 5105 5106 list_for_each_entry(vma, &o->vma_list, vma_link) { 5107 if (i915_is_ggtt(vma->vm) && 5108 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5109 continue; 5110 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5111 return true; 5112 } 5113 5114 return false; 5115 } 5116 5117 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5118 const struct i915_ggtt_view *view) 5119 { 5120 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5121 struct i915_vma *vma; 5122 5123 list_for_each_entry(vma, &o->vma_list, vma_link) 5124 if (vma->vm == ggtt && 5125 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5126 drm_mm_node_allocated(&vma->node)) 5127 return true; 5128 5129 return false; 5130 } 5131 5132 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5133 { 5134 struct i915_vma *vma; 5135 5136 list_for_each_entry(vma, &o->vma_list, vma_link) 5137 if (drm_mm_node_allocated(&vma->node)) 5138 return true; 5139 5140 return false; 5141 } 5142 5143 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5144 struct i915_address_space *vm) 5145 { 5146 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5147 struct i915_vma *vma; 5148 5149 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5150 5151 BUG_ON(list_empty(&o->vma_list)); 5152 5153 list_for_each_entry(vma, &o->vma_list, vma_link) { 5154 if (i915_is_ggtt(vma->vm) && 5155 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5156 continue; 5157 if (vma->vm == vm) 5158 return vma->node.size; 5159 } 5160 return 0; 5161 } 5162 5163 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5164 { 5165 struct i915_vma *vma; 5166 list_for_each_entry(vma, &obj->vma_list, vma_link) 5167 if (vma->pin_count > 0) 5168 return true; 5169 5170 return 
false; 5171 } 5172 5173 /* Allocate a new GEM object and fill it with the supplied data */ 5174 struct drm_i915_gem_object * 5175 i915_gem_object_create_from_data(struct drm_device *dev, 5176 const void *data, size_t size) 5177 { 5178 struct drm_i915_gem_object *obj; 5179 struct sg_table *sg; 5180 size_t bytes; 5181 int ret; 5182 5183 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5184 if (IS_ERR_OR_NULL(obj)) 5185 return obj; 5186 5187 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5188 if (ret) 5189 goto fail; 5190 5191 ret = i915_gem_object_get_pages(obj); 5192 if (ret) 5193 goto fail; 5194 5195 i915_gem_object_pin_pages(obj); 5196 sg = obj->pages; 5197 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5198 i915_gem_object_unpin_pages(obj); 5199 5200 if (WARN_ON(bytes != size)) { 5201 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5202 ret = -EFAULT; 5203 goto fail; 5204 } 5205 5206 return obj; 5207 5208 fail: 5209 drm_gem_object_unreference(&obj->base); 5210 return ERR_PTR(ret); 5211 } 5212