/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
                                 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
                                         bool enable);

static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
{
        return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
                return true;

        return obj->pin_display;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
        if (obj->tiling_mode)
                i915_gem_release_mmap(obj);

        /* As we do not have an associated fence register, we will force
         * a tiling change if we ever need to acquire one.
76 */ 77 obj->fence_dirty = false; 78 obj->fence_reg = I915_FENCE_REG_NONE; 79 } 80 81 /* some bookkeeping */ 82 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 83 size_t size) 84 { 85 spin_lock(&dev_priv->mm.object_stat_lock); 86 dev_priv->mm.object_count++; 87 dev_priv->mm.object_memory += size; 88 spin_unlock(&dev_priv->mm.object_stat_lock); 89 } 90 91 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 92 size_t size) 93 { 94 spin_lock(&dev_priv->mm.object_stat_lock); 95 dev_priv->mm.object_count--; 96 dev_priv->mm.object_memory -= size; 97 spin_unlock(&dev_priv->mm.object_stat_lock); 98 } 99 100 static int 101 i915_gem_wait_for_error(struct i915_gpu_error *error) 102 { 103 int ret; 104 105 #define EXIT_COND (!i915_reset_in_progress(error) || \ 106 i915_terminally_wedged(error)) 107 if (EXIT_COND) 108 return 0; 109 110 /* 111 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 112 * userspace. If it takes that long something really bad is going on and 113 * we should simply try to bail out and fail as gracefully as possible. 114 */ 115 ret = wait_event_interruptible_timeout(error->reset_queue, 116 EXIT_COND, 117 10*HZ); 118 if (ret == 0) { 119 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 120 return -EIO; 121 } else if (ret < 0) { 122 return ret; 123 } 124 #undef EXIT_COND 125 126 return 0; 127 } 128 129 int i915_mutex_lock_interruptible(struct drm_device *dev) 130 { 131 struct drm_i915_private *dev_priv = dev->dev_private; 132 int ret; 133 134 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 135 if (ret) 136 return ret; 137 138 ret = mutex_lock_interruptible(&dev->struct_mutex); 139 if (ret) 140 return ret; 141 142 WARN_ON(i915_verify_lists(dev)); 143 return 0; 144 } 145 146 int 147 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 148 struct drm_file *file) 149 { 150 struct drm_i915_private *dev_priv = dev->dev_private; 151 struct drm_i915_gem_get_aperture *args = data; 152 struct drm_i915_gem_object *obj; 153 size_t pinned; 154 155 pinned = 0; 156 mutex_lock(&dev->struct_mutex); 157 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 158 if (i915_gem_obj_is_pinned(obj)) 159 pinned += i915_gem_obj_ggtt_size(obj); 160 mutex_unlock(&dev->struct_mutex); 161 162 args->aper_size = dev_priv->gtt.base.total; 163 args->aper_available_size = args->aper_size - pinned; 164 165 return 0; 166 } 167 168 static int 169 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 170 { 171 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 172 char *vaddr = obj->phys_handle->vaddr; 173 struct sg_table *st; 174 struct scatterlist *sg; 175 int i; 176 177 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 178 return -EINVAL; 179 180 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 181 struct page *page; 182 char *src; 183 184 page = shmem_read_mapping_page(mapping, i); 185 if (IS_ERR(page)) 186 return PTR_ERR(page); 187 188 src = kmap_atomic(page); 189 memcpy(vaddr, src, PAGE_SIZE); 190 drm_clflush_virt_range(vaddr, PAGE_SIZE); 191 kunmap_atomic(src); 192 193 page_cache_release(page); 194 vaddr += PAGE_SIZE; 195 } 196 197 i915_gem_chipset_flush(obj->base.dev); 198 199 st = kmalloc(sizeof(*st), GFP_KERNEL); 200 if (st == NULL) 201 return -ENOMEM; 202 203 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 204 kfree(st); 205 return -ENOMEM; 206 } 207 208 sg = st->sgl; 209 sg->offset = 0; 210 sg->length = obj->base.size; 211 212 sg_dma_address(sg) = obj->phys_handle->busaddr; 213 
sg_dma_len(sg) = obj->base.size; 214 215 obj->pages = st; 216 obj->has_dma_mapping = true; 217 return 0; 218 } 219 220 static void 221 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 222 { 223 int ret; 224 225 BUG_ON(obj->madv == __I915_MADV_PURGED); 226 227 ret = i915_gem_object_set_to_cpu_domain(obj, true); 228 if (ret) { 229 /* In the event of a disaster, abandon all caches and 230 * hope for the best. 231 */ 232 WARN_ON(ret != -EIO); 233 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 234 } 235 236 if (obj->madv == I915_MADV_DONTNEED) 237 obj->dirty = 0; 238 239 if (obj->dirty) { 240 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 241 char *vaddr = obj->phys_handle->vaddr; 242 int i; 243 244 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 245 struct page *page; 246 char *dst; 247 248 page = shmem_read_mapping_page(mapping, i); 249 if (IS_ERR(page)) 250 continue; 251 252 dst = kmap_atomic(page); 253 drm_clflush_virt_range(vaddr, PAGE_SIZE); 254 memcpy(dst, vaddr, PAGE_SIZE); 255 kunmap_atomic(dst); 256 257 set_page_dirty(page); 258 if (obj->madv == I915_MADV_WILLNEED) 259 mark_page_accessed(page); 260 page_cache_release(page); 261 vaddr += PAGE_SIZE; 262 } 263 obj->dirty = 0; 264 } 265 266 sg_free_table(obj->pages); 267 kfree(obj->pages); 268 269 obj->has_dma_mapping = false; 270 } 271 272 static void 273 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 274 { 275 drm_pci_free(obj->base.dev, obj->phys_handle); 276 } 277 278 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 279 .get_pages = i915_gem_object_get_pages_phys, 280 .put_pages = i915_gem_object_put_pages_phys, 281 .release = i915_gem_object_release_phys, 282 }; 283 284 static int 285 drop_pages(struct drm_i915_gem_object *obj) 286 { 287 struct i915_vma *vma, *next; 288 int ret; 289 290 drm_gem_object_reference(&obj->base); 291 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 292 if (i915_vma_unbind(vma)) 293 break; 294 295 ret = i915_gem_object_put_pages(obj); 296 drm_gem_object_unreference(&obj->base); 297 298 return ret; 299 } 300 301 int 302 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 303 int align) 304 { 305 drm_dma_handle_t *phys; 306 int ret; 307 308 if (obj->phys_handle) { 309 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 310 return -EBUSY; 311 312 return 0; 313 } 314 315 if (obj->madv != I915_MADV_WILLNEED) 316 return -EFAULT; 317 318 if (obj->base.filp == NULL) 319 return -EINVAL; 320 321 ret = drop_pages(obj); 322 if (ret) 323 return ret; 324 325 /* create a new object */ 326 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 327 if (!phys) 328 return -ENOMEM; 329 330 obj->phys_handle = phys; 331 obj->ops = &i915_gem_phys_ops; 332 333 return i915_gem_object_get_pages(obj); 334 } 335 336 static int 337 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 338 struct drm_i915_gem_pwrite *args, 339 struct drm_file *file_priv) 340 { 341 struct drm_device *dev = obj->base.dev; 342 void *vaddr = obj->phys_handle->vaddr + args->offset; 343 char __user *user_data = to_user_ptr(args->data_ptr); 344 int ret = 0; 345 346 /* We manually control the domain here and pretend that it 347 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
348 */ 349 ret = i915_gem_object_wait_rendering(obj, false); 350 if (ret) 351 return ret; 352 353 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 354 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 355 unsigned long unwritten; 356 357 /* The physical object once assigned is fixed for the lifetime 358 * of the obj, so we can safely drop the lock and continue 359 * to access vaddr. 360 */ 361 mutex_unlock(&dev->struct_mutex); 362 unwritten = copy_from_user(vaddr, user_data, args->size); 363 mutex_lock(&dev->struct_mutex); 364 if (unwritten) { 365 ret = -EFAULT; 366 goto out; 367 } 368 } 369 370 drm_clflush_virt_range(vaddr, args->size); 371 i915_gem_chipset_flush(dev); 372 373 out: 374 intel_fb_obj_flush(obj, false); 375 return ret; 376 } 377 378 void *i915_gem_object_alloc(struct drm_device *dev) 379 { 380 struct drm_i915_private *dev_priv = dev->dev_private; 381 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL); 382 } 383 384 void i915_gem_object_free(struct drm_i915_gem_object *obj) 385 { 386 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 387 kmem_cache_free(dev_priv->objects, obj); 388 } 389 390 static int 391 i915_gem_create(struct drm_file *file, 392 struct drm_device *dev, 393 uint64_t size, 394 uint32_t *handle_p) 395 { 396 struct drm_i915_gem_object *obj; 397 int ret; 398 u32 handle; 399 400 size = roundup(size, PAGE_SIZE); 401 if (size == 0) 402 return -EINVAL; 403 404 /* Allocate the new object */ 405 obj = i915_gem_alloc_object(dev, size); 406 if (obj == NULL) 407 return -ENOMEM; 408 409 ret = drm_gem_handle_create(file, &obj->base, &handle); 410 /* drop reference from allocate - handle holds it now */ 411 drm_gem_object_unreference_unlocked(&obj->base); 412 if (ret) 413 return ret; 414 415 *handle_p = handle; 416 return 0; 417 } 418 419 int 420 i915_gem_dumb_create(struct drm_file *file, 421 struct drm_device *dev, 422 struct drm_mode_create_dumb *args) 423 { 424 /* have to work out size/pitch and return them */ 425 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 426 args->size = args->pitch * args->height; 427 return i915_gem_create(file, dev, 428 args->size, &args->handle); 429 } 430 431 /** 432 * Creates a new mm object and returns a handle to it. 
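 *
 * As a rough illustration (not part of the driver itself), userspace
 * typically reaches this path through the DRM_IOCTL_I915_GEM_CREATE
 * ioctl; a minimal, hedged sketch assuming an already-open DRM fd and
 * libdrm's drmIoctl():
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		buffer_handle = create.handle;
 *
 * The requested size is rounded up to a page multiple by
 * i915_gem_create() above before the object is allocated.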
433 */ 434 int 435 i915_gem_create_ioctl(struct drm_device *dev, void *data, 436 struct drm_file *file) 437 { 438 struct drm_i915_gem_create *args = data; 439 440 return i915_gem_create(file, dev, 441 args->size, &args->handle); 442 } 443 444 static inline int 445 __copy_to_user_swizzled(char __user *cpu_vaddr, 446 const char *gpu_vaddr, int gpu_offset, 447 int length) 448 { 449 int ret, cpu_offset = 0; 450 451 while (length > 0) { 452 int cacheline_end = ALIGN(gpu_offset + 1, 64); 453 int this_length = min(cacheline_end - gpu_offset, length); 454 int swizzled_gpu_offset = gpu_offset ^ 64; 455 456 ret = __copy_to_user(cpu_vaddr + cpu_offset, 457 gpu_vaddr + swizzled_gpu_offset, 458 this_length); 459 if (ret) 460 return ret + length; 461 462 cpu_offset += this_length; 463 gpu_offset += this_length; 464 length -= this_length; 465 } 466 467 return 0; 468 } 469 470 static inline int 471 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 472 const char __user *cpu_vaddr, 473 int length) 474 { 475 int ret, cpu_offset = 0; 476 477 while (length > 0) { 478 int cacheline_end = ALIGN(gpu_offset + 1, 64); 479 int this_length = min(cacheline_end - gpu_offset, length); 480 int swizzled_gpu_offset = gpu_offset ^ 64; 481 482 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 483 cpu_vaddr + cpu_offset, 484 this_length); 485 if (ret) 486 return ret + length; 487 488 cpu_offset += this_length; 489 gpu_offset += this_length; 490 length -= this_length; 491 } 492 493 return 0; 494 } 495 496 /* 497 * Pins the specified object's pages and synchronizes the object with 498 * GPU accesses. Sets needs_clflush to non-zero if the caller should 499 * flush the object from the CPU cache. 500 */ 501 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 502 int *needs_clflush) 503 { 504 int ret; 505 506 *needs_clflush = 0; 507 508 if (!obj->base.filp) 509 return -EINVAL; 510 511 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 512 /* If we're not in the cpu read domain, set ourself into the gtt 513 * read domain and manually flush cachelines (if required). This 514 * optimizes for the case when the gpu will dirty the data 515 * anyway again before the next pread happens. */ 516 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 517 obj->cache_level); 518 ret = i915_gem_object_wait_rendering(obj, true); 519 if (ret) 520 return ret; 521 } 522 523 ret = i915_gem_object_get_pages(obj); 524 if (ret) 525 return ret; 526 527 i915_gem_object_pin_pages(obj); 528 529 return ret; 530 } 531 532 /* Per-page copy function for the shmem pread fastpath. 533 * Flushes invalid cachelines before reading the target if 534 * needs_clflush is set. */ 535 static int 536 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 537 char __user *user_data, 538 bool page_do_bit17_swizzling, bool needs_clflush) 539 { 540 char *vaddr; 541 int ret; 542 543 if (unlikely(page_do_bit17_swizzling)) 544 return -EINVAL; 545 546 vaddr = kmap_atomic(page); 547 if (needs_clflush) 548 drm_clflush_virt_range(vaddr + shmem_page_offset, 549 page_length); 550 ret = __copy_to_user_inatomic(user_data, 551 vaddr + shmem_page_offset, 552 page_length); 553 kunmap_atomic(vaddr); 554 555 return ret ? 
-EFAULT : 0; 556 } 557 558 static void 559 shmem_clflush_swizzled_range(char *addr, unsigned long length, 560 bool swizzled) 561 { 562 if (unlikely(swizzled)) { 563 unsigned long start = (unsigned long) addr; 564 unsigned long end = (unsigned long) addr + length; 565 566 /* For swizzling simply ensure that we always flush both 567 * channels. Lame, but simple and it works. Swizzled 568 * pwrite/pread is far from a hotpath - current userspace 569 * doesn't use it at all. */ 570 start = round_down(start, 128); 571 end = round_up(end, 128); 572 573 drm_clflush_virt_range((void *)start, end - start); 574 } else { 575 drm_clflush_virt_range(addr, length); 576 } 577 578 } 579 580 /* Only difference to the fast-path function is that this can handle bit17 581 * and uses non-atomic copy and kmap functions. */ 582 static int 583 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 584 char __user *user_data, 585 bool page_do_bit17_swizzling, bool needs_clflush) 586 { 587 char *vaddr; 588 int ret; 589 590 vaddr = kmap(page); 591 if (needs_clflush) 592 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 593 page_length, 594 page_do_bit17_swizzling); 595 596 if (page_do_bit17_swizzling) 597 ret = __copy_to_user_swizzled(user_data, 598 vaddr, shmem_page_offset, 599 page_length); 600 else 601 ret = __copy_to_user(user_data, 602 vaddr + shmem_page_offset, 603 page_length); 604 kunmap(page); 605 606 return ret ? - EFAULT : 0; 607 } 608 609 static int 610 i915_gem_shmem_pread(struct drm_device *dev, 611 struct drm_i915_gem_object *obj, 612 struct drm_i915_gem_pread *args, 613 struct drm_file *file) 614 { 615 char __user *user_data; 616 ssize_t remain; 617 loff_t offset; 618 int shmem_page_offset, page_length, ret = 0; 619 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 620 int prefaulted = 0; 621 int needs_clflush = 0; 622 struct sg_page_iter sg_iter; 623 624 user_data = to_user_ptr(args->data_ptr); 625 remain = args->size; 626 627 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 628 629 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 630 if (ret) 631 return ret; 632 633 offset = args->offset; 634 635 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 636 offset >> PAGE_SHIFT) { 637 struct page *page = sg_page_iter_page(&sg_iter); 638 639 if (remain <= 0) 640 break; 641 642 /* Operation in this page 643 * 644 * shmem_page_offset = offset within page in shmem file 645 * page_length = bytes to copy for this page 646 */ 647 shmem_page_offset = offset_in_page(offset); 648 page_length = remain; 649 if ((shmem_page_offset + page_length) > PAGE_SIZE) 650 page_length = PAGE_SIZE - shmem_page_offset; 651 652 page_do_bit17_swizzling = obj_do_bit17_swizzling && 653 (page_to_phys(page) & (1 << 17)) != 0; 654 655 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 656 user_data, page_do_bit17_swizzling, 657 needs_clflush); 658 if (ret == 0) 659 goto next_page; 660 661 mutex_unlock(&dev->struct_mutex); 662 663 if (likely(!i915.prefault_disable) && !prefaulted) { 664 ret = fault_in_multipages_writeable(user_data, remain); 665 /* Userspace is tricking us, but we've already clobbered 666 * its pages with the prefault and promised to write the 667 * data up to the first fault. Hence ignore any errors 668 * and just continue. 
*/ 669 (void)ret; 670 prefaulted = 1; 671 } 672 673 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 674 user_data, page_do_bit17_swizzling, 675 needs_clflush); 676 677 mutex_lock(&dev->struct_mutex); 678 679 if (ret) 680 goto out; 681 682 next_page: 683 remain -= page_length; 684 user_data += page_length; 685 offset += page_length; 686 } 687 688 out: 689 i915_gem_object_unpin_pages(obj); 690 691 return ret; 692 } 693 694 /** 695 * Reads data from the object referenced by handle. 696 * 697 * On error, the contents of *data are undefined. 698 */ 699 int 700 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 701 struct drm_file *file) 702 { 703 struct drm_i915_gem_pread *args = data; 704 struct drm_i915_gem_object *obj; 705 int ret = 0; 706 707 if (args->size == 0) 708 return 0; 709 710 if (!access_ok(VERIFY_WRITE, 711 to_user_ptr(args->data_ptr), 712 args->size)) 713 return -EFAULT; 714 715 ret = i915_mutex_lock_interruptible(dev); 716 if (ret) 717 return ret; 718 719 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 720 if (&obj->base == NULL) { 721 ret = -ENOENT; 722 goto unlock; 723 } 724 725 /* Bounds check source. */ 726 if (args->offset > obj->base.size || 727 args->size > obj->base.size - args->offset) { 728 ret = -EINVAL; 729 goto out; 730 } 731 732 /* prime objects have no backing filp to GEM pread/pwrite 733 * pages from. 734 */ 735 if (!obj->base.filp) { 736 ret = -EINVAL; 737 goto out; 738 } 739 740 trace_i915_gem_object_pread(obj, args->offset, args->size); 741 742 ret = i915_gem_shmem_pread(dev, obj, args, file); 743 744 out: 745 drm_gem_object_unreference(&obj->base); 746 unlock: 747 mutex_unlock(&dev->struct_mutex); 748 return ret; 749 } 750 751 /* This is the fast write path which cannot handle 752 * page faults in the source data 753 */ 754 755 static inline int 756 fast_user_write(struct io_mapping *mapping, 757 loff_t page_base, int page_offset, 758 char __user *user_data, 759 int length) 760 { 761 void __iomem *vaddr_atomic; 762 void *vaddr; 763 unsigned long unwritten; 764 765 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 766 /* We can use the cpu mem copy function because this is X86. */ 767 vaddr = (void __force*)vaddr_atomic + page_offset; 768 unwritten = __copy_from_user_inatomic_nocache(vaddr, 769 user_data, length); 770 io_mapping_unmap_atomic(vaddr_atomic); 771 return unwritten; 772 } 773 774 /** 775 * This is the fast pwrite path, where we copy the data directly from the 776 * user into the GTT, uncached. 
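 *
 * A worked example of the per-page bookkeeping in the copy loop below,
 * assuming 4 KiB pages: for offset 0x11234 and remain 0x2000,
 *
 *	page_base   = offset & PAGE_MASK      = 0x11000
 *	page_offset = offset_in_page(offset)  = 0x234
 *	page_length = PAGE_SIZE - page_offset = 0xdcc
 *
 * so the first iteration copies 0xdcc bytes and the next one resumes at
 * the following page boundary.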
777 */ 778 static int 779 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 780 struct drm_i915_gem_object *obj, 781 struct drm_i915_gem_pwrite *args, 782 struct drm_file *file) 783 { 784 struct drm_i915_private *dev_priv = dev->dev_private; 785 ssize_t remain; 786 loff_t offset, page_base; 787 char __user *user_data; 788 int page_offset, page_length, ret; 789 790 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 791 if (ret) 792 goto out; 793 794 ret = i915_gem_object_set_to_gtt_domain(obj, true); 795 if (ret) 796 goto out_unpin; 797 798 ret = i915_gem_object_put_fence(obj); 799 if (ret) 800 goto out_unpin; 801 802 user_data = to_user_ptr(args->data_ptr); 803 remain = args->size; 804 805 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 806 807 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 808 809 while (remain > 0) { 810 /* Operation in this page 811 * 812 * page_base = page offset within aperture 813 * page_offset = offset within page 814 * page_length = bytes to copy for this page 815 */ 816 page_base = offset & PAGE_MASK; 817 page_offset = offset_in_page(offset); 818 page_length = remain; 819 if ((page_offset + remain) > PAGE_SIZE) 820 page_length = PAGE_SIZE - page_offset; 821 822 /* If we get a fault while copying data, then (presumably) our 823 * source page isn't available. Return the error and we'll 824 * retry in the slow path. 825 */ 826 if (fast_user_write(dev_priv->gtt.mappable, page_base, 827 page_offset, user_data, page_length)) { 828 ret = -EFAULT; 829 goto out_flush; 830 } 831 832 remain -= page_length; 833 user_data += page_length; 834 offset += page_length; 835 } 836 837 out_flush: 838 intel_fb_obj_flush(obj, false); 839 out_unpin: 840 i915_gem_object_ggtt_unpin(obj); 841 out: 842 return ret; 843 } 844 845 /* Per-page copy function for the shmem pwrite fastpath. 846 * Flushes invalid cachelines before writing to the target if 847 * needs_clflush_before is set and flushes out any written cachelines after 848 * writing if needs_clflush is set. */ 849 static int 850 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 851 char __user *user_data, 852 bool page_do_bit17_swizzling, 853 bool needs_clflush_before, 854 bool needs_clflush_after) 855 { 856 char *vaddr; 857 int ret; 858 859 if (unlikely(page_do_bit17_swizzling)) 860 return -EINVAL; 861 862 vaddr = kmap_atomic(page); 863 if (needs_clflush_before) 864 drm_clflush_virt_range(vaddr + shmem_page_offset, 865 page_length); 866 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 867 user_data, page_length); 868 if (needs_clflush_after) 869 drm_clflush_virt_range(vaddr + shmem_page_offset, 870 page_length); 871 kunmap_atomic(vaddr); 872 873 return ret ? -EFAULT : 0; 874 } 875 876 /* Only difference to the fast-path function is that this can handle bit17 877 * and uses non-atomic copy and kmap functions. 
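 *
 * Background on the swizzled helpers used here: the copy is done
 * cacheline by cacheline with the offset XORed with 64 whenever bit 17
 * of the backing page's physical address is set (see the
 * page_do_bit17_swizzling computation in the callers), i.e. the two
 * 64-byte halves of every 128-byte span swap places. For example an
 * offset of 0x90 is redirected to 0xd0, and 0x40 to 0x00, matching
 * __copy_from_user_swizzled() earlier in this file.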
*/ 878 static int 879 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 880 char __user *user_data, 881 bool page_do_bit17_swizzling, 882 bool needs_clflush_before, 883 bool needs_clflush_after) 884 { 885 char *vaddr; 886 int ret; 887 888 vaddr = kmap(page); 889 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 890 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 891 page_length, 892 page_do_bit17_swizzling); 893 if (page_do_bit17_swizzling) 894 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 895 user_data, 896 page_length); 897 else 898 ret = __copy_from_user(vaddr + shmem_page_offset, 899 user_data, 900 page_length); 901 if (needs_clflush_after) 902 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 903 page_length, 904 page_do_bit17_swizzling); 905 kunmap(page); 906 907 return ret ? -EFAULT : 0; 908 } 909 910 static int 911 i915_gem_shmem_pwrite(struct drm_device *dev, 912 struct drm_i915_gem_object *obj, 913 struct drm_i915_gem_pwrite *args, 914 struct drm_file *file) 915 { 916 ssize_t remain; 917 loff_t offset; 918 char __user *user_data; 919 int shmem_page_offset, page_length, ret = 0; 920 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 921 int hit_slowpath = 0; 922 int needs_clflush_after = 0; 923 int needs_clflush_before = 0; 924 struct sg_page_iter sg_iter; 925 926 user_data = to_user_ptr(args->data_ptr); 927 remain = args->size; 928 929 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 930 931 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 932 /* If we're not in the cpu write domain, set ourself into the gtt 933 * write domain and manually flush cachelines (if required). This 934 * optimizes for the case when the gpu will use the data 935 * right away and we therefore have to clflush anyway. */ 936 needs_clflush_after = cpu_write_needs_clflush(obj); 937 ret = i915_gem_object_wait_rendering(obj, false); 938 if (ret) 939 return ret; 940 } 941 /* Same trick applies to invalidate partially written cachelines read 942 * before writing. */ 943 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 944 needs_clflush_before = 945 !cpu_cache_is_coherent(dev, obj->cache_level); 946 947 ret = i915_gem_object_get_pages(obj); 948 if (ret) 949 return ret; 950 951 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 952 953 i915_gem_object_pin_pages(obj); 954 955 offset = args->offset; 956 obj->dirty = 1; 957 958 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 959 offset >> PAGE_SHIFT) { 960 struct page *page = sg_page_iter_page(&sg_iter); 961 int partial_cacheline_write; 962 963 if (remain <= 0) 964 break; 965 966 /* Operation in this page 967 * 968 * shmem_page_offset = offset within page in shmem file 969 * page_length = bytes to copy for this page 970 */ 971 shmem_page_offset = offset_in_page(offset); 972 973 page_length = remain; 974 if ((shmem_page_offset + page_length) > PAGE_SIZE) 975 page_length = PAGE_SIZE - shmem_page_offset; 976 977 /* If we don't overwrite a cacheline completely we need to be 978 * careful to have up-to-date data by first clflushing. Don't 979 * overcomplicate things and flush the entire patch. 
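		 *
		 * Worked example of the check below, assuming a 64 byte
		 * clflush size: a 0x40 byte write at shmem_page_offset 0x20
		 * gives (0x20 | 0x40) & 0x3f = 0x20, so the edges straddle
		 * cachelines and a pre-flush is needed, whereas a 0x40 byte
		 * write at offset 0x40 gives 0 and the pre-flush is skipped.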
*/ 980 partial_cacheline_write = needs_clflush_before && 981 ((shmem_page_offset | page_length) 982 & (boot_cpu_data.x86_clflush_size - 1)); 983 984 page_do_bit17_swizzling = obj_do_bit17_swizzling && 985 (page_to_phys(page) & (1 << 17)) != 0; 986 987 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 988 user_data, page_do_bit17_swizzling, 989 partial_cacheline_write, 990 needs_clflush_after); 991 if (ret == 0) 992 goto next_page; 993 994 hit_slowpath = 1; 995 mutex_unlock(&dev->struct_mutex); 996 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 997 user_data, page_do_bit17_swizzling, 998 partial_cacheline_write, 999 needs_clflush_after); 1000 1001 mutex_lock(&dev->struct_mutex); 1002 1003 if (ret) 1004 goto out; 1005 1006 next_page: 1007 remain -= page_length; 1008 user_data += page_length; 1009 offset += page_length; 1010 } 1011 1012 out: 1013 i915_gem_object_unpin_pages(obj); 1014 1015 if (hit_slowpath) { 1016 /* 1017 * Fixup: Flush cpu caches in case we didn't flush the dirty 1018 * cachelines in-line while writing and the object moved 1019 * out of the cpu write domain while we've dropped the lock. 1020 */ 1021 if (!needs_clflush_after && 1022 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1023 if (i915_gem_clflush_object(obj, obj->pin_display)) 1024 i915_gem_chipset_flush(dev); 1025 } 1026 } 1027 1028 if (needs_clflush_after) 1029 i915_gem_chipset_flush(dev); 1030 1031 intel_fb_obj_flush(obj, false); 1032 return ret; 1033 } 1034 1035 /** 1036 * Writes data to the object referenced by handle. 1037 * 1038 * On error, the contents of the buffer that were to be modified are undefined. 1039 */ 1040 int 1041 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1042 struct drm_file *file) 1043 { 1044 struct drm_i915_private *dev_priv = dev->dev_private; 1045 struct drm_i915_gem_pwrite *args = data; 1046 struct drm_i915_gem_object *obj; 1047 int ret; 1048 1049 if (args->size == 0) 1050 return 0; 1051 1052 if (!access_ok(VERIFY_READ, 1053 to_user_ptr(args->data_ptr), 1054 args->size)) 1055 return -EFAULT; 1056 1057 if (likely(!i915.prefault_disable)) { 1058 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1059 args->size); 1060 if (ret) 1061 return -EFAULT; 1062 } 1063 1064 intel_runtime_pm_get(dev_priv); 1065 1066 ret = i915_mutex_lock_interruptible(dev); 1067 if (ret) 1068 goto put_rpm; 1069 1070 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1071 if (&obj->base == NULL) { 1072 ret = -ENOENT; 1073 goto unlock; 1074 } 1075 1076 /* Bounds check destination. */ 1077 if (args->offset > obj->base.size || 1078 args->size > obj->base.size - args->offset) { 1079 ret = -EINVAL; 1080 goto out; 1081 } 1082 1083 /* prime objects have no backing filp to GEM pread/pwrite 1084 * pages from. 1085 */ 1086 if (!obj->base.filp) { 1087 ret = -EINVAL; 1088 goto out; 1089 } 1090 1091 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1092 1093 ret = -EFAULT; 1094 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1095 * it would end up going through the fenced access, and we'll get 1096 * different detiling behavior between reading and writing. 1097 * pread/pwrite currently are reading and writing from the CPU 1098 * perspective, requiring manual detiling by the client. 
1099 */ 1100 if (obj->tiling_mode == I915_TILING_NONE && 1101 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1102 cpu_write_needs_clflush(obj)) { 1103 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1104 /* Note that the gtt paths might fail with non-page-backed user 1105 * pointers (e.g. gtt mappings when moving data between 1106 * textures). Fallback to the shmem path in that case. */ 1107 } 1108 1109 if (ret == -EFAULT || ret == -ENOSPC) { 1110 if (obj->phys_handle) 1111 ret = i915_gem_phys_pwrite(obj, args, file); 1112 else 1113 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1114 } 1115 1116 out: 1117 drm_gem_object_unreference(&obj->base); 1118 unlock: 1119 mutex_unlock(&dev->struct_mutex); 1120 put_rpm: 1121 intel_runtime_pm_put(dev_priv); 1122 1123 return ret; 1124 } 1125 1126 int 1127 i915_gem_check_wedge(struct i915_gpu_error *error, 1128 bool interruptible) 1129 { 1130 if (i915_reset_in_progress(error)) { 1131 /* Non-interruptible callers can't handle -EAGAIN, hence return 1132 * -EIO unconditionally for these. */ 1133 if (!interruptible) 1134 return -EIO; 1135 1136 /* Recovery complete, but the reset failed ... */ 1137 if (i915_terminally_wedged(error)) 1138 return -EIO; 1139 1140 /* 1141 * Check if GPU Reset is in progress - we need intel_ring_begin 1142 * to work properly to reinit the hw state while the gpu is 1143 * still marked as reset-in-progress. Handle this with a flag. 1144 */ 1145 if (!error->reload_in_reset) 1146 return -EAGAIN; 1147 } 1148 1149 return 0; 1150 } 1151 1152 /* 1153 * Compare arbitrary request against outstanding lazy request. Emit on match. 1154 */ 1155 int 1156 i915_gem_check_olr(struct drm_i915_gem_request *req) 1157 { 1158 int ret; 1159 1160 WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); 1161 1162 ret = 0; 1163 if (req == req->ring->outstanding_lazy_request) 1164 ret = i915_add_request(req->ring); 1165 1166 return ret; 1167 } 1168 1169 static void fake_irq(unsigned long data) 1170 { 1171 wake_up_process((struct task_struct *)data); 1172 } 1173 1174 static bool missed_irq(struct drm_i915_private *dev_priv, 1175 struct intel_engine_cs *ring) 1176 { 1177 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1178 } 1179 1180 static int __i915_spin_request(struct drm_i915_gem_request *req) 1181 { 1182 unsigned long timeout; 1183 1184 if (i915_gem_request_get_ring(req)->irq_refcount) 1185 return -EBUSY; 1186 1187 timeout = jiffies + 1; 1188 while (!need_resched()) { 1189 if (i915_gem_request_completed(req, true)) 1190 return 0; 1191 1192 if (time_after_eq(jiffies, timeout)) 1193 break; 1194 1195 cpu_relax_lowlatency(); 1196 } 1197 if (i915_gem_request_completed(req, false)) 1198 return 0; 1199 1200 return -EAGAIN; 1201 } 1202 1203 /** 1204 * __i915_wait_request - wait until execution of request has finished 1205 * @req: duh! 1206 * @reset_counter: reset sequence associated with the given request 1207 * @interruptible: do an interruptible wait (normally yes) 1208 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1209 * 1210 * Note: It is of utmost importance that the passed in seqno and reset_counter 1211 * values have been read by the caller in an smp safe manner. Where read-side 1212 * locks are involved, it is sufficient to read the reset_counter before 1213 * unlocking the lock that protects the seqno. For lockless tricks, the 1214 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1215 * inserted. 
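 *
 * A hedged sketch of the lockless ordering that note asks for
 * (illustrative only; a real lockless user must also hold a reference
 * on the request it samples):
 *
 *	reset = atomic_read(&dev_priv->gpu_error.reset_counter);
 *	smp_rmb();	(orders the counter read before the request read)
 *	req = ACCESS_ONCE(obj->last_write_req);
 *	...
 *	ret = __i915_wait_request(req, reset, true, NULL, NULL);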
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
                        unsigned reset_counter,
                        bool interruptible,
                        s64 *timeout,
                        struct intel_rps_client *rps)
{
        struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const bool irq_test_in_progress =
                ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
        DEFINE_WAIT(wait);
        unsigned long timeout_expire;
        s64 before, now;
        int ret;

        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

        if (list_empty(&req->list))
                return 0;

        if (i915_gem_request_completed(req, true))
                return 0;

        timeout_expire = timeout ?
                jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;

        if (INTEL_INFO(dev_priv)->gen >= 6)
                gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

        /* Record current time in case interrupted by signal, or wedged */
        trace_i915_gem_request_wait_begin(req);
        before = ktime_get_raw_ns();

        /* Optimistic spin for the next jiffie before touching IRQs */
        ret = __i915_spin_request(req);
        if (ret == 0)
                goto out;

        if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
                ret = -ENODEV;
                goto out;
        }

        for (;;) {
                struct timer_list timer;

                prepare_to_wait(&ring->irq_queue, &wait,
                                interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);

                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
                if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
                        /* ... but upgrade the -EAGAIN to an -EIO if the gpu
                         * is truly gone. */
                        ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
                        if (ret == 0)
                                ret = -EAGAIN;
                        break;
                }

                if (i915_gem_request_completed(req, false)) {
                        ret = 0;
                        break;
                }

                if (interruptible && signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                if (timeout && time_after_eq(jiffies, timeout_expire)) {
                        ret = -ETIME;
                        break;
                }

                timer.function = NULL;
                if (timeout || missed_irq(dev_priv, ring)) {
                        unsigned long expire;

                        setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
                        expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
                        mod_timer(&timer, expire);
                }

                io_schedule();

                if (timer.function) {
                        del_singleshot_timer_sync(&timer);
                        destroy_timer_on_stack(&timer);
                }
        }
        if (!irq_test_in_progress)
                ring->irq_put(ring);

        finish_wait(&ring->irq_queue, &wait);

out:
        now = ktime_get_raw_ns();
        trace_i915_gem_request_wait_end(req);

        if (timeout) {
                s64 tres = *timeout - (now - before);

                *timeout = tres < 0 ? 0 : tres;

                /*
                 * Apparently ktime isn't accurate enough and occasionally has a
                 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
                 * things up to make the test happy. We allow up to 1 jiffy.
                 *
                 * This is a regression from the timespec->ktime conversion.
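                 *
                 * For instance, with HZ=1000 the allowance below works out
                 * to jiffies_to_usecs(1)*1000 = 1,000,000 ns, and with
                 * HZ=250 to 4,000,000 ns, so only sub-jiffy overshoots get
                 * patched up.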
1332 */ 1333 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1334 *timeout = 0; 1335 } 1336 1337 return ret; 1338 } 1339 1340 static inline void 1341 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1342 { 1343 struct drm_i915_file_private *file_priv = request->file_priv; 1344 1345 if (!file_priv) 1346 return; 1347 1348 spin_lock(&file_priv->mm.lock); 1349 list_del(&request->client_list); 1350 request->file_priv = NULL; 1351 spin_unlock(&file_priv->mm.lock); 1352 } 1353 1354 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1355 { 1356 trace_i915_gem_request_retire(request); 1357 1358 /* We know the GPU must have read the request to have 1359 * sent us the seqno + interrupt, so use the position 1360 * of tail of the request to update the last known position 1361 * of the GPU head. 1362 * 1363 * Note this requires that we are always called in request 1364 * completion order. 1365 */ 1366 request->ringbuf->last_retired_head = request->postfix; 1367 1368 list_del_init(&request->list); 1369 i915_gem_request_remove_from_client(request); 1370 1371 put_pid(request->pid); 1372 1373 i915_gem_request_unreference(request); 1374 } 1375 1376 static void 1377 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1378 { 1379 struct intel_engine_cs *engine = req->ring; 1380 struct drm_i915_gem_request *tmp; 1381 1382 lockdep_assert_held(&engine->dev->struct_mutex); 1383 1384 if (list_empty(&req->list)) 1385 return; 1386 1387 do { 1388 tmp = list_first_entry(&engine->request_list, 1389 typeof(*tmp), list); 1390 1391 i915_gem_request_retire(tmp); 1392 } while (tmp != req); 1393 1394 WARN_ON(i915_verify_lists(engine->dev)); 1395 } 1396 1397 /** 1398 * Waits for a request to be signaled, and cleans up the 1399 * request and object lists appropriately for that event. 1400 */ 1401 int 1402 i915_wait_request(struct drm_i915_gem_request *req) 1403 { 1404 struct drm_device *dev; 1405 struct drm_i915_private *dev_priv; 1406 bool interruptible; 1407 int ret; 1408 1409 BUG_ON(req == NULL); 1410 1411 dev = req->ring->dev; 1412 dev_priv = dev->dev_private; 1413 interruptible = dev_priv->mm.interruptible; 1414 1415 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1416 1417 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1418 if (ret) 1419 return ret; 1420 1421 ret = i915_gem_check_olr(req); 1422 if (ret) 1423 return ret; 1424 1425 ret = __i915_wait_request(req, 1426 atomic_read(&dev_priv->gpu_error.reset_counter), 1427 interruptible, NULL, NULL); 1428 if (ret) 1429 return ret; 1430 1431 __i915_gem_request_retire__upto(req); 1432 return 0; 1433 } 1434 1435 /** 1436 * Ensures that all rendering to the object has completed and the object is 1437 * safe to unbind from the GTT or access from the CPU. 
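 *
 * A minimal, hedged sketch of the usual call pattern (the concrete
 * callers are the domain-management paths later in this file; "write"
 * is an assumed local describing the access the caller wants):
 *
 *	ret = i915_gem_object_wait_rendering(obj, !write);
 *	if (ret)
 *		return ret;
 *	(CPU access of the requested kind is now safe)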
1438 */ 1439 int 1440 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1441 bool readonly) 1442 { 1443 int ret, i; 1444 1445 if (!obj->active) 1446 return 0; 1447 1448 if (readonly) { 1449 if (obj->last_write_req != NULL) { 1450 ret = i915_wait_request(obj->last_write_req); 1451 if (ret) 1452 return ret; 1453 1454 i = obj->last_write_req->ring->id; 1455 if (obj->last_read_req[i] == obj->last_write_req) 1456 i915_gem_object_retire__read(obj, i); 1457 else 1458 i915_gem_object_retire__write(obj); 1459 } 1460 } else { 1461 for (i = 0; i < I915_NUM_RINGS; i++) { 1462 if (obj->last_read_req[i] == NULL) 1463 continue; 1464 1465 ret = i915_wait_request(obj->last_read_req[i]); 1466 if (ret) 1467 return ret; 1468 1469 i915_gem_object_retire__read(obj, i); 1470 } 1471 RQ_BUG_ON(obj->active); 1472 } 1473 1474 return 0; 1475 } 1476 1477 static void 1478 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1479 struct drm_i915_gem_request *req) 1480 { 1481 int ring = req->ring->id; 1482 1483 if (obj->last_read_req[ring] == req) 1484 i915_gem_object_retire__read(obj, ring); 1485 else if (obj->last_write_req == req) 1486 i915_gem_object_retire__write(obj); 1487 1488 __i915_gem_request_retire__upto(req); 1489 } 1490 1491 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1492 * as the object state may change during this call. 1493 */ 1494 static __must_check int 1495 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1496 struct intel_rps_client *rps, 1497 bool readonly) 1498 { 1499 struct drm_device *dev = obj->base.dev; 1500 struct drm_i915_private *dev_priv = dev->dev_private; 1501 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1502 unsigned reset_counter; 1503 int ret, i, n = 0; 1504 1505 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1506 BUG_ON(!dev_priv->mm.interruptible); 1507 1508 if (!obj->active) 1509 return 0; 1510 1511 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1512 if (ret) 1513 return ret; 1514 1515 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1516 1517 if (readonly) { 1518 struct drm_i915_gem_request *req; 1519 1520 req = obj->last_write_req; 1521 if (req == NULL) 1522 return 0; 1523 1524 ret = i915_gem_check_olr(req); 1525 if (ret) 1526 goto err; 1527 1528 requests[n++] = i915_gem_request_reference(req); 1529 } else { 1530 for (i = 0; i < I915_NUM_RINGS; i++) { 1531 struct drm_i915_gem_request *req; 1532 1533 req = obj->last_read_req[i]; 1534 if (req == NULL) 1535 continue; 1536 1537 ret = i915_gem_check_olr(req); 1538 if (ret) 1539 goto err; 1540 1541 requests[n++] = i915_gem_request_reference(req); 1542 } 1543 } 1544 1545 mutex_unlock(&dev->struct_mutex); 1546 for (i = 0; ret == 0 && i < n; i++) 1547 ret = __i915_wait_request(requests[i], reset_counter, true, 1548 NULL, rps); 1549 mutex_lock(&dev->struct_mutex); 1550 1551 err: 1552 for (i = 0; i < n; i++) { 1553 if (ret == 0) 1554 i915_gem_object_retire_request(obj, requests[i]); 1555 i915_gem_request_unreference(requests[i]); 1556 } 1557 1558 return ret; 1559 } 1560 1561 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1562 { 1563 struct drm_i915_file_private *fpriv = file->driver_priv; 1564 return &fpriv->rps; 1565 } 1566 1567 /** 1568 * Called when user space prepares to use an object with the CPU, either 1569 * through the mmap ioctl's mapping or a GTT mapping. 
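 *
 * A hedged userspace-side sketch of the expected usage, assuming an
 * open DRM fd, a valid handle and libdrm's drmIoctl():
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 *
 * Note that a non-zero write domain must match the single read domain,
 * which is exactly what the checks below enforce.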
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        uint32_t read_domains = args->read_domains;
        uint32_t write_domain = args->write_domain;
        int ret;

        /* Only handle setting domains to types used by the CPU. */
        if (write_domain & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        if (read_domains & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /* Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain != 0 && read_domains != write_domain)
                return -EINVAL;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        ret = i915_gem_object_wait_rendering__nonblocking(obj,
                                                          to_rps_client(file),
                                                          !write_domain);
        if (ret)
                goto unref;

        if (read_domains & I915_GEM_DOMAIN_GTT)
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        else
                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

unref:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file)
{
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Pinned buffers may be scanout, so flush the cache */
        if (obj->pin_display)
                i915_gem_object_flush_cpu_write_domain(obj);

        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
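 *
 * For contrast, a hedged sketch of the recommended offset-based path,
 * from the userspace side (assuming an open DRM fd, a valid handle and
 * libdrm's drmIoctl()):
 *
 *	struct drm_i915_gem_mmap_gtt map = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, map.offset);
 *
 * so the mapping goes through the DRM fd and stays visible to tools
 * like valgrind.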
1673 */ 1674 int 1675 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1676 struct drm_file *file) 1677 { 1678 struct drm_i915_gem_mmap *args = data; 1679 struct drm_gem_object *obj; 1680 unsigned long addr; 1681 1682 if (args->flags & ~(I915_MMAP_WC)) 1683 return -EINVAL; 1684 1685 if (args->flags & I915_MMAP_WC && !cpu_has_pat) 1686 return -ENODEV; 1687 1688 obj = drm_gem_object_lookup(dev, file, args->handle); 1689 if (obj == NULL) 1690 return -ENOENT; 1691 1692 /* prime objects have no backing filp to GEM mmap 1693 * pages from. 1694 */ 1695 if (!obj->filp) { 1696 drm_gem_object_unreference_unlocked(obj); 1697 return -EINVAL; 1698 } 1699 1700 addr = vm_mmap(obj->filp, 0, args->size, 1701 PROT_READ | PROT_WRITE, MAP_SHARED, 1702 args->offset); 1703 if (args->flags & I915_MMAP_WC) { 1704 struct mm_struct *mm = current->mm; 1705 struct vm_area_struct *vma; 1706 1707 down_write(&mm->mmap_sem); 1708 vma = find_vma(mm, addr); 1709 if (vma) 1710 vma->vm_page_prot = 1711 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1712 else 1713 addr = -ENOMEM; 1714 up_write(&mm->mmap_sem); 1715 } 1716 drm_gem_object_unreference_unlocked(obj); 1717 if (IS_ERR((void *)addr)) 1718 return addr; 1719 1720 args->addr_ptr = (uint64_t) addr; 1721 1722 return 0; 1723 } 1724 1725 /** 1726 * i915_gem_fault - fault a page into the GTT 1727 * vma: VMA in question 1728 * vmf: fault info 1729 * 1730 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1731 * from userspace. The fault handler takes care of binding the object to 1732 * the GTT (if needed), allocating and programming a fence register (again, 1733 * only if needed based on whether the old reg is still valid or the object 1734 * is tiled) and inserting a new PTE into the faulting process. 1735 * 1736 * Note that the faulting process may involve evicting existing objects 1737 * from the GTT and/or fence registers to make room. So performance may 1738 * suffer if the GTT working set is large or there are few fence registers 1739 * left. 1740 */ 1741 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1742 { 1743 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1744 struct drm_device *dev = obj->base.dev; 1745 struct drm_i915_private *dev_priv = dev->dev_private; 1746 struct i915_ggtt_view view = i915_ggtt_view_normal; 1747 pgoff_t page_offset; 1748 unsigned long pfn; 1749 int ret = 0; 1750 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1751 1752 intel_runtime_pm_get(dev_priv); 1753 1754 /* We don't use vmf->pgoff since that has the fake offset */ 1755 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1756 PAGE_SHIFT; 1757 1758 ret = i915_mutex_lock_interruptible(dev); 1759 if (ret) 1760 goto out; 1761 1762 trace_i915_gem_object_fault(obj, page_offset, true, write); 1763 1764 /* Try to flush the object off the GPU first without holding the lock. 1765 * Upon reacquiring the lock, we will perform our sanity checks and then 1766 * repeat the flush holding the lock in the normal manner to catch cases 1767 * where we are gazumped. 1768 */ 1769 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1770 if (ret) 1771 goto unlock; 1772 1773 /* Access to snoopable pages through the GTT is incoherent. */ 1774 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1775 ret = -EFAULT; 1776 goto unlock; 1777 } 1778 1779 /* Use a partial view if the object is bigger than the aperture. 
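	 *
	 * As a concrete illustration of the chunking below: chunk_size is
	 * 256 pages (1 MiB with 4 KiB pages), so a fault at page_offset
	 * 0x1234 selects a partial view starting at page 0x1200 and
	 * covering at most 256 pages around the faulting address.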
*/ 1780 if (obj->base.size >= dev_priv->gtt.mappable_end && 1781 obj->tiling_mode == I915_TILING_NONE) { 1782 static const unsigned int chunk_size = 256; // 1 MiB 1783 1784 memset(&view, 0, sizeof(view)); 1785 view.type = I915_GGTT_VIEW_PARTIAL; 1786 view.params.partial.offset = rounddown(page_offset, chunk_size); 1787 view.params.partial.size = 1788 min_t(unsigned int, 1789 chunk_size, 1790 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1791 view.params.partial.offset); 1792 } 1793 1794 /* Now pin it into the GTT if needed */ 1795 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1796 if (ret) 1797 goto unlock; 1798 1799 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1800 if (ret) 1801 goto unpin; 1802 1803 ret = i915_gem_object_get_fence(obj); 1804 if (ret) 1805 goto unpin; 1806 1807 /* Finally, remap it using the new GTT offset */ 1808 pfn = dev_priv->gtt.mappable_base + 1809 i915_gem_obj_ggtt_offset_view(obj, &view); 1810 pfn >>= PAGE_SHIFT; 1811 1812 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1813 /* Overriding existing pages in partial view does not cause 1814 * us any trouble as TLBs are still valid because the fault 1815 * is due to userspace losing part of the mapping or never 1816 * having accessed it before (at this partials' range). 1817 */ 1818 unsigned long base = vma->vm_start + 1819 (view.params.partial.offset << PAGE_SHIFT); 1820 unsigned int i; 1821 1822 for (i = 0; i < view.params.partial.size; i++) { 1823 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1824 if (ret) 1825 break; 1826 } 1827 1828 obj->fault_mappable = true; 1829 } else { 1830 if (!obj->fault_mappable) { 1831 unsigned long size = min_t(unsigned long, 1832 vma->vm_end - vma->vm_start, 1833 obj->base.size); 1834 int i; 1835 1836 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1837 ret = vm_insert_pfn(vma, 1838 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1839 pfn + i); 1840 if (ret) 1841 break; 1842 } 1843 1844 obj->fault_mappable = true; 1845 } else 1846 ret = vm_insert_pfn(vma, 1847 (unsigned long)vmf->virtual_address, 1848 pfn + page_offset); 1849 } 1850 unpin: 1851 i915_gem_object_ggtt_unpin_view(obj, &view); 1852 unlock: 1853 mutex_unlock(&dev->struct_mutex); 1854 out: 1855 switch (ret) { 1856 case -EIO: 1857 /* 1858 * We eat errors when the gpu is terminally wedged to avoid 1859 * userspace unduly crashing (gl has no provisions for mmaps to 1860 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1861 * and so needs to be reported. 1862 */ 1863 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1864 ret = VM_FAULT_SIGBUS; 1865 break; 1866 } 1867 case -EAGAIN: 1868 /* 1869 * EAGAIN means the gpu is hung and we'll wait for the error 1870 * handler to reset everything when re-faulting in 1871 * i915_mutex_lock_interruptible. 1872 */ 1873 case 0: 1874 case -ERESTARTSYS: 1875 case -EINTR: 1876 case -EBUSY: 1877 /* 1878 * EBUSY is ok: this just means that another thread 1879 * already did the job. 
                 */
                ret = VM_FAULT_NOPAGE;
                break;
        case -ENOMEM:
                ret = VM_FAULT_OOM;
                break;
        case -ENOSPC:
        case -EFAULT:
                ret = VM_FAULT_SIGBUS;
                break;
        default:
                WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
                ret = VM_FAULT_SIGBUS;
                break;
        }

        intel_runtime_pm_put(dev_priv);
        return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
        if (!obj->fault_mappable)
                return;

        drm_vma_node_unmap(&obj->base.vma_node,
                           obj->base.dev->anon_inode->i_mapping);
        obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
        struct drm_i915_gem_object *obj;

        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
                i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
        uint32_t gtt_size;

        if (INTEL_INFO(dev)->gen >= 4 ||
            tiling_mode == I915_TILING_NONE)
                return size;

        /* Previous chips need a power-of-two fence region when tiling */
        if (INTEL_INFO(dev)->gen == 3)
                gtt_size = 1024*1024;
        else
                gtt_size = 512*1024;

        while (gtt_size < size)
                gtt_size <<= 1;

        return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
                           int tiling_mode, bool fenced)
{
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
        if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
            tiling_mode == I915_TILING_NONE)
                return 4096;

        /*
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
        return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
        int ret;

        if (drm_vma_node_has_offset(&obj->base.vma_node))
                return 0;

        dev_priv->mm.shrinker_no_lock_stealing = true;

        ret = drm_gem_create_mmap_offset(&obj->base);
        if (ret != -ENOSPC)
                goto out;

        /* Badly fragmented mmap space? The only way we can recover
         * space is by destroying unwanted objects. We can't randomly release
         * mmap_offsets as userspace expects them to be persistent for the
         * lifetime of the objects. The closest we can do is to release the
         * offsets on purgeable objects by truncating it and marking it purged,
         * which prevents userspace from ever using that object again.
         */
        i915_gem_shrink(dev_priv,
                        obj->base.size >> PAGE_SHIFT,
                        I915_SHRINK_BOUND |
                        I915_SHRINK_UNBOUND |
                        I915_SHRINK_PURGEABLE);
        ret = drm_gem_create_mmap_offset(&obj->base);
        if (ret != -ENOSPC)
                goto out;

        i915_gem_shrink_all(dev_priv);
        ret = drm_gem_create_mmap_offset(&obj->base);
out:
        dev_priv->mm.shrinker_no_lock_stealing = false;

        return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
        drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
                  struct drm_device *dev,
                  uint32_t handle,
                  uint64_t *offset)
{
        struct drm_i915_gem_object *obj;
        int ret;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        if (obj->madv != I915_MADV_WILLNEED) {
                DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
                ret = -EFAULT;
                goto out;
        }

        ret = i915_gem_object_create_mmap_offset(obj);
        if (ret)
                goto out;

        *offset = drm_vma_node_offset_addr(&obj->base.vma_node);

out:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
{
        struct drm_i915_gem_mmap_gtt *args = data;

        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
        i915_gem_object_free_mmap_offset(obj);

        if (obj->base.filp == NULL)
                return;

        /* Our goal here is to return as much of the memory as
         * is possible back to the system as we are called from OOM.
         * To do this we must instruct the shmfs to drop all of its
         * backing pages, *now*.
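	 *
	 * (Objects normally become eligible for this because userspace
	 * marked them purgeable beforehand; a hedged sketch of that side,
	 * assuming an open DRM fd, a valid handle and libdrm's drmIoctl():
	 *
	 *	struct drm_i915_gem_madvise madv = {
	 *		.handle = handle,
	 *		.madv = I915_MADV_DONTNEED,
	 *	};
	 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
	 *
	 * after which the backing pages may be reaped via this path.)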
2099 */ 2100 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2101 obj->madv = __I915_MADV_PURGED; 2102 } 2103 2104 /* Try to discard unwanted pages */ 2105 static void 2106 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2107 { 2108 struct address_space *mapping; 2109 2110 switch (obj->madv) { 2111 case I915_MADV_DONTNEED: 2112 i915_gem_object_truncate(obj); 2113 case __I915_MADV_PURGED: 2114 return; 2115 } 2116 2117 if (obj->base.filp == NULL) 2118 return; 2119 2120 mapping = file_inode(obj->base.filp)->i_mapping, 2121 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2122 } 2123 2124 static void 2125 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2126 { 2127 struct sg_page_iter sg_iter; 2128 int ret; 2129 2130 BUG_ON(obj->madv == __I915_MADV_PURGED); 2131 2132 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2133 if (ret) { 2134 /* In the event of a disaster, abandon all caches and 2135 * hope for the best. 2136 */ 2137 WARN_ON(ret != -EIO); 2138 i915_gem_clflush_object(obj, true); 2139 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2140 } 2141 2142 if (i915_gem_object_needs_bit17_swizzle(obj)) 2143 i915_gem_object_save_bit_17_swizzle(obj); 2144 2145 if (obj->madv == I915_MADV_DONTNEED) 2146 obj->dirty = 0; 2147 2148 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2149 struct page *page = sg_page_iter_page(&sg_iter); 2150 2151 if (obj->dirty) 2152 set_page_dirty(page); 2153 2154 if (obj->madv == I915_MADV_WILLNEED) 2155 mark_page_accessed(page); 2156 2157 page_cache_release(page); 2158 } 2159 obj->dirty = 0; 2160 2161 sg_free_table(obj->pages); 2162 kfree(obj->pages); 2163 } 2164 2165 int 2166 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2167 { 2168 const struct drm_i915_gem_object_ops *ops = obj->ops; 2169 2170 if (obj->pages == NULL) 2171 return 0; 2172 2173 if (obj->pages_pin_count) 2174 return -EBUSY; 2175 2176 BUG_ON(i915_gem_obj_bound_any(obj)); 2177 2178 /* ->put_pages might need to allocate memory for the bit17 swizzle 2179 * array, hence protect them from being reaped by removing them from gtt 2180 * lists early. */ 2181 list_del(&obj->global_list); 2182 2183 ops->put_pages(obj); 2184 obj->pages = NULL; 2185 2186 i915_gem_object_invalidate(obj); 2187 2188 return 0; 2189 } 2190 2191 static int 2192 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2193 { 2194 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2195 int page_count, i; 2196 struct address_space *mapping; 2197 struct sg_table *st; 2198 struct scatterlist *sg; 2199 struct sg_page_iter sg_iter; 2200 struct page *page; 2201 unsigned long last_pfn = 0; /* suppress gcc warning */ 2202 gfp_t gfp; 2203 2204 /* Assert that the object is not currently in any GPU domain. As it 2205 * wasn't in the GTT, there shouldn't be any way it could have been in 2206 * a GPU cache 2207 */ 2208 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2209 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2210 2211 st = kmalloc(sizeof(*st), GFP_KERNEL); 2212 if (st == NULL) 2213 return -ENOMEM; 2214 2215 page_count = obj->base.size / PAGE_SIZE; 2216 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2217 kfree(st); 2218 return -ENOMEM; 2219 } 2220 2221 /* Get the list of pages out of our struct file. They'll be pinned 2222 * at this point until we release them. 
2223 * 2224 * Fail silently without starting the shrinker 2225 */ 2226 mapping = file_inode(obj->base.filp)->i_mapping; 2227 gfp = mapping_gfp_mask(mapping); 2228 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2229 gfp &= ~(__GFP_IO | __GFP_WAIT); 2230 sg = st->sgl; 2231 st->nents = 0; 2232 for (i = 0; i < page_count; i++) { 2233 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2234 if (IS_ERR(page)) { 2235 i915_gem_shrink(dev_priv, 2236 page_count, 2237 I915_SHRINK_BOUND | 2238 I915_SHRINK_UNBOUND | 2239 I915_SHRINK_PURGEABLE); 2240 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2241 } 2242 if (IS_ERR(page)) { 2243 /* We've tried hard to allocate the memory by reaping 2244 * our own buffer, now let the real VM do its job and 2245 * go down in flames if truly OOM. 2246 */ 2247 i915_gem_shrink_all(dev_priv); 2248 page = shmem_read_mapping_page(mapping, i); 2249 if (IS_ERR(page)) 2250 goto err_pages; 2251 } 2252 #ifdef CONFIG_SWIOTLB 2253 if (swiotlb_nr_tbl()) { 2254 st->nents++; 2255 sg_set_page(sg, page, PAGE_SIZE, 0); 2256 sg = sg_next(sg); 2257 continue; 2258 } 2259 #endif 2260 if (!i || page_to_pfn(page) != last_pfn + 1) { 2261 if (i) 2262 sg = sg_next(sg); 2263 st->nents++; 2264 sg_set_page(sg, page, PAGE_SIZE, 0); 2265 } else { 2266 sg->length += PAGE_SIZE; 2267 } 2268 last_pfn = page_to_pfn(page); 2269 2270 /* Check that the i965g/gm workaround works. */ 2271 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2272 } 2273 #ifdef CONFIG_SWIOTLB 2274 if (!swiotlb_nr_tbl()) 2275 #endif 2276 sg_mark_end(sg); 2277 obj->pages = st; 2278 2279 if (i915_gem_object_needs_bit17_swizzle(obj)) 2280 i915_gem_object_do_bit_17_swizzle(obj); 2281 2282 if (obj->tiling_mode != I915_TILING_NONE && 2283 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2284 i915_gem_object_pin_pages(obj); 2285 2286 return 0; 2287 2288 err_pages: 2289 sg_mark_end(sg); 2290 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2291 page_cache_release(sg_page_iter_page(&sg_iter)); 2292 sg_free_table(st); 2293 kfree(st); 2294 2295 /* shmemfs first checks if there is enough memory to allocate the page 2296 * and reports ENOSPC should there be insufficient, along with the usual 2297 * ENOMEM for a genuine allocation failure. 2298 * 2299 * We use ENOSPC in our driver to mean that we have run out of aperture 2300 * space and so want to translate the error from shmemfs back to our 2301 * usual understanding of ENOMEM. 2302 */ 2303 if (PTR_ERR(page) == -ENOSPC) 2304 return -ENOMEM; 2305 else 2306 return PTR_ERR(page); 2307 } 2308 2309 /* Ensure that the associated pages are gathered from the backing storage 2310 * and pinned into our object. i915_gem_object_get_pages() may be called 2311 * multiple times before they are released by a single call to 2312 * i915_gem_object_put_pages() - once the pages are no longer referenced 2313 * either as a result of memory pressure (reaping pages under the shrinker) 2314 * or as the object is itself released. 
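 * A nonzero pages_pin_count (taken via i915_gem_object_pin_pages()) keeps
 * the pages from being released by i915_gem_object_put_pages() in the
 * meantime.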
2315 */ 2316 int 2317 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2318 { 2319 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2320 const struct drm_i915_gem_object_ops *ops = obj->ops; 2321 int ret; 2322 2323 if (obj->pages) 2324 return 0; 2325 2326 if (obj->madv != I915_MADV_WILLNEED) { 2327 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2328 return -EFAULT; 2329 } 2330 2331 BUG_ON(obj->pages_pin_count); 2332 2333 ret = ops->get_pages(obj); 2334 if (ret) 2335 return ret; 2336 2337 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2338 2339 obj->get_page.sg = obj->pages->sgl; 2340 obj->get_page.last = 0; 2341 2342 return 0; 2343 } 2344 2345 void i915_vma_move_to_active(struct i915_vma *vma, 2346 struct intel_engine_cs *ring) 2347 { 2348 struct drm_i915_gem_object *obj = vma->obj; 2349 2350 /* Add a reference if we're newly entering the active list. */ 2351 if (obj->active == 0) 2352 drm_gem_object_reference(&obj->base); 2353 obj->active |= intel_ring_flag(ring); 2354 2355 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2356 i915_gem_request_assign(&obj->last_read_req[ring->id], 2357 intel_ring_get_request(ring)); 2358 2359 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2360 } 2361 2362 static void 2363 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2364 { 2365 RQ_BUG_ON(obj->last_write_req == NULL); 2366 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2367 2368 i915_gem_request_assign(&obj->last_write_req, NULL); 2369 intel_fb_obj_flush(obj, true); 2370 } 2371 2372 static void 2373 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2374 { 2375 struct i915_vma *vma; 2376 2377 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2378 RQ_BUG_ON(!(obj->active & (1 << ring))); 2379 2380 list_del_init(&obj->ring_list[ring]); 2381 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2382 2383 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2384 i915_gem_object_retire__write(obj); 2385 2386 obj->active &= ~(1 << ring); 2387 if (obj->active) 2388 return; 2389 2390 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2391 if (!list_empty(&vma->mm_list)) 2392 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2393 } 2394 2395 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2396 drm_gem_object_unreference(&obj->base); 2397 } 2398 2399 static int 2400 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2401 { 2402 struct drm_i915_private *dev_priv = dev->dev_private; 2403 struct intel_engine_cs *ring; 2404 int ret, i, j; 2405 2406 /* Carefully retire all requests without writing to the rings */ 2407 for_each_ring(ring, dev_priv, i) { 2408 ret = intel_ring_idle(ring); 2409 if (ret) 2410 return ret; 2411 } 2412 i915_gem_retire_requests(dev); 2413 2414 /* Finally reset hw state */ 2415 for_each_ring(ring, dev_priv, i) { 2416 intel_ring_init_seqno(ring, seqno); 2417 2418 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2419 ring->semaphore.sync_seqno[j] = 0; 2420 } 2421 2422 return 0; 2423 } 2424 2425 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2426 { 2427 struct drm_i915_private *dev_priv = dev->dev_private; 2428 int ret; 2429 2430 if (seqno == 0) 2431 return -EINVAL; 2432 2433 /* HWS page needs to be set less than what we 2434 * will inject to ring 2435 */ 2436 ret = i915_gem_init_seqno(dev, seqno - 1); 2437 if (ret) 2438 return ret; 2439 2440 /* Carefully set the last_seqno value so that wrap 2441 * detection still 
works 2442 */ 2443 dev_priv->next_seqno = seqno; 2444 dev_priv->last_seqno = seqno - 1; 2445 if (dev_priv->last_seqno == 0) 2446 dev_priv->last_seqno--; 2447 2448 return 0; 2449 } 2450 2451 int 2452 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2453 { 2454 struct drm_i915_private *dev_priv = dev->dev_private; 2455 2456 /* reserve 0 for non-seqno */ 2457 if (dev_priv->next_seqno == 0) { 2458 int ret = i915_gem_init_seqno(dev, 0); 2459 if (ret) 2460 return ret; 2461 2462 dev_priv->next_seqno = 1; 2463 } 2464 2465 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2466 return 0; 2467 } 2468 2469 int __i915_add_request(struct intel_engine_cs *ring, 2470 struct drm_file *file, 2471 struct drm_i915_gem_object *obj) 2472 { 2473 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2474 struct drm_i915_gem_request *request; 2475 struct intel_ringbuffer *ringbuf; 2476 u32 request_start; 2477 int ret; 2478 2479 request = ring->outstanding_lazy_request; 2480 if (WARN_ON(request == NULL)) 2481 return -ENOMEM; 2482 2483 if (i915.enable_execlists) { 2484 ringbuf = request->ctx->engine[ring->id].ringbuf; 2485 } else 2486 ringbuf = ring->buffer; 2487 2488 request_start = intel_ring_get_tail(ringbuf); 2489 /* 2490 * Emit any outstanding flushes - execbuf can fail to emit the flush 2491 * after having emitted the batchbuffer command. Hence we need to fix 2492 * things up similar to emitting the lazy request. The difference here 2493 * is that the flush _must_ happen before the next request, no matter 2494 * what. 2495 */ 2496 if (i915.enable_execlists) { 2497 ret = logical_ring_flush_all_caches(ringbuf, request->ctx); 2498 if (ret) 2499 return ret; 2500 } else { 2501 ret = intel_ring_flush_all_caches(ring); 2502 if (ret) 2503 return ret; 2504 } 2505 2506 /* Record the position of the start of the request so that 2507 * should we detect the updated seqno part-way through the 2508 * GPU processing the request, we never over-estimate the 2509 * position of the head. 2510 */ 2511 request->postfix = intel_ring_get_tail(ringbuf); 2512 2513 if (i915.enable_execlists) { 2514 ret = ring->emit_request(ringbuf, request); 2515 if (ret) 2516 return ret; 2517 } else { 2518 ret = ring->add_request(ring); 2519 if (ret) 2520 return ret; 2521 2522 request->tail = intel_ring_get_tail(ringbuf); 2523 } 2524 2525 request->head = request_start; 2526 2527 /* Whilst this request exists, batch_obj will be on the 2528 * active_list, and so will hold the active reference. Only when this 2529 * request is retired will the the batch_obj be moved onto the 2530 * inactive_list and lose its active reference. Hence we do not need 2531 * to explicitly hold another reference here. 2532 */ 2533 request->batch_obj = obj; 2534 2535 if (!i915.enable_execlists) { 2536 /* Hold a reference to the current context so that we can inspect 2537 * it later in case a hangcheck error event fires. 
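		 * The reference is dropped again in i915_gem_request_free()
		 * once the request itself is finally released.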
2538 */ 2539 request->ctx = ring->last_context; 2540 if (request->ctx) 2541 i915_gem_context_reference(request->ctx); 2542 } 2543 2544 request->emitted_jiffies = jiffies; 2545 list_add_tail(&request->list, &ring->request_list); 2546 request->file_priv = NULL; 2547 2548 if (file) { 2549 struct drm_i915_file_private *file_priv = file->driver_priv; 2550 2551 spin_lock(&file_priv->mm.lock); 2552 request->file_priv = file_priv; 2553 list_add_tail(&request->client_list, 2554 &file_priv->mm.request_list); 2555 spin_unlock(&file_priv->mm.lock); 2556 2557 request->pid = get_pid(task_pid(current)); 2558 } 2559 2560 trace_i915_gem_request_add(request); 2561 ring->outstanding_lazy_request = NULL; 2562 2563 i915_queue_hangcheck(ring->dev); 2564 2565 queue_delayed_work(dev_priv->wq, 2566 &dev_priv->mm.retire_work, 2567 round_jiffies_up_relative(HZ)); 2568 intel_mark_busy(dev_priv->dev); 2569 2570 return 0; 2571 } 2572 2573 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2574 const struct intel_context *ctx) 2575 { 2576 unsigned long elapsed; 2577 2578 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2579 2580 if (ctx->hang_stats.banned) 2581 return true; 2582 2583 if (ctx->hang_stats.ban_period_seconds && 2584 elapsed <= ctx->hang_stats.ban_period_seconds) { 2585 if (!i915_gem_context_is_default(ctx)) { 2586 DRM_DEBUG("context hanging too fast, banning!\n"); 2587 return true; 2588 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2589 if (i915_stop_ring_allow_warn(dev_priv)) 2590 DRM_ERROR("gpu hanging too fast, banning!\n"); 2591 return true; 2592 } 2593 } 2594 2595 return false; 2596 } 2597 2598 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2599 struct intel_context *ctx, 2600 const bool guilty) 2601 { 2602 struct i915_ctx_hang_stats *hs; 2603 2604 if (WARN_ON(!ctx)) 2605 return; 2606 2607 hs = &ctx->hang_stats; 2608 2609 if (guilty) { 2610 hs->banned = i915_context_is_banned(dev_priv, ctx); 2611 hs->batch_active++; 2612 hs->guilty_ts = get_seconds(); 2613 } else { 2614 hs->batch_pending++; 2615 } 2616 } 2617 2618 void i915_gem_request_free(struct kref *req_ref) 2619 { 2620 struct drm_i915_gem_request *req = container_of(req_ref, 2621 typeof(*req), ref); 2622 struct intel_context *ctx = req->ctx; 2623 2624 if (ctx) { 2625 if (i915.enable_execlists) { 2626 struct intel_engine_cs *ring = req->ring; 2627 2628 if (ctx != ring->default_context) 2629 intel_lr_context_unpin(ring, ctx); 2630 } 2631 2632 i915_gem_context_unreference(ctx); 2633 } 2634 2635 kmem_cache_free(req->i915->requests, req); 2636 } 2637 2638 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2639 struct intel_context *ctx) 2640 { 2641 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2642 struct drm_i915_gem_request *req; 2643 int ret; 2644 2645 if (ring->outstanding_lazy_request) 2646 return 0; 2647 2648 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 2649 if (req == NULL) 2650 return -ENOMEM; 2651 2652 kref_init(&req->ref); 2653 req->i915 = dev_priv; 2654 2655 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2656 if (ret) 2657 goto err; 2658 2659 req->ring = ring; 2660 2661 if (i915.enable_execlists) 2662 ret = intel_logical_ring_alloc_request_extras(req, ctx); 2663 else 2664 ret = intel_ring_alloc_request_extras(req); 2665 if (ret) 2666 goto err; 2667 2668 ring->outstanding_lazy_request = req; 2669 return 0; 2670 2671 err: 2672 kmem_cache_free(dev_priv->requests, req); 2673 return ret; 2674 } 2675 2676 struct drm_i915_gem_request * 2677 
i915_gem_find_active_request(struct intel_engine_cs *ring) 2678 { 2679 struct drm_i915_gem_request *request; 2680 2681 list_for_each_entry(request, &ring->request_list, list) { 2682 if (i915_gem_request_completed(request, false)) 2683 continue; 2684 2685 return request; 2686 } 2687 2688 return NULL; 2689 } 2690 2691 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2692 struct intel_engine_cs *ring) 2693 { 2694 struct drm_i915_gem_request *request; 2695 bool ring_hung; 2696 2697 request = i915_gem_find_active_request(ring); 2698 2699 if (request == NULL) 2700 return; 2701 2702 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2703 2704 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2705 2706 list_for_each_entry_continue(request, &ring->request_list, list) 2707 i915_set_reset_status(dev_priv, request->ctx, false); 2708 } 2709 2710 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2711 struct intel_engine_cs *ring) 2712 { 2713 while (!list_empty(&ring->active_list)) { 2714 struct drm_i915_gem_object *obj; 2715 2716 obj = list_first_entry(&ring->active_list, 2717 struct drm_i915_gem_object, 2718 ring_list[ring->id]); 2719 2720 i915_gem_object_retire__read(obj, ring->id); 2721 } 2722 2723 /* 2724 * Clear the execlists queue up before freeing the requests, as those 2725 * are the ones that keep the context and ringbuffer backing objects 2726 * pinned in place. 2727 */ 2728 while (!list_empty(&ring->execlist_queue)) { 2729 struct drm_i915_gem_request *submit_req; 2730 2731 submit_req = list_first_entry(&ring->execlist_queue, 2732 struct drm_i915_gem_request, 2733 execlist_link); 2734 list_del(&submit_req->execlist_link); 2735 2736 if (submit_req->ctx != ring->default_context) 2737 intel_lr_context_unpin(ring, submit_req->ctx); 2738 2739 i915_gem_request_unreference(submit_req); 2740 } 2741 2742 /* 2743 * We must free the requests after all the corresponding objects have 2744 * been moved off active lists. Which is the same order as the normal 2745 * retire_requests function does. This is important if object hold 2746 * implicit references on things like e.g. ppgtt address spaces through 2747 * the request. 2748 */ 2749 while (!list_empty(&ring->request_list)) { 2750 struct drm_i915_gem_request *request; 2751 2752 request = list_first_entry(&ring->request_list, 2753 struct drm_i915_gem_request, 2754 list); 2755 2756 i915_gem_request_retire(request); 2757 } 2758 2759 /* This may not have been flushed before the reset, so clean it now */ 2760 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); 2761 } 2762 2763 void i915_gem_restore_fences(struct drm_device *dev) 2764 { 2765 struct drm_i915_private *dev_priv = dev->dev_private; 2766 int i; 2767 2768 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2769 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2770 2771 /* 2772 * Commit delayed tiling changes if we have an object still 2773 * attached to the fence, otherwise just clear the fence. 2774 */ 2775 if (reg->obj) { 2776 i915_gem_object_update_fence(reg->obj, reg, 2777 reg->obj->tiling_mode); 2778 } else { 2779 i915_gem_write_fence(dev, i, NULL); 2780 } 2781 } 2782 } 2783 2784 void i915_gem_reset(struct drm_device *dev) 2785 { 2786 struct drm_i915_private *dev_priv = dev->dev_private; 2787 struct intel_engine_cs *ring; 2788 int i; 2789 2790 /* 2791 * Before we free the objects from the requests, we need to inspect 2792 * them for finding the guilty party. 
As the requests only borrow 2793 * their reference to the objects, the inspection must be done first. 2794 */ 2795 for_each_ring(ring, dev_priv, i) 2796 i915_gem_reset_ring_status(dev_priv, ring); 2797 2798 for_each_ring(ring, dev_priv, i) 2799 i915_gem_reset_ring_cleanup(dev_priv, ring); 2800 2801 i915_gem_context_reset(dev); 2802 2803 i915_gem_restore_fences(dev); 2804 2805 WARN_ON(i915_verify_lists(dev)); 2806 } 2807 2808 /** 2809 * This function clears the request list as sequence numbers are passed. 2810 */ 2811 void 2812 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2813 { 2814 WARN_ON(i915_verify_lists(ring->dev)); 2815 2816 /* Retire requests first as we use it above for the early return. 2817 * If we retire requests last, we may use a later seqno and so clear 2818 * the requests lists without clearing the active list, leading to 2819 * confusion. 2820 */ 2821 while (!list_empty(&ring->request_list)) { 2822 struct drm_i915_gem_request *request; 2823 2824 request = list_first_entry(&ring->request_list, 2825 struct drm_i915_gem_request, 2826 list); 2827 2828 if (!i915_gem_request_completed(request, true)) 2829 break; 2830 2831 i915_gem_request_retire(request); 2832 } 2833 2834 /* Move any buffers on the active list that are no longer referenced 2835 * by the ringbuffer to the flushing/inactive lists as appropriate, 2836 * before we free the context associated with the requests. 2837 */ 2838 while (!list_empty(&ring->active_list)) { 2839 struct drm_i915_gem_object *obj; 2840 2841 obj = list_first_entry(&ring->active_list, 2842 struct drm_i915_gem_object, 2843 ring_list[ring->id]); 2844 2845 if (!list_empty(&obj->last_read_req[ring->id]->list)) 2846 break; 2847 2848 i915_gem_object_retire__read(obj, ring->id); 2849 } 2850 2851 if (unlikely(ring->trace_irq_req && 2852 i915_gem_request_completed(ring->trace_irq_req, true))) { 2853 ring->irq_put(ring); 2854 i915_gem_request_assign(&ring->trace_irq_req, NULL); 2855 } 2856 2857 WARN_ON(i915_verify_lists(ring->dev)); 2858 } 2859 2860 bool 2861 i915_gem_retire_requests(struct drm_device *dev) 2862 { 2863 struct drm_i915_private *dev_priv = dev->dev_private; 2864 struct intel_engine_cs *ring; 2865 bool idle = true; 2866 int i; 2867 2868 for_each_ring(ring, dev_priv, i) { 2869 i915_gem_retire_requests_ring(ring); 2870 idle &= list_empty(&ring->request_list); 2871 if (i915.enable_execlists) { 2872 unsigned long flags; 2873 2874 spin_lock_irqsave(&ring->execlist_lock, flags); 2875 idle &= list_empty(&ring->execlist_queue); 2876 spin_unlock_irqrestore(&ring->execlist_lock, flags); 2877 2878 intel_execlists_retire_requests(ring); 2879 } 2880 } 2881 2882 if (idle) 2883 mod_delayed_work(dev_priv->wq, 2884 &dev_priv->mm.idle_work, 2885 msecs_to_jiffies(100)); 2886 2887 return idle; 2888 } 2889 2890 static void 2891 i915_gem_retire_work_handler(struct work_struct *work) 2892 { 2893 struct drm_i915_private *dev_priv = 2894 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2895 struct drm_device *dev = dev_priv->dev; 2896 bool idle; 2897 2898 /* Come back later if the device is busy... 
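	 * (if struct_mutex is contended or requests remain outstanding, the
	 * handler simply requeues itself below with roughly a one second
	 * delay).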
	 */
	idle = false;
	if (mutex_trylock(&dev->struct_mutex)) {
		idle = i915_gem_retire_requests(dev);
		mutex_unlock(&dev->struct_mutex);
	}
	if (!idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(HZ));
}

static void
i915_gem_idle_work_handler(struct work_struct *work)
{
	struct drm_i915_private *dev_priv =
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
	struct drm_device *dev = dev_priv->dev;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		if (!list_empty(&ring->request_list))
			return;

	intel_mark_idle(dev);

	if (mutex_trylock(&dev->struct_mutex)) {
		struct intel_engine_cs *ring;
		int i;

		for_each_ring(ring, dev_priv, i)
			i915_gem_batch_pool_fini(&ring->batch_pool);

		mutex_unlock(&dev->struct_mutex);
	}
}

/**
 * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring any
 * completed requests.
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int ret, i;

	if (!obj->active)
		return 0;

	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct drm_i915_gem_request *req;

		req = obj->last_read_req[i];
		if (req == NULL)
			continue;

		if (list_empty(&req->list))
			goto retire;

		ret = i915_gem_check_olr(req);
		if (ret)
			return ret;

		if (i915_gem_request_completed(req, true)) {
			__i915_gem_request_retire__upto(req);
retire:
			i915_gem_object_retire__read(obj, i);
		}
	}

	return 0;
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the
 * busy ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
	unsigned reset_counter;
	int i, n = 0;
	int ret;

	if (args->flags != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually.
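	 * i915_gem_object_flush_active() also emits any outstanding lazy
	 * request, so the wait below cannot stall on work that was never
	 * actually submitted to the ring.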
*/ 3019 ret = i915_gem_object_flush_active(obj); 3020 if (ret) 3021 goto out; 3022 3023 if (!obj->active) 3024 goto out; 3025 3026 /* Do this after OLR check to make sure we make forward progress polling 3027 * on this IOCTL with a timeout == 0 (like busy ioctl) 3028 */ 3029 if (args->timeout_ns == 0) { 3030 ret = -ETIME; 3031 goto out; 3032 } 3033 3034 drm_gem_object_unreference(&obj->base); 3035 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3036 3037 for (i = 0; i < I915_NUM_RINGS; i++) { 3038 if (obj->last_read_req[i] == NULL) 3039 continue; 3040 3041 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3042 } 3043 3044 mutex_unlock(&dev->struct_mutex); 3045 3046 for (i = 0; i < n; i++) { 3047 if (ret == 0) 3048 ret = __i915_wait_request(req[i], reset_counter, true, 3049 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3050 file->driver_priv); 3051 i915_gem_request_unreference__unlocked(req[i]); 3052 } 3053 return ret; 3054 3055 out: 3056 drm_gem_object_unreference(&obj->base); 3057 mutex_unlock(&dev->struct_mutex); 3058 return ret; 3059 } 3060 3061 static int 3062 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3063 struct intel_engine_cs *to, 3064 struct drm_i915_gem_request *req) 3065 { 3066 struct intel_engine_cs *from; 3067 int ret; 3068 3069 from = i915_gem_request_get_ring(req); 3070 if (to == from) 3071 return 0; 3072 3073 if (i915_gem_request_completed(req, true)) 3074 return 0; 3075 3076 ret = i915_gem_check_olr(req); 3077 if (ret) 3078 return ret; 3079 3080 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3081 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3082 ret = __i915_wait_request(req, 3083 atomic_read(&i915->gpu_error.reset_counter), 3084 i915->mm.interruptible, 3085 NULL, 3086 &i915->rps.semaphores); 3087 if (ret) 3088 return ret; 3089 3090 i915_gem_object_retire_request(obj, req); 3091 } else { 3092 int idx = intel_ring_sync_index(from, to); 3093 u32 seqno = i915_gem_request_get_seqno(req); 3094 3095 if (seqno <= from->semaphore.sync_seqno[idx]) 3096 return 0; 3097 3098 trace_i915_gem_ring_sync_to(from, to, req); 3099 ret = to->semaphore.sync_to(to, from, seqno); 3100 if (ret) 3101 return ret; 3102 3103 /* We use last_read_req because sync_to() 3104 * might have just caused seqno wrap under 3105 * the radar. 3106 */ 3107 from->semaphore.sync_seqno[idx] = 3108 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3109 } 3110 3111 return 0; 3112 } 3113 3114 /** 3115 * i915_gem_object_sync - sync an object to a ring. 3116 * 3117 * @obj: object which may be in use on another ring. 3118 * @to: ring we wish to use the object on. May be NULL. 3119 * 3120 * This code is meant to abstract object synchronization with the GPU. 3121 * Calling with NULL implies synchronizing the object with the CPU 3122 * rather than a particular GPU ring. Conceptually we serialise writes 3123 * between engines inside the GPU. We only allow on engine to write 3124 * into a buffer at any time, but multiple readers. To ensure each has 3125 * a coherent view of memory, we must: 3126 * 3127 * - If there is an outstanding write request to the object, the new 3128 * request must wait for it to complete (either CPU or in hw, requests 3129 * on the same ring will be naturally ordered). 3130 * 3131 * - If we are a write request (pending_write_domain is set), the new 3132 * request must wait for outstanding read requests to complete. 3133 * 3134 * Returns 0 if successful, else propagates up the lower layer error. 
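 *
 * Note that for a read-only request only the last outstanding write is
 * waited upon, whereas a request that will write must wait for all
 * outstanding reads.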
3135 */ 3136 int 3137 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3138 struct intel_engine_cs *to) 3139 { 3140 const bool readonly = obj->base.pending_write_domain == 0; 3141 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3142 int ret, i, n; 3143 3144 if (!obj->active) 3145 return 0; 3146 3147 if (to == NULL) 3148 return i915_gem_object_wait_rendering(obj, readonly); 3149 3150 n = 0; 3151 if (readonly) { 3152 if (obj->last_write_req) 3153 req[n++] = obj->last_write_req; 3154 } else { 3155 for (i = 0; i < I915_NUM_RINGS; i++) 3156 if (obj->last_read_req[i]) 3157 req[n++] = obj->last_read_req[i]; 3158 } 3159 for (i = 0; i < n; i++) { 3160 ret = __i915_gem_object_sync(obj, to, req[i]); 3161 if (ret) 3162 return ret; 3163 } 3164 3165 return 0; 3166 } 3167 3168 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3169 { 3170 u32 old_write_domain, old_read_domains; 3171 3172 /* Force a pagefault for domain tracking on next user access */ 3173 i915_gem_release_mmap(obj); 3174 3175 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3176 return; 3177 3178 /* Wait for any direct GTT access to complete */ 3179 mb(); 3180 3181 old_read_domains = obj->base.read_domains; 3182 old_write_domain = obj->base.write_domain; 3183 3184 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3185 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3186 3187 trace_i915_gem_object_change_domain(obj, 3188 old_read_domains, 3189 old_write_domain); 3190 } 3191 3192 int i915_vma_unbind(struct i915_vma *vma) 3193 { 3194 struct drm_i915_gem_object *obj = vma->obj; 3195 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3196 int ret; 3197 3198 if (list_empty(&vma->vma_link)) 3199 return 0; 3200 3201 if (!drm_mm_node_allocated(&vma->node)) { 3202 i915_gem_vma_destroy(vma); 3203 return 0; 3204 } 3205 3206 if (vma->pin_count) 3207 return -EBUSY; 3208 3209 BUG_ON(obj->pages == NULL); 3210 3211 ret = i915_gem_object_wait_rendering(obj, false); 3212 if (ret) 3213 return ret; 3214 /* Continue on if we fail due to EIO, the GPU is hung so we 3215 * should be safe and we need to cleanup or else we might 3216 * cause memory corruption through use-after-free. 3217 */ 3218 3219 if (i915_is_ggtt(vma->vm) && 3220 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3221 i915_gem_object_finish_gtt(obj); 3222 3223 /* release the fence reg _after_ flushing */ 3224 ret = i915_gem_object_put_fence(obj); 3225 if (ret) 3226 return ret; 3227 } 3228 3229 trace_i915_vma_unbind(vma); 3230 3231 vma->vm->unbind_vma(vma); 3232 vma->bound = 0; 3233 3234 list_del_init(&vma->mm_list); 3235 if (i915_is_ggtt(vma->vm)) { 3236 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3237 obj->map_and_fenceable = false; 3238 } else if (vma->ggtt_view.pages) { 3239 sg_free_table(vma->ggtt_view.pages); 3240 kfree(vma->ggtt_view.pages); 3241 } 3242 vma->ggtt_view.pages = NULL; 3243 } 3244 3245 drm_mm_remove_node(&vma->node); 3246 i915_gem_vma_destroy(vma); 3247 3248 /* Since the unbound list is global, only move to that list if 3249 * no more VMAs exist. */ 3250 if (list_empty(&obj->vma_list)) { 3251 i915_gem_gtt_finish_object(obj); 3252 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3253 } 3254 3255 /* And finally now the object is completely decoupled from this vma, 3256 * we can drop its hold on the backing storage and allow it to be 3257 * reaped by the shrinker. 
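	 * This balances the i915_gem_object_pin_pages() taken when the vma
	 * was bound in i915_gem_object_bind_to_vm().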
3258 */ 3259 i915_gem_object_unpin_pages(obj); 3260 3261 return 0; 3262 } 3263 3264 int i915_gpu_idle(struct drm_device *dev) 3265 { 3266 struct drm_i915_private *dev_priv = dev->dev_private; 3267 struct intel_engine_cs *ring; 3268 int ret, i; 3269 3270 /* Flush everything onto the inactive list. */ 3271 for_each_ring(ring, dev_priv, i) { 3272 if (!i915.enable_execlists) { 3273 ret = i915_switch_context(ring, ring->default_context); 3274 if (ret) 3275 return ret; 3276 } 3277 3278 ret = intel_ring_idle(ring); 3279 if (ret) 3280 return ret; 3281 } 3282 3283 WARN_ON(i915_verify_lists(dev)); 3284 return 0; 3285 } 3286 3287 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3288 struct drm_i915_gem_object *obj) 3289 { 3290 struct drm_i915_private *dev_priv = dev->dev_private; 3291 int fence_reg; 3292 int fence_pitch_shift; 3293 3294 if (INTEL_INFO(dev)->gen >= 6) { 3295 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3296 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3297 } else { 3298 fence_reg = FENCE_REG_965_0; 3299 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3300 } 3301 3302 fence_reg += reg * 8; 3303 3304 /* To w/a incoherency with non-atomic 64-bit register updates, 3305 * we split the 64-bit update into two 32-bit writes. In order 3306 * for a partial fence not to be evaluated between writes, we 3307 * precede the update with write to turn off the fence register, 3308 * and only enable the fence as the last step. 3309 * 3310 * For extra levels of paranoia, we make sure each step lands 3311 * before applying the next step. 3312 */ 3313 I915_WRITE(fence_reg, 0); 3314 POSTING_READ(fence_reg); 3315 3316 if (obj) { 3317 u32 size = i915_gem_obj_ggtt_size(obj); 3318 uint64_t val; 3319 3320 /* Adjust fence size to match tiled area */ 3321 if (obj->tiling_mode != I915_TILING_NONE) { 3322 uint32_t row_size = obj->stride * 3323 (obj->tiling_mode == I915_TILING_Y ? 32 : 8); 3324 size = (size / row_size) * row_size; 3325 } 3326 3327 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3328 0xfffff000) << 32; 3329 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3330 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3331 if (obj->tiling_mode == I915_TILING_Y) 3332 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3333 val |= I965_FENCE_REG_VALID; 3334 3335 I915_WRITE(fence_reg + 4, val >> 32); 3336 POSTING_READ(fence_reg + 4); 3337 3338 I915_WRITE(fence_reg + 0, val); 3339 POSTING_READ(fence_reg); 3340 } else { 3341 I915_WRITE(fence_reg + 4, 0); 3342 POSTING_READ(fence_reg + 4); 3343 } 3344 } 3345 3346 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3347 struct drm_i915_gem_object *obj) 3348 { 3349 struct drm_i915_private *dev_priv = dev->dev_private; 3350 u32 val; 3351 3352 if (obj) { 3353 u32 size = i915_gem_obj_ggtt_size(obj); 3354 int pitch_val; 3355 int tile_width; 3356 3357 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3358 (size & -size) != size || 3359 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3360 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3361 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3362 3363 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3364 tile_width = 128; 3365 else 3366 tile_width = 512; 3367 3368 /* Note: pitch better be a power of two tile widths */ 3369 pitch_val = obj->stride / tile_width; 3370 pitch_val = ffs(pitch_val) - 1; 3371 3372 val = i915_gem_obj_ggtt_offset(obj); 3373 if (obj->tiling_mode == I915_TILING_Y) 3374 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3375 val |= I915_FENCE_SIZE_BITS(size); 3376 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3377 val |= I830_FENCE_REG_VALID; 3378 } else 3379 val = 0; 3380 3381 if (reg < 8) 3382 reg = FENCE_REG_830_0 + reg * 4; 3383 else 3384 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3385 3386 I915_WRITE(reg, val); 3387 POSTING_READ(reg); 3388 } 3389 3390 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3391 struct drm_i915_gem_object *obj) 3392 { 3393 struct drm_i915_private *dev_priv = dev->dev_private; 3394 uint32_t val; 3395 3396 if (obj) { 3397 u32 size = i915_gem_obj_ggtt_size(obj); 3398 uint32_t pitch_val; 3399 3400 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3401 (size & -size) != size || 3402 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3403 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3404 i915_gem_obj_ggtt_offset(obj), size); 3405 3406 pitch_val = obj->stride / 128; 3407 pitch_val = ffs(pitch_val) - 1; 3408 3409 val = i915_gem_obj_ggtt_offset(obj); 3410 if (obj->tiling_mode == I915_TILING_Y) 3411 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3412 val |= I830_FENCE_SIZE_BITS(size); 3413 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3414 val |= I830_FENCE_REG_VALID; 3415 } else 3416 val = 0; 3417 3418 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3419 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3420 } 3421 3422 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3423 { 3424 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3425 } 3426 3427 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3428 struct drm_i915_gem_object *obj) 3429 { 3430 struct drm_i915_private *dev_priv = dev->dev_private; 3431 3432 /* Ensure that all CPU reads are completed before installing a fence 3433 * and all writes before removing the fence. 3434 */ 3435 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3436 mb(); 3437 3438 WARN(obj && (!obj->stride || !obj->tiling_mode), 3439 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3440 obj->stride, obj->tiling_mode); 3441 3442 if (IS_GEN2(dev)) 3443 i830_write_fence_reg(dev, reg, obj); 3444 else if (IS_GEN3(dev)) 3445 i915_write_fence_reg(dev, reg, obj); 3446 else if (INTEL_INFO(dev)->gen >= 4) 3447 i965_write_fence_reg(dev, reg, obj); 3448 3449 /* And similarly be paranoid that no direct access to this region 3450 * is reordered to before the fence is installed. 3451 */ 3452 if (i915_gem_object_needs_mb(obj)) 3453 mb(); 3454 } 3455 3456 static inline int fence_number(struct drm_i915_private *dev_priv, 3457 struct drm_i915_fence_reg *fence) 3458 { 3459 return fence - dev_priv->fence_regs; 3460 } 3461 3462 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3463 struct drm_i915_fence_reg *fence, 3464 bool enable) 3465 { 3466 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3467 int reg = fence_number(dev_priv, fence); 3468 3469 i915_gem_write_fence(obj->base.dev, reg, enable ? 
obj : NULL); 3470 3471 if (enable) { 3472 obj->fence_reg = reg; 3473 fence->obj = obj; 3474 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3475 } else { 3476 obj->fence_reg = I915_FENCE_REG_NONE; 3477 fence->obj = NULL; 3478 list_del_init(&fence->lru_list); 3479 } 3480 obj->fence_dirty = false; 3481 } 3482 3483 static int 3484 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3485 { 3486 if (obj->last_fenced_req) { 3487 int ret = i915_wait_request(obj->last_fenced_req); 3488 if (ret) 3489 return ret; 3490 3491 i915_gem_request_assign(&obj->last_fenced_req, NULL); 3492 } 3493 3494 return 0; 3495 } 3496 3497 int 3498 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3499 { 3500 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3501 struct drm_i915_fence_reg *fence; 3502 int ret; 3503 3504 ret = i915_gem_object_wait_fence(obj); 3505 if (ret) 3506 return ret; 3507 3508 if (obj->fence_reg == I915_FENCE_REG_NONE) 3509 return 0; 3510 3511 fence = &dev_priv->fence_regs[obj->fence_reg]; 3512 3513 if (WARN_ON(fence->pin_count)) 3514 return -EBUSY; 3515 3516 i915_gem_object_fence_lost(obj); 3517 i915_gem_object_update_fence(obj, fence, false); 3518 3519 return 0; 3520 } 3521 3522 static struct drm_i915_fence_reg * 3523 i915_find_fence_reg(struct drm_device *dev) 3524 { 3525 struct drm_i915_private *dev_priv = dev->dev_private; 3526 struct drm_i915_fence_reg *reg, *avail; 3527 int i; 3528 3529 /* First try to find a free reg */ 3530 avail = NULL; 3531 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3532 reg = &dev_priv->fence_regs[i]; 3533 if (!reg->obj) 3534 return reg; 3535 3536 if (!reg->pin_count) 3537 avail = reg; 3538 } 3539 3540 if (avail == NULL) 3541 goto deadlock; 3542 3543 /* None available, try to steal one or wait for a user to finish */ 3544 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3545 if (reg->pin_count) 3546 continue; 3547 3548 return reg; 3549 } 3550 3551 deadlock: 3552 /* Wait for completion of pending flips which consume fences */ 3553 if (intel_has_pending_fb_unpin(dev)) 3554 return ERR_PTR(-EAGAIN); 3555 3556 return ERR_PTR(-EDEADLK); 3557 } 3558 3559 /** 3560 * i915_gem_object_get_fence - set up fencing for an object 3561 * @obj: object to map through a fence reg 3562 * 3563 * When mapping objects through the GTT, userspace wants to be able to write 3564 * to them without having to worry about swizzling if the object is tiled. 3565 * This function walks the fence regs looking for a free one for @obj, 3566 * stealing one if it can't find any. 3567 * 3568 * It then sets up the reg based on the object's properties: address, pitch 3569 * and tiling format. 3570 * 3571 * For an untiled surface, this removes any existing fence. 3572 */ 3573 int 3574 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3575 { 3576 struct drm_device *dev = obj->base.dev; 3577 struct drm_i915_private *dev_priv = dev->dev_private; 3578 bool enable = obj->tiling_mode != I915_TILING_NONE; 3579 struct drm_i915_fence_reg *reg; 3580 int ret; 3581 3582 /* Have we updated the tiling parameters upon the object and so 3583 * will need to serialise the write to the associated fence register? 3584 */ 3585 if (obj->fence_dirty) { 3586 ret = i915_gem_object_wait_fence(obj); 3587 if (ret) 3588 return ret; 3589 } 3590 3591 /* Just update our place in the LRU if our fence is getting reused. 
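	 * A fence_dirty object skips the early return and has the register
	 * rewritten below to match its new tiling parameters.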
 */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
			return 0;
		}
	} else if (enable) {
		if (WARN_ON(!obj->map_and_fenceable))
			return -EINVAL;

		reg = i915_find_fence_reg(dev);
		if (IS_ERR(reg))
			return PTR_ERR(reg);

		if (reg->obj) {
			struct drm_i915_gem_object *old = reg->obj;

			ret = i915_gem_object_wait_fence(old);
			if (ret)
				return ret;

			i915_gem_object_fence_lost(old);
		}
	} else
		return 0;

	i915_gem_object_update_fence(obj, reg, enable);

	return 0;
}

static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
				     unsigned long cache_level)
{
	struct drm_mm_node *gtt_space = &vma->node;
	struct drm_mm_node *other;

	/*
	 * On some machines we have to be careful when putting differing types
	 * of snoopable memory together to avoid the prefetcher crossing memory
	 * domains and dying. During vm initialisation, we decide whether or not
	 * these constraints apply and set the drm_mm.color_adjust
	 * appropriately.
	 */
	if (vma->vm->mm.color_adjust == NULL)
		return true;

	if (!drm_mm_node_allocated(gtt_space))
		return true;

	if (list_empty(&gtt_space->node_list))
		return true;

	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)
		return false;

	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
		return false;

	return true;
}

/**
 * Finds free space in the GTT aperture and binds the object or a view of it
 * there.
 */
static struct i915_vma *
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
			   struct i915_address_space *vm,
			   const struct i915_ggtt_view *ggtt_view,
			   unsigned alignment,
			   uint64_t flags)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 size, fence_size, fence_alignment, unfenced_alignment;
	unsigned long start =
		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
	unsigned long end =
		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
	struct i915_vma *vma;
	int ret;

	if (i915_is_ggtt(vm)) {
		u32 view_size;

		if (WARN_ON(!ggtt_view))
			return ERR_PTR(-EINVAL);

		view_size = i915_ggtt_view_size(obj, ggtt_view);

		fence_size = i915_gem_get_gtt_size(dev,
						   view_size,
						   obj->tiling_mode);
		fence_alignment = i915_gem_get_gtt_alignment(dev,
							     view_size,
							     obj->tiling_mode,
							     true);
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
								view_size,
								obj->tiling_mode,
								false);
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
	} else {
		fence_size = i915_gem_get_gtt_size(dev,
						   obj->base.size,
						   obj->tiling_mode);
		fence_alignment = i915_gem_get_gtt_alignment(dev,
							     obj->base.size,
							     obj->tiling_mode,
							     true);
		unfenced_alignment =
			i915_gem_get_gtt_alignment(dev,
						   obj->base.size,
						   obj->tiling_mode,
						   false);
		size = flags & PIN_MAPPABLE ?
fence_size : obj->base.size; 3712 } 3713 3714 if (alignment == 0) 3715 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3716 unfenced_alignment; 3717 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3718 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3719 ggtt_view ? ggtt_view->type : 0, 3720 alignment); 3721 return ERR_PTR(-EINVAL); 3722 } 3723 3724 /* If binding the object/GGTT view requires more space than the entire 3725 * aperture has, reject it early before evicting everything in a vain 3726 * attempt to find space. 3727 */ 3728 if (size > end) { 3729 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", 3730 ggtt_view ? ggtt_view->type : 0, 3731 size, 3732 flags & PIN_MAPPABLE ? "mappable" : "total", 3733 end); 3734 return ERR_PTR(-E2BIG); 3735 } 3736 3737 ret = i915_gem_object_get_pages(obj); 3738 if (ret) 3739 return ERR_PTR(ret); 3740 3741 i915_gem_object_pin_pages(obj); 3742 3743 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3744 i915_gem_obj_lookup_or_create_vma(obj, vm); 3745 3746 if (IS_ERR(vma)) 3747 goto err_unpin; 3748 3749 search_free: 3750 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3751 size, alignment, 3752 obj->cache_level, 3753 start, end, 3754 DRM_MM_SEARCH_DEFAULT, 3755 DRM_MM_CREATE_DEFAULT); 3756 if (ret) { 3757 ret = i915_gem_evict_something(dev, vm, size, alignment, 3758 obj->cache_level, 3759 start, end, 3760 flags); 3761 if (ret == 0) 3762 goto search_free; 3763 3764 goto err_free_vma; 3765 } 3766 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3767 ret = -EINVAL; 3768 goto err_remove_node; 3769 } 3770 3771 ret = i915_gem_gtt_prepare_object(obj); 3772 if (ret) 3773 goto err_remove_node; 3774 3775 trace_i915_vma_bind(vma, flags); 3776 ret = i915_vma_bind(vma, obj->cache_level, flags); 3777 if (ret) 3778 goto err_finish_gtt; 3779 3780 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3781 list_add_tail(&vma->mm_list, &vm->inactive_list); 3782 3783 return vma; 3784 3785 err_finish_gtt: 3786 i915_gem_gtt_finish_object(obj); 3787 err_remove_node: 3788 drm_mm_remove_node(&vma->node); 3789 err_free_vma: 3790 i915_gem_vma_destroy(vma); 3791 vma = ERR_PTR(ret); 3792 err_unpin: 3793 i915_gem_object_unpin_pages(obj); 3794 return vma; 3795 } 3796 3797 bool 3798 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3799 bool force) 3800 { 3801 /* If we don't have a page list set up, then we're not pinned 3802 * to GPU, and we can ignore the cache flush because it'll happen 3803 * again at bind time. 3804 */ 3805 if (obj->pages == NULL) 3806 return false; 3807 3808 /* 3809 * Stolen memory is always coherent with the GPU as it is explicitly 3810 * marked as wc by the system, or the system is cache-coherent. 3811 */ 3812 if (obj->stolen || obj->phys_handle) 3813 return false; 3814 3815 /* If the GPU is snooping the contents of the CPU cache, 3816 * we do not need to manually clear the CPU cache lines. However, 3817 * the caches are only snooped when the render cache is 3818 * flushed/invalidated. As we always have to emit invalidations 3819 * and flushes when moving into and out of the RENDER domain, correct 3820 * snooping behaviour occurs naturally as the result of our domain 3821 * tracking. 
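	 * We still note obj->cache_dirty below so that a later forced flush
	 * knows there may be dirty CPU cachelines left to write back.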
3822 */ 3823 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3824 obj->cache_dirty = true; 3825 return false; 3826 } 3827 3828 trace_i915_gem_object_clflush(obj); 3829 drm_clflush_sg(obj->pages); 3830 obj->cache_dirty = false; 3831 3832 return true; 3833 } 3834 3835 /** Flushes the GTT write domain for the object if it's dirty. */ 3836 static void 3837 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3838 { 3839 uint32_t old_write_domain; 3840 3841 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3842 return; 3843 3844 /* No actual flushing is required for the GTT write domain. Writes 3845 * to it immediately go to main memory as far as we know, so there's 3846 * no chipset flush. It also doesn't land in render cache. 3847 * 3848 * However, we do have to enforce the order so that all writes through 3849 * the GTT land before any writes to the device, such as updates to 3850 * the GATT itself. 3851 */ 3852 wmb(); 3853 3854 old_write_domain = obj->base.write_domain; 3855 obj->base.write_domain = 0; 3856 3857 intel_fb_obj_flush(obj, false); 3858 3859 trace_i915_gem_object_change_domain(obj, 3860 obj->base.read_domains, 3861 old_write_domain); 3862 } 3863 3864 /** Flushes the CPU write domain for the object if it's dirty. */ 3865 static void 3866 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3867 { 3868 uint32_t old_write_domain; 3869 3870 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3871 return; 3872 3873 if (i915_gem_clflush_object(obj, obj->pin_display)) 3874 i915_gem_chipset_flush(obj->base.dev); 3875 3876 old_write_domain = obj->base.write_domain; 3877 obj->base.write_domain = 0; 3878 3879 intel_fb_obj_flush(obj, false); 3880 3881 trace_i915_gem_object_change_domain(obj, 3882 obj->base.read_domains, 3883 old_write_domain); 3884 } 3885 3886 /** 3887 * Moves a single object to the GTT read, and possibly write domain. 3888 * 3889 * This function returns when the move is complete, including waiting on 3890 * flushes to occur. 3891 */ 3892 int 3893 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3894 { 3895 uint32_t old_write_domain, old_read_domains; 3896 struct i915_vma *vma; 3897 int ret; 3898 3899 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3900 return 0; 3901 3902 ret = i915_gem_object_wait_rendering(obj, !write); 3903 if (ret) 3904 return ret; 3905 3906 /* Flush and acquire obj->pages so that we are coherent through 3907 * direct access in memory with previous cached writes through 3908 * shmemfs and that our cache domain tracking remains valid. 3909 * For example, if the obj->filp was moved to swap without us 3910 * being notified and releasing the pages, we would mistakenly 3911 * continue to assume that the obj remained out of the CPU cached 3912 * domain. 3913 */ 3914 ret = i915_gem_object_get_pages(obj); 3915 if (ret) 3916 return ret; 3917 3918 i915_gem_object_flush_cpu_write_domain(obj); 3919 3920 /* Serialise direct access to this object with the barriers for 3921 * coherent writes from the GPU, by effectively invalidating the 3922 * GTT domain upon first access. 3923 */ 3924 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3925 mb(); 3926 3927 old_write_domain = obj->base.write_domain; 3928 old_read_domains = obj->base.read_domains; 3929 3930 /* It should now be out of any other write domains, and we can update 3931 * the domain values for our changes. 
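	 * A write access takes exclusive ownership of the GTT domain and also
	 * marks the object dirty so its pages are written back when released.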
3932 */ 3933 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3934 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3935 if (write) { 3936 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3937 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3938 obj->dirty = 1; 3939 } 3940 3941 if (write) 3942 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 3943 3944 trace_i915_gem_object_change_domain(obj, 3945 old_read_domains, 3946 old_write_domain); 3947 3948 /* And bump the LRU for this access */ 3949 vma = i915_gem_obj_to_ggtt(obj); 3950 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3951 list_move_tail(&vma->mm_list, 3952 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3953 3954 return 0; 3955 } 3956 3957 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3958 enum i915_cache_level cache_level) 3959 { 3960 struct drm_device *dev = obj->base.dev; 3961 struct i915_vma *vma, *next; 3962 int ret; 3963 3964 if (obj->cache_level == cache_level) 3965 return 0; 3966 3967 if (i915_gem_obj_is_pinned(obj)) { 3968 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3969 return -EBUSY; 3970 } 3971 3972 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3973 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3974 ret = i915_vma_unbind(vma); 3975 if (ret) 3976 return ret; 3977 } 3978 } 3979 3980 if (i915_gem_obj_bound_any(obj)) { 3981 ret = i915_gem_object_wait_rendering(obj, false); 3982 if (ret) 3983 return ret; 3984 3985 i915_gem_object_finish_gtt(obj); 3986 3987 /* Before SandyBridge, you could not use tiling or fence 3988 * registers with snooped memory, so relinquish any fences 3989 * currently pointing to our region in the aperture. 3990 */ 3991 if (INTEL_INFO(dev)->gen < 6) { 3992 ret = i915_gem_object_put_fence(obj); 3993 if (ret) 3994 return ret; 3995 } 3996 3997 list_for_each_entry(vma, &obj->vma_list, vma_link) 3998 if (drm_mm_node_allocated(&vma->node)) { 3999 ret = i915_vma_bind(vma, cache_level, 4000 PIN_UPDATE); 4001 if (ret) 4002 return ret; 4003 } 4004 } 4005 4006 list_for_each_entry(vma, &obj->vma_list, vma_link) 4007 vma->node.color = cache_level; 4008 obj->cache_level = cache_level; 4009 4010 if (obj->cache_dirty && 4011 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4012 cpu_write_needs_clflush(obj)) { 4013 if (i915_gem_clflush_object(obj, true)) 4014 i915_gem_chipset_flush(obj->base.dev); 4015 } 4016 4017 return 0; 4018 } 4019 4020 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4021 struct drm_file *file) 4022 { 4023 struct drm_i915_gem_caching *args = data; 4024 struct drm_i915_gem_object *obj; 4025 4026 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4027 if (&obj->base == NULL) 4028 return -ENOENT; 4029 4030 switch (obj->cache_level) { 4031 case I915_CACHE_LLC: 4032 case I915_CACHE_L3_LLC: 4033 args->caching = I915_CACHING_CACHED; 4034 break; 4035 4036 case I915_CACHE_WT: 4037 args->caching = I915_CACHING_DISPLAY; 4038 break; 4039 4040 default: 4041 args->caching = I915_CACHING_NONE; 4042 break; 4043 } 4044 4045 drm_gem_object_unreference_unlocked(&obj->base); 4046 return 0; 4047 } 4048 4049 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4050 struct drm_file *file) 4051 { 4052 struct drm_i915_gem_caching *args = data; 4053 struct drm_i915_gem_object *obj; 4054 enum i915_cache_level level; 4055 int ret; 4056 4057 switch (args->caching) { 4058 case I915_CACHING_NONE: 4059 level = I915_CACHE_NONE; 4060 break; 4061 case I915_CACHING_CACHED: 4062 level = 
I915_CACHE_LLC; 4063 break; 4064 case I915_CACHING_DISPLAY: 4065 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4066 break; 4067 default: 4068 return -EINVAL; 4069 } 4070 4071 ret = i915_mutex_lock_interruptible(dev); 4072 if (ret) 4073 return ret; 4074 4075 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4076 if (&obj->base == NULL) { 4077 ret = -ENOENT; 4078 goto unlock; 4079 } 4080 4081 ret = i915_gem_object_set_cache_level(obj, level); 4082 4083 drm_gem_object_unreference(&obj->base); 4084 unlock: 4085 mutex_unlock(&dev->struct_mutex); 4086 return ret; 4087 } 4088 4089 /* 4090 * Prepare buffer for display plane (scanout, cursors, etc). 4091 * Can be called from an uninterruptible phase (modesetting) and allows 4092 * any flushes to be pipelined (for pageflips). 4093 */ 4094 int 4095 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4096 u32 alignment, 4097 struct intel_engine_cs *pipelined, 4098 const struct i915_ggtt_view *view) 4099 { 4100 u32 old_read_domains, old_write_domain; 4101 int ret; 4102 4103 ret = i915_gem_object_sync(obj, pipelined); 4104 if (ret) 4105 return ret; 4106 4107 /* Mark the pin_display early so that we account for the 4108 * display coherency whilst setting up the cache domains. 4109 */ 4110 obj->pin_display++; 4111 4112 /* The display engine is not coherent with the LLC cache on gen6. As 4113 * a result, we make sure that the pinning that is about to occur is 4114 * done with uncached PTEs. This is lowest common denominator for all 4115 * chipsets. 4116 * 4117 * However for gen6+, we could do better by using the GFDT bit instead 4118 * of uncaching, which would allow us to flush all the LLC-cached data 4119 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4120 */ 4121 ret = i915_gem_object_set_cache_level(obj, 4122 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4123 if (ret) 4124 goto err_unpin_display; 4125 4126 /* As the user may map the buffer once pinned in the display plane 4127 * (e.g. libkms for the bootup splash), we have to ensure that we 4128 * always use map_and_fenceable for all scanout buffers. 4129 */ 4130 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4131 view->type == I915_GGTT_VIEW_NORMAL ? 4132 PIN_MAPPABLE : 0); 4133 if (ret) 4134 goto err_unpin_display; 4135 4136 i915_gem_object_flush_cpu_write_domain(obj); 4137 4138 old_write_domain = obj->base.write_domain; 4139 old_read_domains = obj->base.read_domains; 4140 4141 /* It should now be out of any other write domains, and we can update 4142 * the domain values for our changes. 4143 */ 4144 obj->base.write_domain = 0; 4145 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4146 4147 trace_i915_gem_object_change_domain(obj, 4148 old_read_domains, 4149 old_write_domain); 4150 4151 return 0; 4152 4153 err_unpin_display: 4154 obj->pin_display--; 4155 return ret; 4156 } 4157 4158 void 4159 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4160 const struct i915_ggtt_view *view) 4161 { 4162 if (WARN_ON(obj->pin_display == 0)) 4163 return; 4164 4165 i915_gem_object_ggtt_unpin_view(obj, view); 4166 4167 obj->pin_display--; 4168 } 4169 4170 /** 4171 * Moves a single object to the CPU read, and possibly write domain. 4172 * 4173 * This function returns when the move is complete, including waiting on 4174 * flushes to occur. 
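 *
 * If @write is set, the object ends up exclusively in the CPU domain and
 * any stale GPU read domains are left to be invalidated on next use.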
4175 */ 4176 int 4177 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4178 { 4179 uint32_t old_write_domain, old_read_domains; 4180 int ret; 4181 4182 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4183 return 0; 4184 4185 ret = i915_gem_object_wait_rendering(obj, !write); 4186 if (ret) 4187 return ret; 4188 4189 i915_gem_object_flush_gtt_write_domain(obj); 4190 4191 old_write_domain = obj->base.write_domain; 4192 old_read_domains = obj->base.read_domains; 4193 4194 /* Flush the CPU cache if it's still invalid. */ 4195 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4196 i915_gem_clflush_object(obj, false); 4197 4198 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4199 } 4200 4201 /* It should now be out of any other write domains, and we can update 4202 * the domain values for our changes. 4203 */ 4204 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4205 4206 /* If we're writing through the CPU, then the GPU read domains will 4207 * need to be invalidated at next use. 4208 */ 4209 if (write) { 4210 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4211 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4212 } 4213 4214 if (write) 4215 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 4216 4217 trace_i915_gem_object_change_domain(obj, 4218 old_read_domains, 4219 old_write_domain); 4220 4221 return 0; 4222 } 4223 4224 /* Throttle our rendering by waiting until the ring has completed our requests 4225 * emitted over 20 msec ago. 4226 * 4227 * Note that if we were to use the current jiffies each time around the loop, 4228 * we wouldn't escape the function with any frames outstanding if the time to 4229 * render a frame was over 20ms. 4230 * 4231 * This should get us reasonable parallelism between CPU and GPU but also 4232 * relatively low latency when blocking on a particular request to finish. 
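 *
 * The 20ms window corresponds to DRM_I915_THROTTLE_JIFFIES below: we block
 * on the most recent request that was emitted before
 * jiffies - DRM_I915_THROTTLE_JIFFIES.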
4233 */ 4234 static int 4235 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4236 { 4237 struct drm_i915_private *dev_priv = dev->dev_private; 4238 struct drm_i915_file_private *file_priv = file->driver_priv; 4239 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4240 struct drm_i915_gem_request *request, *target = NULL; 4241 unsigned reset_counter; 4242 int ret; 4243 4244 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4245 if (ret) 4246 return ret; 4247 4248 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4249 if (ret) 4250 return ret; 4251 4252 spin_lock(&file_priv->mm.lock); 4253 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4254 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4255 break; 4256 4257 target = request; 4258 } 4259 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4260 if (target) 4261 i915_gem_request_reference(target); 4262 spin_unlock(&file_priv->mm.lock); 4263 4264 if (target == NULL) 4265 return 0; 4266 4267 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4268 if (ret == 0) 4269 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4270 4271 i915_gem_request_unreference__unlocked(target); 4272 4273 return ret; 4274 } 4275 4276 static bool 4277 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4278 { 4279 struct drm_i915_gem_object *obj = vma->obj; 4280 4281 if (alignment && 4282 vma->node.start & (alignment - 1)) 4283 return true; 4284 4285 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4286 return true; 4287 4288 if (flags & PIN_OFFSET_BIAS && 4289 vma->node.start < (flags & PIN_OFFSET_MASK)) 4290 return true; 4291 4292 return false; 4293 } 4294 4295 static int 4296 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4297 struct i915_address_space *vm, 4298 const struct i915_ggtt_view *ggtt_view, 4299 uint32_t alignment, 4300 uint64_t flags) 4301 { 4302 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4303 struct i915_vma *vma; 4304 unsigned bound; 4305 int ret; 4306 4307 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4308 return -ENODEV; 4309 4310 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4311 return -EINVAL; 4312 4313 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4314 return -EINVAL; 4315 4316 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4317 return -EINVAL; 4318 4319 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4320 i915_gem_obj_to_vma(obj, vm); 4321 4322 if (IS_ERR(vma)) 4323 return PTR_ERR(vma); 4324 4325 if (vma) { 4326 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4327 return -EBUSY; 4328 4329 if (i915_vma_misplaced(vma, alignment, flags)) { 4330 unsigned long offset; 4331 offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : 4332 i915_gem_obj_offset(obj, vm); 4333 WARN(vma->pin_count, 4334 "bo is already pinned in %s with incorrect alignment:" 4335 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4336 " obj->map_and_fenceable=%d\n", 4337 ggtt_view ? "ggtt" : "ppgtt", 4338 offset, 4339 alignment, 4340 !!(flags & PIN_MAPPABLE), 4341 obj->map_and_fenceable); 4342 ret = i915_vma_unbind(vma); 4343 if (ret) 4344 return ret; 4345 4346 vma = NULL; 4347 } 4348 } 4349 4350 bound = vma ? 
vma->bound : 0; 4351 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4352 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4353 flags); 4354 if (IS_ERR(vma)) 4355 return PTR_ERR(vma); 4356 } else { 4357 ret = i915_vma_bind(vma, obj->cache_level, flags); 4358 if (ret) 4359 return ret; 4360 } 4361 4362 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4363 (bound ^ vma->bound) & GLOBAL_BIND) { 4364 bool mappable, fenceable; 4365 u32 fence_size, fence_alignment; 4366 4367 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4368 obj->base.size, 4369 obj->tiling_mode); 4370 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4371 obj->base.size, 4372 obj->tiling_mode, 4373 true); 4374 4375 fenceable = (vma->node.size == fence_size && 4376 (vma->node.start & (fence_alignment - 1)) == 0); 4377 4378 mappable = (vma->node.start + fence_size <= 4379 dev_priv->gtt.mappable_end); 4380 4381 obj->map_and_fenceable = mappable && fenceable; 4382 4383 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4384 } 4385 4386 vma->pin_count++; 4387 return 0; 4388 } 4389 4390 int 4391 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4392 struct i915_address_space *vm, 4393 uint32_t alignment, 4394 uint64_t flags) 4395 { 4396 return i915_gem_object_do_pin(obj, vm, 4397 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4398 alignment, flags); 4399 } 4400 4401 int 4402 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4403 const struct i915_ggtt_view *view, 4404 uint32_t alignment, 4405 uint64_t flags) 4406 { 4407 if (WARN_ONCE(!view, "no view specified")) 4408 return -EINVAL; 4409 4410 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4411 alignment, flags | PIN_GLOBAL); 4412 } 4413 4414 void 4415 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4416 const struct i915_ggtt_view *view) 4417 { 4418 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4419 4420 BUG_ON(!vma); 4421 WARN_ON(vma->pin_count == 0); 4422 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4423 4424 --vma->pin_count; 4425 } 4426 4427 bool 4428 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4429 { 4430 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4431 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4432 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4433 4434 WARN_ON(!ggtt_vma || 4435 dev_priv->fence_regs[obj->fence_reg].pin_count > 4436 ggtt_vma->pin_count); 4437 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4438 return true; 4439 } else 4440 return false; 4441 } 4442 4443 void 4444 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4445 { 4446 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4447 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4448 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4449 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4450 } 4451 } 4452 4453 int 4454 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4455 struct drm_file *file) 4456 { 4457 struct drm_i915_gem_busy *args = data; 4458 struct drm_i915_gem_object *obj; 4459 int ret; 4460 4461 ret = i915_mutex_lock_interruptible(dev); 4462 if (ret) 4463 return ret; 4464 4465 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4466 if (&obj->base == NULL) { 4467 ret = -ENOENT; 4468 goto unlock; 4469 } 4470 4471 /* Count all active objects as busy, even if they are currently not used 4472 * by the gpu. 
Users of this interface expect objects to eventually 4473 * become non-busy without any further actions, therefore emit any 4474 * necessary flushes here. 4475 */ 4476 ret = i915_gem_object_flush_active(obj); 4477 if (ret) 4478 goto unref; 4479 4480 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4481 args->busy = obj->active << 16; 4482 if (obj->last_write_req) 4483 args->busy |= obj->last_write_req->ring->id; 4484 4485 unref: 4486 drm_gem_object_unreference(&obj->base); 4487 unlock: 4488 mutex_unlock(&dev->struct_mutex); 4489 return ret; 4490 } 4491 4492 int 4493 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4494 struct drm_file *file_priv) 4495 { 4496 return i915_gem_ring_throttle(dev, file_priv); 4497 } 4498 4499 int 4500 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4501 struct drm_file *file_priv) 4502 { 4503 struct drm_i915_private *dev_priv = dev->dev_private; 4504 struct drm_i915_gem_madvise *args = data; 4505 struct drm_i915_gem_object *obj; 4506 int ret; 4507 4508 switch (args->madv) { 4509 case I915_MADV_DONTNEED: 4510 case I915_MADV_WILLNEED: 4511 break; 4512 default: 4513 return -EINVAL; 4514 } 4515 4516 ret = i915_mutex_lock_interruptible(dev); 4517 if (ret) 4518 return ret; 4519 4520 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4521 if (&obj->base == NULL) { 4522 ret = -ENOENT; 4523 goto unlock; 4524 } 4525 4526 if (i915_gem_obj_is_pinned(obj)) { 4527 ret = -EINVAL; 4528 goto out; 4529 } 4530 4531 if (obj->pages && 4532 obj->tiling_mode != I915_TILING_NONE && 4533 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4534 if (obj->madv == I915_MADV_WILLNEED) 4535 i915_gem_object_unpin_pages(obj); 4536 if (args->madv == I915_MADV_WILLNEED) 4537 i915_gem_object_pin_pages(obj); 4538 } 4539 4540 if (obj->madv != __I915_MADV_PURGED) 4541 obj->madv = args->madv; 4542 4543 /* if the object is no longer attached, discard its backing storage */ 4544 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4545 i915_gem_object_truncate(obj); 4546 4547 args->retained = obj->madv != __I915_MADV_PURGED; 4548 4549 out: 4550 drm_gem_object_unreference(&obj->base); 4551 unlock: 4552 mutex_unlock(&dev->struct_mutex); 4553 return ret; 4554 } 4555 4556 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4557 const struct drm_i915_gem_object_ops *ops) 4558 { 4559 int i; 4560 4561 INIT_LIST_HEAD(&obj->global_list); 4562 for (i = 0; i < I915_NUM_RINGS; i++) 4563 INIT_LIST_HEAD(&obj->ring_list[i]); 4564 INIT_LIST_HEAD(&obj->obj_exec_link); 4565 INIT_LIST_HEAD(&obj->vma_list); 4566 INIT_LIST_HEAD(&obj->batch_pool_link); 4567 4568 obj->ops = ops; 4569 4570 obj->fence_reg = I915_FENCE_REG_NONE; 4571 obj->madv = I915_MADV_WILLNEED; 4572 4573 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4574 } 4575 4576 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4577 .get_pages = i915_gem_object_get_pages_gtt, 4578 .put_pages = i915_gem_object_put_pages_gtt, 4579 }; 4580 4581 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4582 size_t size) 4583 { 4584 struct drm_i915_gem_object *obj; 4585 struct address_space *mapping; 4586 gfp_t mask; 4587 4588 obj = i915_gem_object_alloc(dev); 4589 if (obj == NULL) 4590 return NULL; 4591 4592 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4593 i915_gem_object_free(obj); 4594 return NULL; 4595 } 4596 4597 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4598 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4599 /* 965gm cannot relocate objects above 4GiB. 
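                 * Keep the shmemfs backing pages in the low 4GiB by clearing
                 * __GFP_HIGHMEM and setting __GFP_DMA32 in the mapping's gfp
                 * mask below.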
*/ 4600 mask &= ~__GFP_HIGHMEM; 4601 mask |= __GFP_DMA32; 4602 } 4603 4604 mapping = file_inode(obj->base.filp)->i_mapping; 4605 mapping_set_gfp_mask(mapping, mask); 4606 4607 i915_gem_object_init(obj, &i915_gem_object_ops); 4608 4609 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4610 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4611 4612 if (HAS_LLC(dev)) { 4613 /* On some devices, we can have the GPU use the LLC (the CPU 4614 * cache) for about a 10% performance improvement 4615 * compared to uncached. Graphics requests other than 4616 * display scanout are coherent with the CPU in 4617 * accessing this cache. This means in this mode we 4618 * don't need to clflush on the CPU side, and on the 4619 * GPU side we only need to flush internal caches to 4620 * get data visible to the CPU. 4621 * 4622 * However, we maintain the display planes as UC, and so 4623 * need to rebind when first used as such. 4624 */ 4625 obj->cache_level = I915_CACHE_LLC; 4626 } else 4627 obj->cache_level = I915_CACHE_NONE; 4628 4629 trace_i915_gem_object_create(obj); 4630 4631 return obj; 4632 } 4633 4634 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4635 { 4636 /* If we are the last user of the backing storage (be it shmemfs 4637 * pages or stolen etc), we know that the pages are going to be 4638 * immediately released. In this case, we can then skip copying 4639 * back the contents from the GPU. 4640 */ 4641 4642 if (obj->madv != I915_MADV_WILLNEED) 4643 return false; 4644 4645 if (obj->base.filp == NULL) 4646 return true; 4647 4648 /* At first glance, this looks racy, but then again so would be 4649 * userspace racing mmap against close. However, the first external 4650 * reference to the filp can only be obtained through the 4651 * i915_gem_mmap_ioctl() which safeguards us against the user 4652 * acquiring such a reference whilst we are in the middle of 4653 * freeing the object. 4654 */ 4655 return atomic_long_read(&obj->base.filp->f_count) == 1; 4656 } 4657 4658 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4659 { 4660 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4661 struct drm_device *dev = obj->base.dev; 4662 struct drm_i915_private *dev_priv = dev->dev_private; 4663 struct i915_vma *vma, *next; 4664 4665 intel_runtime_pm_get(dev_priv); 4666 4667 trace_i915_gem_object_destroy(obj); 4668 4669 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4670 int ret; 4671 4672 vma->pin_count = 0; 4673 ret = i915_vma_unbind(vma); 4674 if (WARN_ON(ret == -ERESTARTSYS)) { 4675 bool was_interruptible; 4676 4677 was_interruptible = dev_priv->mm.interruptible; 4678 dev_priv->mm.interruptible = false; 4679 4680 WARN_ON(i915_vma_unbind(vma)); 4681 4682 dev_priv->mm.interruptible = was_interruptible; 4683 } 4684 } 4685 4686 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4687 * before progressing. 
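         * (i.e. drop the pages pin here so the normal teardown below works).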
         */
        if (obj->stolen)
                i915_gem_object_unpin_pages(obj);

        WARN_ON(obj->frontbuffer_bits);

        if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
            dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
            obj->tiling_mode != I915_TILING_NONE)
                i915_gem_object_unpin_pages(obj);

        if (WARN_ON(obj->pages_pin_count))
                obj->pages_pin_count = 0;
        if (discard_backing_storage(obj))
                obj->madv = I915_MADV_DONTNEED;
        i915_gem_object_put_pages(obj);
        i915_gem_object_free_mmap_offset(obj);

        BUG_ON(obj->pages);

        if (obj->base.import_attach)
                drm_prime_gem_destroy(&obj->base, NULL);

        if (obj->ops->release)
                obj->ops->release(obj);

        drm_gem_object_release(&obj->base);
        i915_gem_info_remove_obj(dev_priv, obj->base.size);

        kfree(obj->bit_17);
        i915_gem_object_free(obj);

        intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
                                     struct i915_address_space *vm)
{
        struct i915_vma *vma;
        list_for_each_entry(vma, &obj->vma_list, vma_link) {
                if (i915_is_ggtt(vma->vm) &&
                    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
                        continue;
                if (vma->vm == vm)
                        return vma;
        }
        return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
                                           const struct i915_ggtt_view *view)
{
        struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
        struct i915_vma *vma;

        if (WARN_ONCE(!view, "no view specified"))
                return ERR_PTR(-EINVAL);

        list_for_each_entry(vma, &obj->vma_list, vma_link)
                if (vma->vm == ggtt &&
                    i915_ggtt_view_equal(&vma->ggtt_view, view))
                        return vma;
        return NULL;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
        struct i915_address_space *vm = NULL;
        WARN_ON(vma->node.allocated);

        /* Keep the vma as a placeholder in the execbuffer reservation lists */
        if (!list_empty(&vma->exec_list))
                return;

        vm = vma->vm;

        if (!i915_is_ggtt(vm))
                i915_ppgtt_put(i915_vm_to_ppgtt(vm));

        list_del(&vma->vma_link);

        kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}

static void
i915_gem_stop_ringbuffers(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        int i;

        for_each_ring(ring, dev_priv, i)
                dev_priv->gt.stop_ring(ring);
}

int
i915_gem_suspend(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret = 0;

        mutex_lock(&dev->struct_mutex);
        ret = i915_gpu_idle(dev);
        if (ret)
                goto err;

        i915_gem_retire_requests(dev);

        i915_gem_stop_ringbuffers(dev);
        mutex_unlock(&dev->struct_mutex);

        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
        cancel_delayed_work_sync(&dev_priv->mm.retire_work);
        flush_delayed_work(&dev_priv->mm.idle_work);

        /* Assert that we successfully flushed all the work and
         * reset the GPU back to its idle, low power state.
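         * (the WARN_ON(dev_priv->mm.busy) below fires if the GPU is still
         * marked busy at this point).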
4804 */ 4805 WARN_ON(dev_priv->mm.busy); 4806 4807 return 0; 4808 4809 err: 4810 mutex_unlock(&dev->struct_mutex); 4811 return ret; 4812 } 4813 4814 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4815 { 4816 struct drm_device *dev = ring->dev; 4817 struct drm_i915_private *dev_priv = dev->dev_private; 4818 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4819 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4820 int i, ret; 4821 4822 if (!HAS_L3_DPF(dev) || !remap_info) 4823 return 0; 4824 4825 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4826 if (ret) 4827 return ret; 4828 4829 /* 4830 * Note: We do not worry about the concurrent register cacheline hang 4831 * here because no other code should access these registers other than 4832 * at initialization time. 4833 */ 4834 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4835 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4836 intel_ring_emit(ring, reg_base + i); 4837 intel_ring_emit(ring, remap_info[i/4]); 4838 } 4839 4840 intel_ring_advance(ring); 4841 4842 return ret; 4843 } 4844 4845 void i915_gem_init_swizzling(struct drm_device *dev) 4846 { 4847 struct drm_i915_private *dev_priv = dev->dev_private; 4848 4849 if (INTEL_INFO(dev)->gen < 5 || 4850 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4851 return; 4852 4853 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4854 DISP_TILE_SURFACE_SWIZZLING); 4855 4856 if (IS_GEN5(dev)) 4857 return; 4858 4859 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4860 if (IS_GEN6(dev)) 4861 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4862 else if (IS_GEN7(dev)) 4863 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4864 else if (IS_GEN8(dev)) 4865 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4866 else 4867 BUG(); 4868 } 4869 4870 static bool 4871 intel_enable_blt(struct drm_device *dev) 4872 { 4873 if (!HAS_BLT(dev)) 4874 return false; 4875 4876 /* The blitter was dysfunctional on early prototypes */ 4877 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4878 DRM_INFO("BLT not supported on this pre-production hardware;" 4879 " graphics performance will be degraded.\n"); 4880 return false; 4881 } 4882 4883 return true; 4884 } 4885 4886 static void init_unused_ring(struct drm_device *dev, u32 base) 4887 { 4888 struct drm_i915_private *dev_priv = dev->dev_private; 4889 4890 I915_WRITE(RING_CTL(base), 0); 4891 I915_WRITE(RING_HEAD(base), 0); 4892 I915_WRITE(RING_TAIL(base), 0); 4893 I915_WRITE(RING_START(base), 0); 4894 } 4895 4896 static void init_unused_rings(struct drm_device *dev) 4897 { 4898 if (IS_I830(dev)) { 4899 init_unused_ring(dev, PRB1_BASE); 4900 init_unused_ring(dev, SRB0_BASE); 4901 init_unused_ring(dev, SRB1_BASE); 4902 init_unused_ring(dev, SRB2_BASE); 4903 init_unused_ring(dev, SRB3_BASE); 4904 } else if (IS_GEN2(dev)) { 4905 init_unused_ring(dev, SRB0_BASE); 4906 init_unused_ring(dev, SRB1_BASE); 4907 } else if (IS_GEN3(dev)) { 4908 init_unused_ring(dev, PRB1_BASE); 4909 init_unused_ring(dev, PRB2_BASE); 4910 } 4911 } 4912 4913 int i915_gem_init_rings(struct drm_device *dev) 4914 { 4915 struct drm_i915_private *dev_priv = dev->dev_private; 4916 int ret; 4917 4918 ret = intel_init_render_ring_buffer(dev); 4919 if (ret) 4920 return ret; 4921 4922 if (HAS_BSD(dev)) { 4923 ret = intel_init_bsd_ring_buffer(dev); 4924 if (ret) 4925 goto cleanup_render_ring; 4926 } 4927 4928 if (intel_enable_blt(dev)) { 4929 ret = intel_init_blt_ring_buffer(dev); 4930 if (ret) 4931 goto cleanup_bsd_ring; 4932 } 
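        /* Each ring init failure below unwinds the already-initialised rings
         * via the cleanup_* labels at the end of the function.
         */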

        if (HAS_VEBOX(dev)) {
                ret = intel_init_vebox_ring_buffer(dev);
                if (ret)
                        goto cleanup_blt_ring;
        }

        if (HAS_BSD2(dev)) {
                ret = intel_init_bsd2_ring_buffer(dev);
                if (ret)
                        goto cleanup_vebox_ring;
        }

        ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
        if (ret)
                goto cleanup_bsd2_ring;

        return 0;

cleanup_bsd2_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
cleanup_vebox_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
        intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

        return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
        int ret, i;

        if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
                return -EIO;

        /* Double layer security blanket, see i915_gem_init() */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

        if (dev_priv->ellc_size)
                I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

        if (IS_HASWELL(dev))
                I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
                           LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

        if (HAS_PCH_NOP(dev)) {
                if (IS_IVYBRIDGE(dev)) {
                        u32 temp = I915_READ(GEN7_MSG_CTL);
                        temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
                        I915_WRITE(GEN7_MSG_CTL, temp);
                } else if (INTEL_INFO(dev)->gen >= 7) {
                        u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
                        temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
                        I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
                }
        }

        i915_gem_init_swizzling(dev);

        /*
         * At least 830 can leave some of the unused rings
         * "active" (i.e. head != tail) after resume, which
         * will prevent C3 entry. Make sure all unused rings
         * are totally idle.
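         * (init_unused_rings() below simply writes zero to their ring
         * registers).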
         */
        init_unused_rings(dev);

        for_each_ring(ring, dev_priv, i) {
                ret = ring->init_hw(ring);
                if (ret)
                        goto out;
        }

        for (i = 0; i < NUM_L3_SLICES(dev); i++)
                i915_gem_l3_remap(&dev_priv->ring[RCS], i);

        ret = i915_ppgtt_init_hw(dev);
        if (ret && ret != -EIO) {
                DRM_ERROR("PPGTT enable failed %d\n", ret);
                i915_gem_cleanup_ringbuffer(dev);
        }

        ret = i915_gem_context_enable(dev_priv);
        if (ret && ret != -EIO) {
                DRM_ERROR("Context enable failed %d\n", ret);
                i915_gem_cleanup_ringbuffer(dev);

                goto out;
        }

out:
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        return ret;
}

int i915_gem_init(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;

        i915.enable_execlists = intel_sanitize_enable_execlists(dev,
                        i915.enable_execlists);

        mutex_lock(&dev->struct_mutex);

        if (IS_VALLEYVIEW(dev)) {
                /* VLVA0 (potential hack), BIOS isn't actually waking us */
                I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
                if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
                              VLV_GTLC_ALLOWWAKEACK), 10))
                        DRM_DEBUG_DRIVER("allow wake ack timed out\n");
        }

        if (!i915.enable_execlists) {
                dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
                dev_priv->gt.init_rings = i915_gem_init_rings;
                dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
                dev_priv->gt.stop_ring = intel_stop_ring_buffer;
        } else {
                dev_priv->gt.execbuf_submit = intel_execlists_submission;
                dev_priv->gt.init_rings = intel_logical_rings_init;
                dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
                dev_priv->gt.stop_ring = intel_logical_ring_stop;
        }

        /* This is just a security blanket to placate dragons.
         * On some systems, we very sporadically observe that the first TLBs
         * used by the CS may be stale, despite us poking the TLB reset. If
         * we hold the forcewake during initialisation these problems
         * just magically go away.
         */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

        ret = i915_gem_init_userptr(dev);
        if (ret)
                goto out_unlock;

        i915_gem_init_global_gtt(dev);

        ret = i915_gem_context_init(dev);
        if (ret)
                goto out_unlock;

        ret = dev_priv->gt.init_rings(dev);
        if (ret)
                goto out_unlock;

        ret = i915_gem_init_hw(dev);
        if (ret == -EIO) {
                /* Allow ring initialisation to fail by marking the GPU as
                 * wedged. But we only want to do this where the GPU is angry,
                 * for any other failure, such as an allocation failure, bail.
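                 * Only an -EIO from i915_gem_init_hw() is converted into
                 * "success" here, after flagging the GPU as wedged.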
5093 */ 5094 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5095 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5096 ret = 0; 5097 } 5098 5099 out_unlock: 5100 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5101 mutex_unlock(&dev->struct_mutex); 5102 5103 return ret; 5104 } 5105 5106 void 5107 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5108 { 5109 struct drm_i915_private *dev_priv = dev->dev_private; 5110 struct intel_engine_cs *ring; 5111 int i; 5112 5113 for_each_ring(ring, dev_priv, i) 5114 dev_priv->gt.cleanup_ring(ring); 5115 } 5116 5117 static void 5118 init_ring_lists(struct intel_engine_cs *ring) 5119 { 5120 INIT_LIST_HEAD(&ring->active_list); 5121 INIT_LIST_HEAD(&ring->request_list); 5122 } 5123 5124 void i915_init_vm(struct drm_i915_private *dev_priv, 5125 struct i915_address_space *vm) 5126 { 5127 if (!i915_is_ggtt(vm)) 5128 drm_mm_init(&vm->mm, vm->start, vm->total); 5129 vm->dev = dev_priv->dev; 5130 INIT_LIST_HEAD(&vm->active_list); 5131 INIT_LIST_HEAD(&vm->inactive_list); 5132 INIT_LIST_HEAD(&vm->global_link); 5133 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5134 } 5135 5136 void 5137 i915_gem_load(struct drm_device *dev) 5138 { 5139 struct drm_i915_private *dev_priv = dev->dev_private; 5140 int i; 5141 5142 dev_priv->objects = 5143 kmem_cache_create("i915_gem_object", 5144 sizeof(struct drm_i915_gem_object), 0, 5145 SLAB_HWCACHE_ALIGN, 5146 NULL); 5147 dev_priv->vmas = 5148 kmem_cache_create("i915_gem_vma", 5149 sizeof(struct i915_vma), 0, 5150 SLAB_HWCACHE_ALIGN, 5151 NULL); 5152 dev_priv->requests = 5153 kmem_cache_create("i915_gem_request", 5154 sizeof(struct drm_i915_gem_request), 0, 5155 SLAB_HWCACHE_ALIGN, 5156 NULL); 5157 5158 INIT_LIST_HEAD(&dev_priv->vm_list); 5159 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5160 5161 INIT_LIST_HEAD(&dev_priv->context_list); 5162 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5163 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5164 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5165 for (i = 0; i < I915_NUM_RINGS; i++) 5166 init_ring_lists(&dev_priv->ring[i]); 5167 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5168 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5169 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5170 i915_gem_retire_work_handler); 5171 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5172 i915_gem_idle_work_handler); 5173 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5174 5175 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5176 5177 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5178 dev_priv->num_fence_regs = 32; 5179 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5180 dev_priv->num_fence_regs = 16; 5181 else 5182 dev_priv->num_fence_regs = 8; 5183 5184 if (intel_vgpu_active(dev)) 5185 dev_priv->num_fence_regs = 5186 I915_READ(vgtif_reg(avail_rs.fence_num)); 5187 5188 /* Initialize fence registers to zero */ 5189 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5190 i915_gem_restore_fences(dev); 5191 5192 i915_gem_detect_bit_6_swizzle(dev); 5193 init_waitqueue_head(&dev_priv->pending_flip_queue); 5194 5195 dev_priv->mm.interruptible = true; 5196 5197 i915_gem_shrinker_init(dev_priv); 5198 5199 mutex_init(&dev_priv->fb_tracking.lock); 5200 } 5201 5202 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5203 { 5204 struct drm_i915_file_private *file_priv = file->driver_priv; 5205 5206 /* Clean up our request list when the client is going away, so that 5207 * later retire_requests won't 
dereference our soon-to-be-gone 5208 * file_priv. 5209 */ 5210 spin_lock(&file_priv->mm.lock); 5211 while (!list_empty(&file_priv->mm.request_list)) { 5212 struct drm_i915_gem_request *request; 5213 5214 request = list_first_entry(&file_priv->mm.request_list, 5215 struct drm_i915_gem_request, 5216 client_list); 5217 list_del(&request->client_list); 5218 request->file_priv = NULL; 5219 } 5220 spin_unlock(&file_priv->mm.lock); 5221 5222 if (!list_empty(&file_priv->rps.link)) { 5223 spin_lock(&to_i915(dev)->rps.client_lock); 5224 list_del(&file_priv->rps.link); 5225 spin_unlock(&to_i915(dev)->rps.client_lock); 5226 } 5227 } 5228 5229 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5230 { 5231 struct drm_i915_file_private *file_priv; 5232 int ret; 5233 5234 DRM_DEBUG_DRIVER("\n"); 5235 5236 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5237 if (!file_priv) 5238 return -ENOMEM; 5239 5240 file->driver_priv = file_priv; 5241 file_priv->dev_priv = dev->dev_private; 5242 file_priv->file = file; 5243 INIT_LIST_HEAD(&file_priv->rps.link); 5244 5245 spin_lock_init(&file_priv->mm.lock); 5246 INIT_LIST_HEAD(&file_priv->mm.request_list); 5247 5248 ret = i915_gem_context_open(dev, file); 5249 if (ret) 5250 kfree(file_priv); 5251 5252 return ret; 5253 } 5254 5255 /** 5256 * i915_gem_track_fb - update frontbuffer tracking 5257 * old: current GEM buffer for the frontbuffer slots 5258 * new: new GEM buffer for the frontbuffer slots 5259 * frontbuffer_bits: bitmask of frontbuffer slots 5260 * 5261 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5262 * from @old and setting them in @new. Both @old and @new can be NULL. 5263 */ 5264 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5265 struct drm_i915_gem_object *new, 5266 unsigned frontbuffer_bits) 5267 { 5268 if (old) { 5269 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5270 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5271 old->frontbuffer_bits &= ~frontbuffer_bits; 5272 } 5273 5274 if (new) { 5275 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5276 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5277 new->frontbuffer_bits |= frontbuffer_bits; 5278 } 5279 } 5280 5281 /* All the new VM stuff */ 5282 unsigned long 5283 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5284 struct i915_address_space *vm) 5285 { 5286 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5287 struct i915_vma *vma; 5288 5289 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5290 5291 list_for_each_entry(vma, &o->vma_list, vma_link) { 5292 if (i915_is_ggtt(vma->vm) && 5293 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5294 continue; 5295 if (vma->vm == vm) 5296 return vma->node.start; 5297 } 5298 5299 WARN(1, "%s vma for this object not found.\n", 5300 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5301 return -1; 5302 } 5303 5304 unsigned long 5305 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5306 const struct i915_ggtt_view *view) 5307 { 5308 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5309 struct i915_vma *vma; 5310 5311 list_for_each_entry(vma, &o->vma_list, vma_link) 5312 if (vma->vm == ggtt && 5313 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5314 return vma->node.start; 5315 5316 WARN(1, "global vma for this object not found. 
(view=%u)\n", view->type); 5317 return -1; 5318 } 5319 5320 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5321 struct i915_address_space *vm) 5322 { 5323 struct i915_vma *vma; 5324 5325 list_for_each_entry(vma, &o->vma_list, vma_link) { 5326 if (i915_is_ggtt(vma->vm) && 5327 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5328 continue; 5329 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5330 return true; 5331 } 5332 5333 return false; 5334 } 5335 5336 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5337 const struct i915_ggtt_view *view) 5338 { 5339 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5340 struct i915_vma *vma; 5341 5342 list_for_each_entry(vma, &o->vma_list, vma_link) 5343 if (vma->vm == ggtt && 5344 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5345 drm_mm_node_allocated(&vma->node)) 5346 return true; 5347 5348 return false; 5349 } 5350 5351 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5352 { 5353 struct i915_vma *vma; 5354 5355 list_for_each_entry(vma, &o->vma_list, vma_link) 5356 if (drm_mm_node_allocated(&vma->node)) 5357 return true; 5358 5359 return false; 5360 } 5361 5362 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5363 struct i915_address_space *vm) 5364 { 5365 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5366 struct i915_vma *vma; 5367 5368 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5369 5370 BUG_ON(list_empty(&o->vma_list)); 5371 5372 list_for_each_entry(vma, &o->vma_list, vma_link) { 5373 if (i915_is_ggtt(vma->vm) && 5374 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5375 continue; 5376 if (vma->vm == vm) 5377 return vma->node.size; 5378 } 5379 return 0; 5380 } 5381 5382 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5383 { 5384 struct i915_vma *vma; 5385 list_for_each_entry(vma, &obj->vma_list, vma_link) 5386 if (vma->pin_count > 0) 5387 return true; 5388 5389 return false; 5390 } 5391 5392