1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include <linux/shmem_fs.h> 35 #include <linux/slab.h> 36 #include <linux/swap.h> 37 #include <linux/pci.h> 38 #include <linux/dma-buf.h> 39 40 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 41 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 42 bool force); 43 static __must_check int 44 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 45 struct i915_address_space *vm, 46 unsigned alignment, 47 bool map_and_fenceable, 48 bool nonblocking); 49 static int i915_gem_phys_pwrite(struct drm_device *dev, 50 struct drm_i915_gem_object *obj, 51 struct drm_i915_gem_pwrite *args, 52 struct drm_file *file); 53 54 static void i915_gem_write_fence(struct drm_device *dev, int reg, 55 struct drm_i915_gem_object *obj); 56 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 57 struct drm_i915_fence_reg *fence, 58 bool enable); 59 60 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 61 struct shrink_control *sc); 62 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 63 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); 64 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 65 66 static bool cpu_cache_is_coherent(struct drm_device *dev, 67 enum i915_cache_level level) 68 { 69 return HAS_LLC(dev) || level != I915_CACHE_NONE; 70 } 71 72 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 73 { 74 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 75 return true; 76 77 return obj->pin_display; 78 } 79 80 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 81 { 82 if (obj->tiling_mode) 83 i915_gem_release_mmap(obj); 84 85 /* As we do not have an associated fence register, we will force 86 * a tiling change if we ever need to acquire one. 
87 */ 88 obj->fence_dirty = false; 89 obj->fence_reg = I915_FENCE_REG_NONE; 90 } 91 92 /* some bookkeeping */ 93 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 94 size_t size) 95 { 96 spin_lock(&dev_priv->mm.object_stat_lock); 97 dev_priv->mm.object_count++; 98 dev_priv->mm.object_memory += size; 99 spin_unlock(&dev_priv->mm.object_stat_lock); 100 } 101 102 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 103 size_t size) 104 { 105 spin_lock(&dev_priv->mm.object_stat_lock); 106 dev_priv->mm.object_count--; 107 dev_priv->mm.object_memory -= size; 108 spin_unlock(&dev_priv->mm.object_stat_lock); 109 } 110 111 static int 112 i915_gem_wait_for_error(struct i915_gpu_error *error) 113 { 114 int ret; 115 116 #define EXIT_COND (!i915_reset_in_progress(error) || \ 117 i915_terminally_wedged(error)) 118 if (EXIT_COND) 119 return 0; 120 121 /* 122 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 123 * userspace. If it takes that long something really bad is going on and 124 * we should simply try to bail out and fail as gracefully as possible. 125 */ 126 ret = wait_event_interruptible_timeout(error->reset_queue, 127 EXIT_COND, 128 10*HZ); 129 if (ret == 0) { 130 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 131 return -EIO; 132 } else if (ret < 0) { 133 return ret; 134 } 135 #undef EXIT_COND 136 137 return 0; 138 } 139 140 int i915_mutex_lock_interruptible(struct drm_device *dev) 141 { 142 struct drm_i915_private *dev_priv = dev->dev_private; 143 int ret; 144 145 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 146 if (ret) 147 return ret; 148 149 ret = mutex_lock_interruptible(&dev->struct_mutex); 150 if (ret) 151 return ret; 152 153 WARN_ON(i915_verify_lists(dev)); 154 return 0; 155 } 156 157 static inline bool 158 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 159 { 160 return i915_gem_obj_bound_any(obj) && !obj->active; 161 } 162 163 int 164 i915_gem_init_ioctl(struct drm_device *dev, void *data, 165 struct drm_file *file) 166 { 167 struct drm_i915_private *dev_priv = dev->dev_private; 168 struct drm_i915_gem_init *args = data; 169 170 if (drm_core_check_feature(dev, DRIVER_MODESET)) 171 return -ENODEV; 172 173 if (args->gtt_start >= args->gtt_end || 174 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 175 return -EINVAL; 176 177 /* GEM with user mode setting was never supported on ilk and later. 
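	 * Ironlake and newer parts therefore always run with kernel
	 * modesetting, and the check just below rejects this legacy
	 * GTT-init path with -ENODEV.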
*/ 178 if (INTEL_INFO(dev)->gen >= 5) 179 return -ENODEV; 180 181 mutex_lock(&dev->struct_mutex); 182 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, 183 args->gtt_end); 184 dev_priv->gtt.mappable_end = args->gtt_end; 185 mutex_unlock(&dev->struct_mutex); 186 187 return 0; 188 } 189 190 int 191 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 192 struct drm_file *file) 193 { 194 struct drm_i915_private *dev_priv = dev->dev_private; 195 struct drm_i915_gem_get_aperture *args = data; 196 struct drm_i915_gem_object *obj; 197 size_t pinned; 198 199 pinned = 0; 200 mutex_lock(&dev->struct_mutex); 201 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 202 if (obj->pin_count) 203 pinned += i915_gem_obj_ggtt_size(obj); 204 mutex_unlock(&dev->struct_mutex); 205 206 args->aper_size = dev_priv->gtt.base.total; 207 args->aper_available_size = args->aper_size - pinned; 208 209 return 0; 210 } 211 212 void *i915_gem_object_alloc(struct drm_device *dev) 213 { 214 struct drm_i915_private *dev_priv = dev->dev_private; 215 return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO); 216 } 217 218 void i915_gem_object_free(struct drm_i915_gem_object *obj) 219 { 220 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 221 kmem_cache_free(dev_priv->slab, obj); 222 } 223 224 static int 225 i915_gem_create(struct drm_file *file, 226 struct drm_device *dev, 227 uint64_t size, 228 uint32_t *handle_p) 229 { 230 struct drm_i915_gem_object *obj; 231 int ret; 232 u32 handle; 233 234 size = roundup(size, PAGE_SIZE); 235 if (size == 0) 236 return -EINVAL; 237 238 /* Allocate the new object */ 239 obj = i915_gem_alloc_object(dev, size); 240 if (obj == NULL) 241 return -ENOMEM; 242 243 ret = drm_gem_handle_create(file, &obj->base, &handle); 244 /* drop reference from allocate - handle holds it now */ 245 drm_gem_object_unreference_unlocked(&obj->base); 246 if (ret) 247 return ret; 248 249 *handle_p = handle; 250 return 0; 251 } 252 253 int 254 i915_gem_dumb_create(struct drm_file *file, 255 struct drm_device *dev, 256 struct drm_mode_create_dumb *args) 257 { 258 /* have to work out size/pitch and return them */ 259 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 260 args->size = args->pitch * args->height; 261 return i915_gem_create(file, dev, 262 args->size, &args->handle); 263 } 264 265 /** 266 * Creates a new mm object and returns a handle to it. 
267 */ 268 int 269 i915_gem_create_ioctl(struct drm_device *dev, void *data, 270 struct drm_file *file) 271 { 272 struct drm_i915_gem_create *args = data; 273 274 return i915_gem_create(file, dev, 275 args->size, &args->handle); 276 } 277 278 static inline int 279 __copy_to_user_swizzled(char __user *cpu_vaddr, 280 const char *gpu_vaddr, int gpu_offset, 281 int length) 282 { 283 int ret, cpu_offset = 0; 284 285 while (length > 0) { 286 int cacheline_end = ALIGN(gpu_offset + 1, 64); 287 int this_length = min(cacheline_end - gpu_offset, length); 288 int swizzled_gpu_offset = gpu_offset ^ 64; 289 290 ret = __copy_to_user(cpu_vaddr + cpu_offset, 291 gpu_vaddr + swizzled_gpu_offset, 292 this_length); 293 if (ret) 294 return ret + length; 295 296 cpu_offset += this_length; 297 gpu_offset += this_length; 298 length -= this_length; 299 } 300 301 return 0; 302 } 303 304 static inline int 305 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 306 const char __user *cpu_vaddr, 307 int length) 308 { 309 int ret, cpu_offset = 0; 310 311 while (length > 0) { 312 int cacheline_end = ALIGN(gpu_offset + 1, 64); 313 int this_length = min(cacheline_end - gpu_offset, length); 314 int swizzled_gpu_offset = gpu_offset ^ 64; 315 316 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 317 cpu_vaddr + cpu_offset, 318 this_length); 319 if (ret) 320 return ret + length; 321 322 cpu_offset += this_length; 323 gpu_offset += this_length; 324 length -= this_length; 325 } 326 327 return 0; 328 } 329 330 /* Per-page copy function for the shmem pread fastpath. 331 * Flushes invalid cachelines before reading the target if 332 * needs_clflush is set. */ 333 static int 334 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 335 char __user *user_data, 336 bool page_do_bit17_swizzling, bool needs_clflush) 337 { 338 char *vaddr; 339 int ret; 340 341 if (unlikely(page_do_bit17_swizzling)) 342 return -EINVAL; 343 344 vaddr = kmap_atomic(page); 345 if (needs_clflush) 346 drm_clflush_virt_range(vaddr + shmem_page_offset, 347 page_length); 348 ret = __copy_to_user_inatomic(user_data, 349 vaddr + shmem_page_offset, 350 page_length); 351 kunmap_atomic(vaddr); 352 353 return ret ? -EFAULT : 0; 354 } 355 356 static void 357 shmem_clflush_swizzled_range(char *addr, unsigned long length, 358 bool swizzled) 359 { 360 if (unlikely(swizzled)) { 361 unsigned long start = (unsigned long) addr; 362 unsigned long end = (unsigned long) addr + length; 363 364 /* For swizzling simply ensure that we always flush both 365 * channels. Lame, but simple and it works. Swizzled 366 * pwrite/pread is far from a hotpath - current userspace 367 * doesn't use it at all. */ 368 start = round_down(start, 128); 369 end = round_up(end, 128); 370 371 drm_clflush_virt_range((void *)start, end - start); 372 } else { 373 drm_clflush_virt_range(addr, length); 374 } 375 376 } 377 378 /* Only difference to the fast-path function is that this can handle bit17 379 * and uses non-atomic copy and kmap functions. 
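 * Bit-17 swizzling means the layout of a tiled page as seen by the CPU
 * depends on bit 17 of that page's physical address (cf. the
 * page_to_phys(page) & (1 << 17) test in the pread/pwrite loops), so the
 * copy has to go through the swizzled helpers above rather than a plain copy.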
*/ 380 static int 381 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 382 char __user *user_data, 383 bool page_do_bit17_swizzling, bool needs_clflush) 384 { 385 char *vaddr; 386 int ret; 387 388 vaddr = kmap(page); 389 if (needs_clflush) 390 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 391 page_length, 392 page_do_bit17_swizzling); 393 394 if (page_do_bit17_swizzling) 395 ret = __copy_to_user_swizzled(user_data, 396 vaddr, shmem_page_offset, 397 page_length); 398 else 399 ret = __copy_to_user(user_data, 400 vaddr + shmem_page_offset, 401 page_length); 402 kunmap(page); 403 404 return ret ? - EFAULT : 0; 405 } 406 407 static int 408 i915_gem_shmem_pread(struct drm_device *dev, 409 struct drm_i915_gem_object *obj, 410 struct drm_i915_gem_pread *args, 411 struct drm_file *file) 412 { 413 char __user *user_data; 414 ssize_t remain; 415 loff_t offset; 416 int shmem_page_offset, page_length, ret = 0; 417 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 418 int prefaulted = 0; 419 int needs_clflush = 0; 420 struct sg_page_iter sg_iter; 421 422 user_data = to_user_ptr(args->data_ptr); 423 remain = args->size; 424 425 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 426 427 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 428 /* If we're not in the cpu read domain, set ourself into the gtt 429 * read domain and manually flush cachelines (if required). This 430 * optimizes for the case when the gpu will dirty the data 431 * anyway again before the next pread happens. */ 432 needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); 433 if (i915_gem_obj_bound_any(obj)) { 434 ret = i915_gem_object_set_to_gtt_domain(obj, false); 435 if (ret) 436 return ret; 437 } 438 } 439 440 ret = i915_gem_object_get_pages(obj); 441 if (ret) 442 return ret; 443 444 i915_gem_object_pin_pages(obj); 445 446 offset = args->offset; 447 448 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 449 offset >> PAGE_SHIFT) { 450 struct page *page = sg_page_iter_page(&sg_iter); 451 452 if (remain <= 0) 453 break; 454 455 /* Operation in this page 456 * 457 * shmem_page_offset = offset within page in shmem file 458 * page_length = bytes to copy for this page 459 */ 460 shmem_page_offset = offset_in_page(offset); 461 page_length = remain; 462 if ((shmem_page_offset + page_length) > PAGE_SIZE) 463 page_length = PAGE_SIZE - shmem_page_offset; 464 465 page_do_bit17_swizzling = obj_do_bit17_swizzling && 466 (page_to_phys(page) & (1 << 17)) != 0; 467 468 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 469 user_data, page_do_bit17_swizzling, 470 needs_clflush); 471 if (ret == 0) 472 goto next_page; 473 474 mutex_unlock(&dev->struct_mutex); 475 476 if (likely(!i915_prefault_disable) && !prefaulted) { 477 ret = fault_in_multipages_writeable(user_data, remain); 478 /* Userspace is tricking us, but we've already clobbered 479 * its pages with the prefault and promised to write the 480 * data up to the first fault. Hence ignore any errors 481 * and just continue. 
*/ 482 (void)ret; 483 prefaulted = 1; 484 } 485 486 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 487 user_data, page_do_bit17_swizzling, 488 needs_clflush); 489 490 mutex_lock(&dev->struct_mutex); 491 492 next_page: 493 mark_page_accessed(page); 494 495 if (ret) 496 goto out; 497 498 remain -= page_length; 499 user_data += page_length; 500 offset += page_length; 501 } 502 503 out: 504 i915_gem_object_unpin_pages(obj); 505 506 return ret; 507 } 508 509 /** 510 * Reads data from the object referenced by handle. 511 * 512 * On error, the contents of *data are undefined. 513 */ 514 int 515 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 516 struct drm_file *file) 517 { 518 struct drm_i915_gem_pread *args = data; 519 struct drm_i915_gem_object *obj; 520 int ret = 0; 521 522 if (args->size == 0) 523 return 0; 524 525 if (!access_ok(VERIFY_WRITE, 526 to_user_ptr(args->data_ptr), 527 args->size)) 528 return -EFAULT; 529 530 ret = i915_mutex_lock_interruptible(dev); 531 if (ret) 532 return ret; 533 534 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 535 if (&obj->base == NULL) { 536 ret = -ENOENT; 537 goto unlock; 538 } 539 540 /* Bounds check source. */ 541 if (args->offset > obj->base.size || 542 args->size > obj->base.size - args->offset) { 543 ret = -EINVAL; 544 goto out; 545 } 546 547 /* prime objects have no backing filp to GEM pread/pwrite 548 * pages from. 549 */ 550 if (!obj->base.filp) { 551 ret = -EINVAL; 552 goto out; 553 } 554 555 trace_i915_gem_object_pread(obj, args->offset, args->size); 556 557 ret = i915_gem_shmem_pread(dev, obj, args, file); 558 559 out: 560 drm_gem_object_unreference(&obj->base); 561 unlock: 562 mutex_unlock(&dev->struct_mutex); 563 return ret; 564 } 565 566 /* This is the fast write path which cannot handle 567 * page faults in the source data 568 */ 569 570 static inline int 571 fast_user_write(struct io_mapping *mapping, 572 loff_t page_base, int page_offset, 573 char __user *user_data, 574 int length) 575 { 576 void __iomem *vaddr_atomic; 577 void *vaddr; 578 unsigned long unwritten; 579 580 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 581 /* We can use the cpu mem copy function because this is X86. */ 582 vaddr = (void __force*)vaddr_atomic + page_offset; 583 unwritten = __copy_from_user_inatomic_nocache(vaddr, 584 user_data, length); 585 io_mapping_unmap_atomic(vaddr_atomic); 586 return unwritten; 587 } 588 589 /** 590 * This is the fast pwrite path, where we copy the data directly from the 591 * user into the GTT, uncached. 
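 * The object is pinned into the mappable aperture and moved to the GTT
 * write domain first; the copy then goes through an atomic WC io-mapping,
 * and any fault while reading the user buffer makes us bail out so the
 * caller can fall back to the shmem path.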
592 */ 593 static int 594 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 595 struct drm_i915_gem_object *obj, 596 struct drm_i915_gem_pwrite *args, 597 struct drm_file *file) 598 { 599 drm_i915_private_t *dev_priv = dev->dev_private; 600 ssize_t remain; 601 loff_t offset, page_base; 602 char __user *user_data; 603 int page_offset, page_length, ret; 604 605 ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); 606 if (ret) 607 goto out; 608 609 ret = i915_gem_object_set_to_gtt_domain(obj, true); 610 if (ret) 611 goto out_unpin; 612 613 ret = i915_gem_object_put_fence(obj); 614 if (ret) 615 goto out_unpin; 616 617 user_data = to_user_ptr(args->data_ptr); 618 remain = args->size; 619 620 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 621 622 while (remain > 0) { 623 /* Operation in this page 624 * 625 * page_base = page offset within aperture 626 * page_offset = offset within page 627 * page_length = bytes to copy for this page 628 */ 629 page_base = offset & PAGE_MASK; 630 page_offset = offset_in_page(offset); 631 page_length = remain; 632 if ((page_offset + remain) > PAGE_SIZE) 633 page_length = PAGE_SIZE - page_offset; 634 635 /* If we get a fault while copying data, then (presumably) our 636 * source page isn't available. Return the error and we'll 637 * retry in the slow path. 638 */ 639 if (fast_user_write(dev_priv->gtt.mappable, page_base, 640 page_offset, user_data, page_length)) { 641 ret = -EFAULT; 642 goto out_unpin; 643 } 644 645 remain -= page_length; 646 user_data += page_length; 647 offset += page_length; 648 } 649 650 out_unpin: 651 i915_gem_object_unpin(obj); 652 out: 653 return ret; 654 } 655 656 /* Per-page copy function for the shmem pwrite fastpath. 657 * Flushes invalid cachelines before writing to the target if 658 * needs_clflush_before is set and flushes out any written cachelines after 659 * writing if needs_clflush is set. */ 660 static int 661 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 662 char __user *user_data, 663 bool page_do_bit17_swizzling, 664 bool needs_clflush_before, 665 bool needs_clflush_after) 666 { 667 char *vaddr; 668 int ret; 669 670 if (unlikely(page_do_bit17_swizzling)) 671 return -EINVAL; 672 673 vaddr = kmap_atomic(page); 674 if (needs_clflush_before) 675 drm_clflush_virt_range(vaddr + shmem_page_offset, 676 page_length); 677 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 678 user_data, 679 page_length); 680 if (needs_clflush_after) 681 drm_clflush_virt_range(vaddr + shmem_page_offset, 682 page_length); 683 kunmap_atomic(vaddr); 684 685 return ret ? -EFAULT : 0; 686 } 687 688 /* Only difference to the fast-path function is that this can handle bit17 689 * and uses non-atomic copy and kmap functions. 
 */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		if (i915_gem_obj_bound_any(obj)) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page.
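		 * A write counts as a partial cacheline write when either its
		 * start offset or its length is not a multiple of the CPU's
		 * clflush size; e.g. with 64 byte cachelines, an 80 byte write
		 * at offset 8 only partially covers the cachelines it touches.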
*/ 792 partial_cacheline_write = needs_clflush_before && 793 ((shmem_page_offset | page_length) 794 & (boot_cpu_data.x86_clflush_size - 1)); 795 796 page_do_bit17_swizzling = obj_do_bit17_swizzling && 797 (page_to_phys(page) & (1 << 17)) != 0; 798 799 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 800 user_data, page_do_bit17_swizzling, 801 partial_cacheline_write, 802 needs_clflush_after); 803 if (ret == 0) 804 goto next_page; 805 806 hit_slowpath = 1; 807 mutex_unlock(&dev->struct_mutex); 808 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 809 user_data, page_do_bit17_swizzling, 810 partial_cacheline_write, 811 needs_clflush_after); 812 813 mutex_lock(&dev->struct_mutex); 814 815 next_page: 816 set_page_dirty(page); 817 mark_page_accessed(page); 818 819 if (ret) 820 goto out; 821 822 remain -= page_length; 823 user_data += page_length; 824 offset += page_length; 825 } 826 827 out: 828 i915_gem_object_unpin_pages(obj); 829 830 if (hit_slowpath) { 831 /* 832 * Fixup: Flush cpu caches in case we didn't flush the dirty 833 * cachelines in-line while writing and the object moved 834 * out of the cpu write domain while we've dropped the lock. 835 */ 836 if (!needs_clflush_after && 837 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 838 if (i915_gem_clflush_object(obj, obj->pin_display)) 839 i915_gem_chipset_flush(dev); 840 } 841 } 842 843 if (needs_clflush_after) 844 i915_gem_chipset_flush(dev); 845 846 return ret; 847 } 848 849 /** 850 * Writes data to the object referenced by handle. 851 * 852 * On error, the contents of the buffer that were to be modified are undefined. 853 */ 854 int 855 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 856 struct drm_file *file) 857 { 858 struct drm_i915_gem_pwrite *args = data; 859 struct drm_i915_gem_object *obj; 860 int ret; 861 862 if (args->size == 0) 863 return 0; 864 865 if (!access_ok(VERIFY_READ, 866 to_user_ptr(args->data_ptr), 867 args->size)) 868 return -EFAULT; 869 870 if (likely(!i915_prefault_disable)) { 871 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 872 args->size); 873 if (ret) 874 return -EFAULT; 875 } 876 877 ret = i915_mutex_lock_interruptible(dev); 878 if (ret) 879 return ret; 880 881 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 882 if (&obj->base == NULL) { 883 ret = -ENOENT; 884 goto unlock; 885 } 886 887 /* Bounds check destination. */ 888 if (args->offset > obj->base.size || 889 args->size > obj->base.size - args->offset) { 890 ret = -EINVAL; 891 goto out; 892 } 893 894 /* prime objects have no backing filp to GEM pread/pwrite 895 * pages from. 896 */ 897 if (!obj->base.filp) { 898 ret = -EINVAL; 899 goto out; 900 } 901 902 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 903 904 ret = -EFAULT; 905 /* We can only do the GTT pwrite on untiled buffers, as otherwise 906 * it would end up going through the fenced access, and we'll get 907 * different detiling behavior between reading and writing. 908 * pread/pwrite currently are reading and writing from the CPU 909 * perspective, requiring manual detiling by the client. 910 */ 911 if (obj->phys_obj) { 912 ret = i915_gem_phys_pwrite(dev, obj, args, file); 913 goto out; 914 } 915 916 if (obj->tiling_mode == I915_TILING_NONE && 917 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 918 cpu_write_needs_clflush(obj)) { 919 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 920 /* Note that the gtt paths might fail with non-page-backed user 921 * pointers (e.g. 
gtt mappings when moving data between
		 * textures). Fall back to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
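 * When a timeout is supplied, the time actually slept is subtracted from
 * *timeout on return, and a result that would go negative is clamped to zero.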
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIME;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
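 * Must be called with struct_mutex held and with a non-zero seqno, as the
 * BUG_ONs below assert.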
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_ring_buffer *ring)
{
	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;
	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
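 * Only the CPU and GTT domains are accepted here; a non-zero write_domain
 * must match read_domains exactly, since writing implies reading.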
1185 */ 1186 int 1187 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1188 struct drm_file *file) 1189 { 1190 struct drm_i915_gem_set_domain *args = data; 1191 struct drm_i915_gem_object *obj; 1192 uint32_t read_domains = args->read_domains; 1193 uint32_t write_domain = args->write_domain; 1194 int ret; 1195 1196 /* Only handle setting domains to types used by the CPU. */ 1197 if (write_domain & I915_GEM_GPU_DOMAINS) 1198 return -EINVAL; 1199 1200 if (read_domains & I915_GEM_GPU_DOMAINS) 1201 return -EINVAL; 1202 1203 /* Having something in the write domain implies it's in the read 1204 * domain, and only that read domain. Enforce that in the request. 1205 */ 1206 if (write_domain != 0 && read_domains != write_domain) 1207 return -EINVAL; 1208 1209 ret = i915_mutex_lock_interruptible(dev); 1210 if (ret) 1211 return ret; 1212 1213 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1214 if (&obj->base == NULL) { 1215 ret = -ENOENT; 1216 goto unlock; 1217 } 1218 1219 /* Try to flush the object off the GPU without holding the lock. 1220 * We will repeat the flush holding the lock in the normal manner 1221 * to catch cases where we are gazumped. 1222 */ 1223 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 1224 if (ret) 1225 goto unref; 1226 1227 if (read_domains & I915_GEM_DOMAIN_GTT) { 1228 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1229 1230 /* Silently promote "you're not bound, there was nothing to do" 1231 * to success, since the client was just asking us to 1232 * make sure everything was done. 1233 */ 1234 if (ret == -EINVAL) 1235 ret = 0; 1236 } else { 1237 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1238 } 1239 1240 unref: 1241 drm_gem_object_unreference(&obj->base); 1242 unlock: 1243 mutex_unlock(&dev->struct_mutex); 1244 return ret; 1245 } 1246 1247 /** 1248 * Called when user space has done writes to this buffer 1249 */ 1250 int 1251 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1252 struct drm_file *file) 1253 { 1254 struct drm_i915_gem_sw_finish *args = data; 1255 struct drm_i915_gem_object *obj; 1256 int ret = 0; 1257 1258 ret = i915_mutex_lock_interruptible(dev); 1259 if (ret) 1260 return ret; 1261 1262 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1263 if (&obj->base == NULL) { 1264 ret = -ENOENT; 1265 goto unlock; 1266 } 1267 1268 /* Pinned buffers may be scanout, so flush the cache */ 1269 if (obj->pin_display) 1270 i915_gem_object_flush_cpu_write_domain(obj, true); 1271 1272 drm_gem_object_unreference(&obj->base); 1273 unlock: 1274 mutex_unlock(&dev->struct_mutex); 1275 return ret; 1276 } 1277 1278 /** 1279 * Maps the contents of an object, returning the address it is mapped 1280 * into. 1281 * 1282 * While the mapping holds a reference on the contents of the object, it doesn't 1283 * imply a ref on the object itself. 1284 */ 1285 int 1286 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1287 struct drm_file *file) 1288 { 1289 struct drm_i915_gem_mmap *args = data; 1290 struct drm_gem_object *obj; 1291 unsigned long addr; 1292 1293 obj = drm_gem_object_lookup(dev, file, args->handle); 1294 if (obj == NULL) 1295 return -ENOENT; 1296 1297 /* prime objects have no backing filp to GEM mmap 1298 * pages from. 
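	 * Such objects are backed by the exporter's dma-buf rather than by
	 * shmem, so there is no struct file to hand to vm_mmap() below.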
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Now bind it into the GTT if needed */
	ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
	pfn >>= PAGE_SHIFT;
	pfn += page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unpin:
	i915_gem_object_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (i915_terminally_wedged(&dev_priv->gpu_error))
			return VM_FAULT_SIGBUS;
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
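		 * Returning VM_FAULT_NOPAGE without installing a PTE means the
		 * access simply faults again later, by which time the reset
		 * work should have had a chance to run.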
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	case -ENOSPC:
		return VM_FAULT_SIGBUS;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
	obj->fault_mappable = false;
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	if (drm_vma_node_has_offset(&obj->base.vma_node))
		return 0;

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can do is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
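	 * Hence the escalation below: first purge roughly this object's worth
	 * of pages, and only fall back to shrinking everything if allocating
	 * the offset still fails with -ENOSPC.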
1511 */ 1512 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1513 ret = drm_gem_create_mmap_offset(&obj->base); 1514 if (ret != -ENOSPC) 1515 goto out; 1516 1517 i915_gem_shrink_all(dev_priv); 1518 ret = drm_gem_create_mmap_offset(&obj->base); 1519 out: 1520 dev_priv->mm.shrinker_no_lock_stealing = false; 1521 1522 return ret; 1523 } 1524 1525 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1526 { 1527 drm_gem_free_mmap_offset(&obj->base); 1528 } 1529 1530 int 1531 i915_gem_mmap_gtt(struct drm_file *file, 1532 struct drm_device *dev, 1533 uint32_t handle, 1534 uint64_t *offset) 1535 { 1536 struct drm_i915_private *dev_priv = dev->dev_private; 1537 struct drm_i915_gem_object *obj; 1538 int ret; 1539 1540 ret = i915_mutex_lock_interruptible(dev); 1541 if (ret) 1542 return ret; 1543 1544 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1545 if (&obj->base == NULL) { 1546 ret = -ENOENT; 1547 goto unlock; 1548 } 1549 1550 if (obj->base.size > dev_priv->gtt.mappable_end) { 1551 ret = -E2BIG; 1552 goto out; 1553 } 1554 1555 if (obj->madv != I915_MADV_WILLNEED) { 1556 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1557 ret = -EINVAL; 1558 goto out; 1559 } 1560 1561 ret = i915_gem_object_create_mmap_offset(obj); 1562 if (ret) 1563 goto out; 1564 1565 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 1566 1567 out: 1568 drm_gem_object_unreference(&obj->base); 1569 unlock: 1570 mutex_unlock(&dev->struct_mutex); 1571 return ret; 1572 } 1573 1574 /** 1575 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1576 * @dev: DRM device 1577 * @data: GTT mapping ioctl data 1578 * @file: GEM object info 1579 * 1580 * Simply returns the fake offset to userspace so it can mmap it. 1581 * The mmap call will end up in drm_gem_mmap(), which will set things 1582 * up so we can get faults in the handler above. 1583 * 1584 * The fault handler will take care of binding the object into the GTT 1585 * (since it may have been evicted to make room for something), allocating 1586 * a fence register, and mapping the appropriate aperture address into 1587 * userspace. 1588 */ 1589 int 1590 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1591 struct drm_file *file) 1592 { 1593 struct drm_i915_gem_mmap_gtt *args = data; 1594 1595 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1596 } 1597 1598 /* Immediately discard the backing storage */ 1599 static void 1600 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1601 { 1602 struct inode *inode; 1603 1604 i915_gem_object_free_mmap_offset(obj); 1605 1606 if (obj->base.filp == NULL) 1607 return; 1608 1609 /* Our goal here is to return as much of the memory as 1610 * is possible back to the system as we are called from OOM. 1611 * To do this we must instruct the shmfs to drop all of its 1612 * backing pages, *now*. 1613 */ 1614 inode = file_inode(obj->base.filp); 1615 shmem_truncate_range(inode, 0, (loff_t)-1); 1616 1617 obj->madv = __I915_MADV_PURGED; 1618 } 1619 1620 static inline int 1621 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1622 { 1623 return obj->madv == I915_MADV_DONTNEED; 1624 } 1625 1626 static void 1627 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1628 { 1629 struct sg_page_iter sg_iter; 1630 int ret; 1631 1632 BUG_ON(obj->madv == __I915_MADV_PURGED); 1633 1634 ret = i915_gem_object_set_to_cpu_domain(obj, true); 1635 if (ret) { 1636 /* In the event of a disaster, abandon all caches and 1637 * hope for the best. 
1638 */ 1639 WARN_ON(ret != -EIO); 1640 i915_gem_clflush_object(obj, true); 1641 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 1642 } 1643 1644 if (i915_gem_object_needs_bit17_swizzle(obj)) 1645 i915_gem_object_save_bit_17_swizzle(obj); 1646 1647 if (obj->madv == I915_MADV_DONTNEED) 1648 obj->dirty = 0; 1649 1650 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 1651 struct page *page = sg_page_iter_page(&sg_iter); 1652 1653 if (obj->dirty) 1654 set_page_dirty(page); 1655 1656 if (obj->madv == I915_MADV_WILLNEED) 1657 mark_page_accessed(page); 1658 1659 page_cache_release(page); 1660 } 1661 obj->dirty = 0; 1662 1663 sg_free_table(obj->pages); 1664 kfree(obj->pages); 1665 } 1666 1667 int 1668 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 1669 { 1670 const struct drm_i915_gem_object_ops *ops = obj->ops; 1671 1672 if (obj->pages == NULL) 1673 return 0; 1674 1675 if (obj->pages_pin_count) 1676 return -EBUSY; 1677 1678 BUG_ON(i915_gem_obj_bound_any(obj)); 1679 1680 /* ->put_pages might need to allocate memory for the bit17 swizzle 1681 * array, hence protect them from being reaped by removing them from gtt 1682 * lists early. */ 1683 list_del(&obj->global_list); 1684 1685 ops->put_pages(obj); 1686 obj->pages = NULL; 1687 1688 if (i915_gem_object_is_purgeable(obj)) 1689 i915_gem_object_truncate(obj); 1690 1691 return 0; 1692 } 1693 1694 static long 1695 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 1696 bool purgeable_only) 1697 { 1698 struct drm_i915_gem_object *obj, *next; 1699 long count = 0; 1700 1701 list_for_each_entry_safe(obj, next, 1702 &dev_priv->mm.unbound_list, 1703 global_list) { 1704 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 1705 i915_gem_object_put_pages(obj) == 0) { 1706 count += obj->base.size >> PAGE_SHIFT; 1707 if (count >= target) 1708 return count; 1709 } 1710 } 1711 1712 list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, 1713 global_list) { 1714 struct i915_vma *vma, *v; 1715 1716 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 1717 continue; 1718 1719 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 1720 if (i915_vma_unbind(vma)) 1721 break; 1722 1723 if (!i915_gem_object_put_pages(obj)) { 1724 count += obj->base.size >> PAGE_SHIFT; 1725 if (count >= target) 1726 return count; 1727 } 1728 } 1729 1730 return count; 1731 } 1732 1733 static long 1734 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 1735 { 1736 return __i915_gem_shrink(dev_priv, target, true); 1737 } 1738 1739 static void 1740 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 1741 { 1742 struct drm_i915_gem_object *obj, *next; 1743 1744 i915_gem_evict_everything(dev_priv->dev); 1745 1746 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, 1747 global_list) 1748 i915_gem_object_put_pages(obj); 1749 } 1750 1751 static int 1752 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 1753 { 1754 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1755 int page_count, i; 1756 struct address_space *mapping; 1757 struct sg_table *st; 1758 struct scatterlist *sg; 1759 struct sg_page_iter sg_iter; 1760 struct page *page; 1761 unsigned long last_pfn = 0; /* suppress gcc warning */ 1762 gfp_t gfp; 1763 1764 /* Assert that the object is not currently in any GPU domain. 
As it 1765 * wasn't in the GTT, there shouldn't be any way it could have been in 1766 * a GPU cache 1767 */ 1768 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 1769 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 1770 1771 st = kmalloc(sizeof(*st), GFP_KERNEL); 1772 if (st == NULL) 1773 return -ENOMEM; 1774 1775 page_count = obj->base.size / PAGE_SIZE; 1776 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 1777 sg_free_table(st); 1778 kfree(st); 1779 return -ENOMEM; 1780 } 1781 1782 /* Get the list of pages out of our struct file. They'll be pinned 1783 * at this point until we release them. 1784 * 1785 * Fail silently without starting the shrinker 1786 */ 1787 mapping = file_inode(obj->base.filp)->i_mapping; 1788 gfp = mapping_gfp_mask(mapping); 1789 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 1790 gfp &= ~(__GFP_IO | __GFP_WAIT); 1791 sg = st->sgl; 1792 st->nents = 0; 1793 for (i = 0; i < page_count; i++) { 1794 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 1795 if (IS_ERR(page)) { 1796 i915_gem_purge(dev_priv, page_count); 1797 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 1798 } 1799 if (IS_ERR(page)) { 1800 /* We've tried hard to allocate the memory by reaping 1801 * our own buffer, now let the real VM do its job and 1802 * go down in flames if truly OOM. 1803 */ 1804 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 1805 gfp |= __GFP_IO | __GFP_WAIT; 1806 1807 i915_gem_shrink_all(dev_priv); 1808 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 1809 if (IS_ERR(page)) 1810 goto err_pages; 1811 1812 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 1813 gfp &= ~(__GFP_IO | __GFP_WAIT); 1814 } 1815 #ifdef CONFIG_SWIOTLB 1816 if (swiotlb_nr_tbl()) { 1817 st->nents++; 1818 sg_set_page(sg, page, PAGE_SIZE, 0); 1819 sg = sg_next(sg); 1820 continue; 1821 } 1822 #endif 1823 if (!i || page_to_pfn(page) != last_pfn + 1) { 1824 if (i) 1825 sg = sg_next(sg); 1826 st->nents++; 1827 sg_set_page(sg, page, PAGE_SIZE, 0); 1828 } else { 1829 sg->length += PAGE_SIZE; 1830 } 1831 last_pfn = page_to_pfn(page); 1832 } 1833 #ifdef CONFIG_SWIOTLB 1834 if (!swiotlb_nr_tbl()) 1835 #endif 1836 sg_mark_end(sg); 1837 obj->pages = st; 1838 1839 if (i915_gem_object_needs_bit17_swizzle(obj)) 1840 i915_gem_object_do_bit_17_swizzle(obj); 1841 1842 return 0; 1843 1844 err_pages: 1845 sg_mark_end(sg); 1846 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 1847 page_cache_release(sg_page_iter_page(&sg_iter)); 1848 sg_free_table(st); 1849 kfree(st); 1850 return PTR_ERR(page); 1851 } 1852 1853 /* Ensure that the associated pages are gathered from the backing storage 1854 * and pinned into our object. i915_gem_object_get_pages() may be called 1855 * multiple times before they are released by a single call to 1856 * i915_gem_object_put_pages() - once the pages are no longer referenced 1857 * either as a result of memory pressure (reaping pages under the shrinker) 1858 * or as the object is itself released. 
1859 */
1860 int
1861 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1862 {
1863 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1864 const struct drm_i915_gem_object_ops *ops = obj->ops;
1865 int ret;
1866
1867 if (obj->pages)
1868 return 0;
1869
1870 if (obj->madv != I915_MADV_WILLNEED) {
1871 DRM_ERROR("Attempting to obtain a purgeable object\n");
1872 return -EINVAL;
1873 }
1874
1875 BUG_ON(obj->pages_pin_count);
1876
1877 ret = ops->get_pages(obj);
1878 if (ret)
1879 return ret;
1880
1881 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
1882 return 0;
1883 }
1884
1885 void
1886 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1887 struct intel_ring_buffer *ring)
1888 {
1889 struct drm_device *dev = obj->base.dev;
1890 struct drm_i915_private *dev_priv = dev->dev_private;
1891 u32 seqno = intel_ring_get_seqno(ring);
1892
1893 BUG_ON(ring == NULL);
1894 if (obj->ring != ring && obj->last_write_seqno) {
1895 /* Keep the seqno relative to the current ring */
1896 obj->last_write_seqno = seqno;
1897 }
1898 obj->ring = ring;
1899
1900 /* Add a reference if we're newly entering the active list. */
1901 if (!obj->active) {
1902 drm_gem_object_reference(&obj->base);
1903 obj->active = 1;
1904 }
1905
1906 list_move_tail(&obj->ring_list, &ring->active_list);
1907
1908 obj->last_read_seqno = seqno;
1909
1910 if (obj->fenced_gpu_access) {
1911 obj->last_fenced_seqno = seqno;
1912
1913 /* Bump MRU to take account of the delayed flush */
1914 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1915 struct drm_i915_fence_reg *reg;
1916
1917 reg = &dev_priv->fence_regs[obj->fence_reg];
1918 list_move_tail(&reg->lru_list,
1919 &dev_priv->mm.fence_list);
1920 }
1921 }
1922 }
1923
1924 static void
1925 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1926 {
1927 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1928 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
1929 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
1930
1931 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
1932 BUG_ON(!obj->active);
1933
1934 list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list);
1935
1936 list_del_init(&obj->ring_list);
1937 obj->ring = NULL;
1938
1939 obj->last_read_seqno = 0;
1940 obj->last_write_seqno = 0;
1941 obj->base.write_domain = 0;
1942
1943 obj->last_fenced_seqno = 0;
1944 obj->fenced_gpu_access = false;
1945
1946 obj->active = 0;
1947 drm_gem_object_unreference(&obj->base);
1948
1949 WARN_ON(i915_verify_lists(dev));
1950 }
1951
1952 static int
1953 i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
1954 {
1955 struct drm_i915_private *dev_priv = dev->dev_private;
1956 struct intel_ring_buffer *ring;
1957 int ret, i, j;
1958
1959 /* Carefully retire all requests without writing to the rings */
1960 for_each_ring(ring, dev_priv, i) {
1961 ret = intel_ring_idle(ring);
1962 if (ret)
1963 return ret;
1964 }
1965 i915_gem_retire_requests(dev);
1966
1967 /* Finally reset hw state */
1968 for_each_ring(ring, dev_priv, i) {
1969 intel_ring_init_seqno(ring, seqno);
1970
1971 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1972 ring->sync_seqno[j] = 0;
1973 }
1974
1975 return 0;
1976 }
1977
1978 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
1979 {
1980 struct drm_i915_private *dev_priv = dev->dev_private;
1981 int ret;
1982
1983 if (seqno == 0)
1984 return -EINVAL;
1985
1986 /* HWS page needs to be set less than what we
1987 * will inject to ring
1988 */
1989 ret = i915_gem_init_seqno(dev, seqno - 1);
1990 if (ret)
1991 return ret;
1992
1993 /* Carefully set the last_seqno value so that wrap
1994 * detection still works
1995 */
1996 dev_priv->next_seqno = seqno;
1997 dev_priv->last_seqno = seqno - 1;
1998 if (dev_priv->last_seqno == 0)
1999 dev_priv->last_seqno--;
2000
2001 return 0;
2002 }
2003
2004 int
2005 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2006 {
2007 struct drm_i915_private *dev_priv = dev->dev_private;
2008
2009 /* reserve 0 for non-seqno */
2010 if (dev_priv->next_seqno == 0) {
2011 int ret = i915_gem_init_seqno(dev, 0);
2012 if (ret)
2013 return ret;
2014
2015 dev_priv->next_seqno = 1;
2016 }
2017
2018 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
2019 return 0;
2020 }
2021
2022 int __i915_add_request(struct intel_ring_buffer *ring,
2023 struct drm_file *file,
2024 struct drm_i915_gem_object *obj,
2025 u32 *out_seqno)
2026 {
2027 drm_i915_private_t *dev_priv = ring->dev->dev_private;
2028 struct drm_i915_gem_request *request;
2029 u32 request_ring_position, request_start;
2030 int was_empty;
2031 int ret;
2032
2033 request_start = intel_ring_get_tail(ring);
2034 /*
2035 * Emit any outstanding flushes - execbuf can fail to emit the flush
2036 * after having emitted the batchbuffer command. Hence we need to fix
2037 * things up similar to emitting the lazy request. The difference here
2038 * is that the flush _must_ happen before the next request, no matter
2039 * what.
2040 */
2041 ret = intel_ring_flush_all_caches(ring);
2042 if (ret)
2043 return ret;
2044
2045 request = kmalloc(sizeof(*request), GFP_KERNEL);
2046 if (request == NULL)
2047 return -ENOMEM;
2048
2049
2050 /* Record the position of the start of the request so that
2051 * should we detect the updated seqno part-way through the
2052 * GPU processing the request, we never over-estimate the
2053 * position of the head.
2054 */
2055 request_ring_position = intel_ring_get_tail(ring);
2056
2057 ret = ring->add_request(ring);
2058 if (ret) {
2059 kfree(request);
2060 return ret;
2061 }
2062
2063 request->seqno = intel_ring_get_seqno(ring);
2064 request->ring = ring;
2065 request->head = request_start;
2066 request->tail = request_ring_position;
2067 request->ctx = ring->last_context;
2068 request->batch_obj = obj;
2069
2070 /* Whilst this request exists, batch_obj will be on the
2071 * active_list, and so will hold the active reference. Only when this
2072 * request is retired will the batch_obj be moved onto the
2073 * inactive_list and lose its active reference. Hence we do not need
2074 * to explicitly hold another reference here.
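 * (The reference in question is taken by drm_gem_object_reference() in
 * i915_gem_object_move_to_active() and dropped again in
 * i915_gem_object_move_to_inactive() once the request has been retired.)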
2075 */ 2076 2077 if (request->ctx) 2078 i915_gem_context_reference(request->ctx); 2079 2080 request->emitted_jiffies = jiffies; 2081 was_empty = list_empty(&ring->request_list); 2082 list_add_tail(&request->list, &ring->request_list); 2083 request->file_priv = NULL; 2084 2085 if (file) { 2086 struct drm_i915_file_private *file_priv = file->driver_priv; 2087 2088 spin_lock(&file_priv->mm.lock); 2089 request->file_priv = file_priv; 2090 list_add_tail(&request->client_list, 2091 &file_priv->mm.request_list); 2092 spin_unlock(&file_priv->mm.lock); 2093 } 2094 2095 trace_i915_gem_request_add(ring, request->seqno); 2096 ring->outstanding_lazy_request = 0; 2097 2098 if (!dev_priv->ums.mm_suspended) { 2099 i915_queue_hangcheck(ring->dev); 2100 2101 if (was_empty) { 2102 queue_delayed_work(dev_priv->wq, 2103 &dev_priv->mm.retire_work, 2104 round_jiffies_up_relative(HZ)); 2105 intel_mark_busy(dev_priv->dev); 2106 } 2107 } 2108 2109 if (out_seqno) 2110 *out_seqno = request->seqno; 2111 return 0; 2112 } 2113 2114 static inline void 2115 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2116 { 2117 struct drm_i915_file_private *file_priv = request->file_priv; 2118 2119 if (!file_priv) 2120 return; 2121 2122 spin_lock(&file_priv->mm.lock); 2123 if (request->file_priv) { 2124 list_del(&request->client_list); 2125 request->file_priv = NULL; 2126 } 2127 spin_unlock(&file_priv->mm.lock); 2128 } 2129 2130 static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj, 2131 struct i915_address_space *vm) 2132 { 2133 if (acthd >= i915_gem_obj_offset(obj, vm) && 2134 acthd < i915_gem_obj_offset(obj, vm) + obj->base.size) 2135 return true; 2136 2137 return false; 2138 } 2139 2140 static bool i915_head_inside_request(const u32 acthd_unmasked, 2141 const u32 request_start, 2142 const u32 request_end) 2143 { 2144 const u32 acthd = acthd_unmasked & HEAD_ADDR; 2145 2146 if (request_start < request_end) { 2147 if (acthd >= request_start && acthd < request_end) 2148 return true; 2149 } else if (request_start > request_end) { 2150 if (acthd >= request_start || acthd < request_end) 2151 return true; 2152 } 2153 2154 return false; 2155 } 2156 2157 static struct i915_address_space * 2158 request_to_vm(struct drm_i915_gem_request *request) 2159 { 2160 struct drm_i915_private *dev_priv = request->ring->dev->dev_private; 2161 struct i915_address_space *vm; 2162 2163 vm = &dev_priv->gtt.base; 2164 2165 return vm; 2166 } 2167 2168 static bool i915_request_guilty(struct drm_i915_gem_request *request, 2169 const u32 acthd, bool *inside) 2170 { 2171 /* There is a possibility that unmasked head address 2172 * pointing inside the ring, matches the batch_obj address range. 2173 * However this is extremely unlikely. 
2174 */ 2175 if (request->batch_obj) { 2176 if (i915_head_inside_object(acthd, request->batch_obj, 2177 request_to_vm(request))) { 2178 *inside = true; 2179 return true; 2180 } 2181 } 2182 2183 if (i915_head_inside_request(acthd, request->head, request->tail)) { 2184 *inside = false; 2185 return true; 2186 } 2187 2188 return false; 2189 } 2190 2191 static void i915_set_reset_status(struct intel_ring_buffer *ring, 2192 struct drm_i915_gem_request *request, 2193 u32 acthd) 2194 { 2195 struct i915_ctx_hang_stats *hs = NULL; 2196 bool inside, guilty; 2197 unsigned long offset = 0; 2198 2199 /* Innocent until proven guilty */ 2200 guilty = false; 2201 2202 if (request->batch_obj) 2203 offset = i915_gem_obj_offset(request->batch_obj, 2204 request_to_vm(request)); 2205 2206 if (ring->hangcheck.action != HANGCHECK_WAIT && 2207 i915_request_guilty(request, acthd, &inside)) { 2208 DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n", 2209 ring->name, 2210 inside ? "inside" : "flushing", 2211 offset, 2212 request->ctx ? request->ctx->id : 0, 2213 acthd); 2214 2215 guilty = true; 2216 } 2217 2218 /* If contexts are disabled or this is the default context, use 2219 * file_priv->reset_state 2220 */ 2221 if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) 2222 hs = &request->ctx->hang_stats; 2223 else if (request->file_priv) 2224 hs = &request->file_priv->hang_stats; 2225 2226 if (hs) { 2227 if (guilty) 2228 hs->batch_active++; 2229 else 2230 hs->batch_pending++; 2231 } 2232 } 2233 2234 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2235 { 2236 list_del(&request->list); 2237 i915_gem_request_remove_from_client(request); 2238 2239 if (request->ctx) 2240 i915_gem_context_unreference(request->ctx); 2241 2242 kfree(request); 2243 } 2244 2245 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 2246 struct intel_ring_buffer *ring) 2247 { 2248 u32 completed_seqno; 2249 u32 acthd; 2250 2251 acthd = intel_ring_get_active_head(ring); 2252 completed_seqno = ring->get_seqno(ring, false); 2253 2254 while (!list_empty(&ring->request_list)) { 2255 struct drm_i915_gem_request *request; 2256 2257 request = list_first_entry(&ring->request_list, 2258 struct drm_i915_gem_request, 2259 list); 2260 2261 if (request->seqno > completed_seqno) 2262 i915_set_reset_status(ring, request, acthd); 2263 2264 i915_gem_free_request(request); 2265 } 2266 2267 while (!list_empty(&ring->active_list)) { 2268 struct drm_i915_gem_object *obj; 2269 2270 obj = list_first_entry(&ring->active_list, 2271 struct drm_i915_gem_object, 2272 ring_list); 2273 2274 i915_gem_object_move_to_inactive(obj); 2275 } 2276 } 2277 2278 void i915_gem_restore_fences(struct drm_device *dev) 2279 { 2280 struct drm_i915_private *dev_priv = dev->dev_private; 2281 int i; 2282 2283 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2284 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2285 2286 /* 2287 * Commit delayed tiling changes if we have an object still 2288 * attached to the fence, otherwise just clear the fence. 
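 * (This is used to bring the fence registers back to a known state,
 * e.g. from i915_gem_reset() below once a GPU reset may have clobbered
 * them.)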
2289 */ 2290 if (reg->obj) { 2291 i915_gem_object_update_fence(reg->obj, reg, 2292 reg->obj->tiling_mode); 2293 } else { 2294 i915_gem_write_fence(dev, i, NULL); 2295 } 2296 } 2297 } 2298 2299 void i915_gem_reset(struct drm_device *dev) 2300 { 2301 struct drm_i915_private *dev_priv = dev->dev_private; 2302 struct intel_ring_buffer *ring; 2303 int i; 2304 2305 for_each_ring(ring, dev_priv, i) 2306 i915_gem_reset_ring_lists(dev_priv, ring); 2307 2308 i915_gem_restore_fences(dev); 2309 } 2310 2311 /** 2312 * This function clears the request list as sequence numbers are passed. 2313 */ 2314 void 2315 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2316 { 2317 uint32_t seqno; 2318 2319 if (list_empty(&ring->request_list)) 2320 return; 2321 2322 WARN_ON(i915_verify_lists(ring->dev)); 2323 2324 seqno = ring->get_seqno(ring, true); 2325 2326 while (!list_empty(&ring->request_list)) { 2327 struct drm_i915_gem_request *request; 2328 2329 request = list_first_entry(&ring->request_list, 2330 struct drm_i915_gem_request, 2331 list); 2332 2333 if (!i915_seqno_passed(seqno, request->seqno)) 2334 break; 2335 2336 trace_i915_gem_request_retire(ring, request->seqno); 2337 /* We know the GPU must have read the request to have 2338 * sent us the seqno + interrupt, so use the position 2339 * of tail of the request to update the last known position 2340 * of the GPU head. 2341 */ 2342 ring->last_retired_head = request->tail; 2343 2344 i915_gem_free_request(request); 2345 } 2346 2347 /* Move any buffers on the active list that are no longer referenced 2348 * by the ringbuffer to the flushing/inactive lists as appropriate. 2349 */ 2350 while (!list_empty(&ring->active_list)) { 2351 struct drm_i915_gem_object *obj; 2352 2353 obj = list_first_entry(&ring->active_list, 2354 struct drm_i915_gem_object, 2355 ring_list); 2356 2357 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2358 break; 2359 2360 i915_gem_object_move_to_inactive(obj); 2361 } 2362 2363 if (unlikely(ring->trace_irq_seqno && 2364 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2365 ring->irq_put(ring); 2366 ring->trace_irq_seqno = 0; 2367 } 2368 2369 WARN_ON(i915_verify_lists(ring->dev)); 2370 } 2371 2372 void 2373 i915_gem_retire_requests(struct drm_device *dev) 2374 { 2375 drm_i915_private_t *dev_priv = dev->dev_private; 2376 struct intel_ring_buffer *ring; 2377 int i; 2378 2379 for_each_ring(ring, dev_priv, i) 2380 i915_gem_retire_requests_ring(ring); 2381 } 2382 2383 static void 2384 i915_gem_retire_work_handler(struct work_struct *work) 2385 { 2386 drm_i915_private_t *dev_priv; 2387 struct drm_device *dev; 2388 struct intel_ring_buffer *ring; 2389 bool idle; 2390 int i; 2391 2392 dev_priv = container_of(work, drm_i915_private_t, 2393 mm.retire_work.work); 2394 dev = dev_priv->dev; 2395 2396 /* Come back later if the device is busy... */ 2397 if (!mutex_trylock(&dev->struct_mutex)) { 2398 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2399 round_jiffies_up_relative(HZ)); 2400 return; 2401 } 2402 2403 i915_gem_retire_requests(dev); 2404 2405 /* Send a periodic flush down the ring so we don't hold onto GEM 2406 * objects indefinitely. 
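 * (An otherwise-empty request via i915_add_request(ring, NULL) is emitted
 * below for any ring with dirty GPU caches; once that request retires, the
 * objects it flushed can be moved off the active list.)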
2407 */
2408 idle = true;
2409 for_each_ring(ring, dev_priv, i) {
2410 if (ring->gpu_caches_dirty)
2411 i915_add_request(ring, NULL);
2412
2413 idle &= list_empty(&ring->request_list);
2414 }
2415
2416 if (!dev_priv->ums.mm_suspended && !idle)
2417 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2418 round_jiffies_up_relative(HZ));
2419 if (idle)
2420 intel_mark_idle(dev);
2421
2422 mutex_unlock(&dev->struct_mutex);
2423 }
2424
2425 /**
2426 * Ensures that an object will eventually get non-busy by flushing any required
2427 * write domains, emitting any outstanding lazy request and retiring any
2428 * completed requests.
2429 */
2430 static int
2431 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2432 {
2433 int ret;
2434
2435 if (obj->active) {
2436 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2437 if (ret)
2438 return ret;
2439
2440 i915_gem_retire_requests_ring(obj->ring);
2441 }
2442
2443 return 0;
2444 }
2445
2446 /**
2447 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2448 * @DRM_IOCTL_ARGS: standard ioctl arguments
2449 *
2450 * Returns 0 if successful, else an error is returned with the remaining time in
2451 * the timeout parameter.
2452 * -ETIME: object is still busy after timeout
2453 * -ERESTARTSYS: signal interrupted the wait
2454 * -ENOENT: object doesn't exist
2455 * Also possible, but rare:
2456 * -EAGAIN: GPU wedged
2457 * -ENOMEM: damn
2458 * -ENODEV: Internal IRQ fail
2459 * -E?: The add request failed
2460 *
2461 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2462 * non-zero timeout parameter the wait ioctl will wait for the given number of
2463 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2464 * without holding struct_mutex the object may become re-busied before this
2465 * function completes. A similar but shorter race condition exists in the busy
2466 * ioctl.
2467 */
2468 int
2469 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2470 {
2471 drm_i915_private_t *dev_priv = dev->dev_private;
2472 struct drm_i915_gem_wait *args = data;
2473 struct drm_i915_gem_object *obj;
2474 struct intel_ring_buffer *ring = NULL;
2475 struct timespec timeout_stack, *timeout = NULL;
2476 unsigned reset_counter;
2477 u32 seqno = 0;
2478 int ret = 0;
2479
2480 if (args->timeout_ns >= 0) {
2481 timeout_stack = ns_to_timespec(args->timeout_ns);
2482 timeout = &timeout_stack;
2483 }
2484
2485 ret = i915_mutex_lock_interruptible(dev);
2486 if (ret)
2487 return ret;
2488
2489 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2490 if (&obj->base == NULL) {
2491 mutex_unlock(&dev->struct_mutex);
2492 return -ENOENT;
2493 }
2494
2495 /* Need to make sure the object gets inactive eventually.
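 * (i.e. emit any outstanding lazy request and retire whatever has already
 * completed; see i915_gem_object_flush_active() above.)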
*/ 2496 ret = i915_gem_object_flush_active(obj); 2497 if (ret) 2498 goto out; 2499 2500 if (obj->active) { 2501 seqno = obj->last_read_seqno; 2502 ring = obj->ring; 2503 } 2504 2505 if (seqno == 0) 2506 goto out; 2507 2508 /* Do this after OLR check to make sure we make forward progress polling 2509 * on this IOCTL with a 0 timeout (like busy ioctl) 2510 */ 2511 if (!args->timeout_ns) { 2512 ret = -ETIME; 2513 goto out; 2514 } 2515 2516 drm_gem_object_unreference(&obj->base); 2517 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2518 mutex_unlock(&dev->struct_mutex); 2519 2520 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); 2521 if (timeout) 2522 args->timeout_ns = timespec_to_ns(timeout); 2523 return ret; 2524 2525 out: 2526 drm_gem_object_unreference(&obj->base); 2527 mutex_unlock(&dev->struct_mutex); 2528 return ret; 2529 } 2530 2531 /** 2532 * i915_gem_object_sync - sync an object to a ring. 2533 * 2534 * @obj: object which may be in use on another ring. 2535 * @to: ring we wish to use the object on. May be NULL. 2536 * 2537 * This code is meant to abstract object synchronization with the GPU. 2538 * Calling with NULL implies synchronizing the object with the CPU 2539 * rather than a particular GPU ring. 2540 * 2541 * Returns 0 if successful, else propagates up the lower layer error. 2542 */ 2543 int 2544 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2545 struct intel_ring_buffer *to) 2546 { 2547 struct intel_ring_buffer *from = obj->ring; 2548 u32 seqno; 2549 int ret, idx; 2550 2551 if (from == NULL || to == from) 2552 return 0; 2553 2554 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2555 return i915_gem_object_wait_rendering(obj, false); 2556 2557 idx = intel_ring_sync_index(from, to); 2558 2559 seqno = obj->last_read_seqno; 2560 if (seqno <= from->sync_seqno[idx]) 2561 return 0; 2562 2563 ret = i915_gem_check_olr(obj->ring, seqno); 2564 if (ret) 2565 return ret; 2566 2567 ret = to->sync_to(to, from, seqno); 2568 if (!ret) 2569 /* We use last_read_seqno because sync_to() 2570 * might have just caused seqno wrap under 2571 * the radar. 
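 * (obj->last_read_seqno is the value this synchronisation was actually
 * based on, so it remains the right value to record even if the ring's
 * current seqno has wrapped in the meantime.)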
2572 */ 2573 from->sync_seqno[idx] = obj->last_read_seqno; 2574 2575 return ret; 2576 } 2577 2578 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2579 { 2580 u32 old_write_domain, old_read_domains; 2581 2582 /* Force a pagefault for domain tracking on next user access */ 2583 i915_gem_release_mmap(obj); 2584 2585 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2586 return; 2587 2588 /* Wait for any direct GTT access to complete */ 2589 mb(); 2590 2591 old_read_domains = obj->base.read_domains; 2592 old_write_domain = obj->base.write_domain; 2593 2594 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2595 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2596 2597 trace_i915_gem_object_change_domain(obj, 2598 old_read_domains, 2599 old_write_domain); 2600 } 2601 2602 int i915_vma_unbind(struct i915_vma *vma) 2603 { 2604 struct drm_i915_gem_object *obj = vma->obj; 2605 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2606 int ret; 2607 2608 if (list_empty(&vma->vma_link)) 2609 return 0; 2610 2611 if (!drm_mm_node_allocated(&vma->node)) 2612 goto destroy; 2613 2614 if (obj->pin_count) 2615 return -EBUSY; 2616 2617 BUG_ON(obj->pages == NULL); 2618 2619 ret = i915_gem_object_finish_gpu(obj); 2620 if (ret) 2621 return ret; 2622 /* Continue on if we fail due to EIO, the GPU is hung so we 2623 * should be safe and we need to cleanup or else we might 2624 * cause memory corruption through use-after-free. 2625 */ 2626 2627 i915_gem_object_finish_gtt(obj); 2628 2629 /* release the fence reg _after_ flushing */ 2630 ret = i915_gem_object_put_fence(obj); 2631 if (ret) 2632 return ret; 2633 2634 trace_i915_vma_unbind(vma); 2635 2636 if (obj->has_global_gtt_mapping) 2637 i915_gem_gtt_unbind_object(obj); 2638 if (obj->has_aliasing_ppgtt_mapping) { 2639 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2640 obj->has_aliasing_ppgtt_mapping = 0; 2641 } 2642 i915_gem_gtt_finish_object(obj); 2643 i915_gem_object_unpin_pages(obj); 2644 2645 list_del(&vma->mm_list); 2646 /* Avoid an unnecessary call to unbind on rebind. */ 2647 if (i915_is_ggtt(vma->vm)) 2648 obj->map_and_fenceable = true; 2649 2650 drm_mm_remove_node(&vma->node); 2651 2652 destroy: 2653 i915_gem_vma_destroy(vma); 2654 2655 /* Since the unbound list is global, only move to that list if 2656 * no more VMAs exist. 2657 * NB: Until we have real VMAs there will only ever be one */ 2658 WARN_ON(!list_empty(&obj->vma_list)); 2659 if (list_empty(&obj->vma_list)) 2660 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2661 2662 return 0; 2663 } 2664 2665 /** 2666 * Unbinds an object from the global GTT aperture. 2667 */ 2668 int 2669 i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) 2670 { 2671 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2672 struct i915_address_space *ggtt = &dev_priv->gtt.base; 2673 2674 if (!i915_gem_obj_ggtt_bound(obj)) 2675 return 0; 2676 2677 if (obj->pin_count) 2678 return -EBUSY; 2679 2680 BUG_ON(obj->pages == NULL); 2681 2682 return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt)); 2683 } 2684 2685 int i915_gpu_idle(struct drm_device *dev) 2686 { 2687 drm_i915_private_t *dev_priv = dev->dev_private; 2688 struct intel_ring_buffer *ring; 2689 int ret, i; 2690 2691 /* Flush everything onto the inactive list. 
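 * (Each ring is switched back to the default context and then drained
 * with intel_ring_idle(), so that a subsequent retire can move everything
 * onto the inactive lists.)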
*/ 2692 for_each_ring(ring, dev_priv, i) { 2693 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2694 if (ret) 2695 return ret; 2696 2697 ret = intel_ring_idle(ring); 2698 if (ret) 2699 return ret; 2700 } 2701 2702 return 0; 2703 } 2704 2705 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2706 struct drm_i915_gem_object *obj) 2707 { 2708 drm_i915_private_t *dev_priv = dev->dev_private; 2709 int fence_reg; 2710 int fence_pitch_shift; 2711 2712 if (INTEL_INFO(dev)->gen >= 6) { 2713 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2714 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 2715 } else { 2716 fence_reg = FENCE_REG_965_0; 2717 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2718 } 2719 2720 fence_reg += reg * 8; 2721 2722 /* To w/a incoherency with non-atomic 64-bit register updates, 2723 * we split the 64-bit update into two 32-bit writes. In order 2724 * for a partial fence not to be evaluated between writes, we 2725 * precede the update with write to turn off the fence register, 2726 * and only enable the fence as the last step. 2727 * 2728 * For extra levels of paranoia, we make sure each step lands 2729 * before applying the next step. 2730 */ 2731 I915_WRITE(fence_reg, 0); 2732 POSTING_READ(fence_reg); 2733 2734 if (obj) { 2735 u32 size = i915_gem_obj_ggtt_size(obj); 2736 uint64_t val; 2737 2738 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 2739 0xfffff000) << 32; 2740 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 2741 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 2742 if (obj->tiling_mode == I915_TILING_Y) 2743 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2744 val |= I965_FENCE_REG_VALID; 2745 2746 I915_WRITE(fence_reg + 4, val >> 32); 2747 POSTING_READ(fence_reg + 4); 2748 2749 I915_WRITE(fence_reg + 0, val); 2750 POSTING_READ(fence_reg); 2751 } else { 2752 I915_WRITE(fence_reg + 4, 0); 2753 POSTING_READ(fence_reg + 4); 2754 } 2755 } 2756 2757 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2758 struct drm_i915_gem_object *obj) 2759 { 2760 drm_i915_private_t *dev_priv = dev->dev_private; 2761 u32 val; 2762 2763 if (obj) { 2764 u32 size = i915_gem_obj_ggtt_size(obj); 2765 int pitch_val; 2766 int tile_width; 2767 2768 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 2769 (size & -size) != size || 2770 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 2771 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2772 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 2773 2774 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2775 tile_width = 128; 2776 else 2777 tile_width = 512; 2778 2779 /* Note: pitch better be a power of two tile widths */ 2780 pitch_val = obj->stride / tile_width; 2781 pitch_val = ffs(pitch_val) - 1; 2782 2783 val = i915_gem_obj_ggtt_offset(obj); 2784 if (obj->tiling_mode == I915_TILING_Y) 2785 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2786 val |= I915_FENCE_SIZE_BITS(size); 2787 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2788 val |= I830_FENCE_REG_VALID; 2789 } else 2790 val = 0; 2791 2792 if (reg < 8) 2793 reg = FENCE_REG_830_0 + reg * 4; 2794 else 2795 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2796 2797 I915_WRITE(reg, val); 2798 POSTING_READ(reg); 2799 } 2800 2801 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2802 struct drm_i915_gem_object *obj) 2803 { 2804 drm_i915_private_t *dev_priv = dev->dev_private; 2805 uint32_t val; 2806 2807 if (obj) { 2808 u32 size = i915_gem_obj_ggtt_size(obj); 2809 uint32_t pitch_val; 2810 2811 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 2812 (size & -size) != size || 2813 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 2814 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 2815 i915_gem_obj_ggtt_offset(obj), size); 2816 2817 pitch_val = obj->stride / 128; 2818 pitch_val = ffs(pitch_val) - 1; 2819 2820 val = i915_gem_obj_ggtt_offset(obj); 2821 if (obj->tiling_mode == I915_TILING_Y) 2822 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2823 val |= I830_FENCE_SIZE_BITS(size); 2824 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2825 val |= I830_FENCE_REG_VALID; 2826 } else 2827 val = 0; 2828 2829 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2830 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2831 } 2832 2833 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 2834 { 2835 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 2836 } 2837 2838 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2839 struct drm_i915_gem_object *obj) 2840 { 2841 struct drm_i915_private *dev_priv = dev->dev_private; 2842 2843 /* Ensure that all CPU reads are completed before installing a fence 2844 * and all writes before removing the fence. 2845 */ 2846 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 2847 mb(); 2848 2849 WARN(obj && (!obj->stride || !obj->tiling_mode), 2850 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 2851 obj->stride, obj->tiling_mode); 2852 2853 switch (INTEL_INFO(dev)->gen) { 2854 case 7: 2855 case 6: 2856 case 5: 2857 case 4: i965_write_fence_reg(dev, reg, obj); break; 2858 case 3: i915_write_fence_reg(dev, reg, obj); break; 2859 case 2: i830_write_fence_reg(dev, reg, obj); break; 2860 default: BUG(); 2861 } 2862 2863 /* And similarly be paranoid that no direct access to this region 2864 * is reordered to before the fence is installed. 
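 * (Hence the mb() below whenever the object being installed is currently
 * readable through the GTT, mirroring the barrier issued before the
 * register update above.)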
2865 */ 2866 if (i915_gem_object_needs_mb(obj)) 2867 mb(); 2868 } 2869 2870 static inline int fence_number(struct drm_i915_private *dev_priv, 2871 struct drm_i915_fence_reg *fence) 2872 { 2873 return fence - dev_priv->fence_regs; 2874 } 2875 2876 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2877 struct drm_i915_fence_reg *fence, 2878 bool enable) 2879 { 2880 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2881 int reg = fence_number(dev_priv, fence); 2882 2883 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 2884 2885 if (enable) { 2886 obj->fence_reg = reg; 2887 fence->obj = obj; 2888 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2889 } else { 2890 obj->fence_reg = I915_FENCE_REG_NONE; 2891 fence->obj = NULL; 2892 list_del_init(&fence->lru_list); 2893 } 2894 obj->fence_dirty = false; 2895 } 2896 2897 static int 2898 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 2899 { 2900 if (obj->last_fenced_seqno) { 2901 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2902 if (ret) 2903 return ret; 2904 2905 obj->last_fenced_seqno = 0; 2906 } 2907 2908 obj->fenced_gpu_access = false; 2909 return 0; 2910 } 2911 2912 int 2913 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2914 { 2915 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2916 struct drm_i915_fence_reg *fence; 2917 int ret; 2918 2919 ret = i915_gem_object_wait_fence(obj); 2920 if (ret) 2921 return ret; 2922 2923 if (obj->fence_reg == I915_FENCE_REG_NONE) 2924 return 0; 2925 2926 fence = &dev_priv->fence_regs[obj->fence_reg]; 2927 2928 i915_gem_object_fence_lost(obj); 2929 i915_gem_object_update_fence(obj, fence, false); 2930 2931 return 0; 2932 } 2933 2934 static struct drm_i915_fence_reg * 2935 i915_find_fence_reg(struct drm_device *dev) 2936 { 2937 struct drm_i915_private *dev_priv = dev->dev_private; 2938 struct drm_i915_fence_reg *reg, *avail; 2939 int i; 2940 2941 /* First try to find a free reg */ 2942 avail = NULL; 2943 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2944 reg = &dev_priv->fence_regs[i]; 2945 if (!reg->obj) 2946 return reg; 2947 2948 if (!reg->pin_count) 2949 avail = reg; 2950 } 2951 2952 if (avail == NULL) 2953 return NULL; 2954 2955 /* None available, try to steal one or wait for a user to finish */ 2956 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2957 if (reg->pin_count) 2958 continue; 2959 2960 return reg; 2961 } 2962 2963 return NULL; 2964 } 2965 2966 /** 2967 * i915_gem_object_get_fence - set up fencing for an object 2968 * @obj: object to map through a fence reg 2969 * 2970 * When mapping objects through the GTT, userspace wants to be able to write 2971 * to them without having to worry about swizzling if the object is tiled. 2972 * This function walks the fence regs looking for a free one for @obj, 2973 * stealing one if it can't find any. 2974 * 2975 * It then sets up the reg based on the object's properties: address, pitch 2976 * and tiling format. 2977 * 2978 * For an untiled surface, this removes any existing fence. 
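 *
 * A typical caller pattern looks roughly like the following (an
 * illustrative sketch only, with the surrounding pin/tiling handling
 * omitted):
 *
 *      ret = i915_gem_object_get_fence(obj);
 *      if (ret)
 *              return ret;
 *
 * where -EDEADLK means that no fence register was free and none could be
 * stolen at this time.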
2979 */
2980 int
2981 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2982 {
2983 struct drm_device *dev = obj->base.dev;
2984 struct drm_i915_private *dev_priv = dev->dev_private;
2985 bool enable = obj->tiling_mode != I915_TILING_NONE;
2986 struct drm_i915_fence_reg *reg;
2987 int ret;
2988
2989 /* Have we updated the tiling parameters upon the object and so
2990 * will need to serialise the write to the associated fence register?
2991 */
2992 if (obj->fence_dirty) {
2993 ret = i915_gem_object_wait_fence(obj);
2994 if (ret)
2995 return ret;
2996 }
2997
2998 /* Just update our place in the LRU if our fence is getting reused. */
2999 if (obj->fence_reg != I915_FENCE_REG_NONE) {
3000 reg = &dev_priv->fence_regs[obj->fence_reg];
3001 if (!obj->fence_dirty) {
3002 list_move_tail(&reg->lru_list,
3003 &dev_priv->mm.fence_list);
3004 return 0;
3005 }
3006 } else if (enable) {
3007 reg = i915_find_fence_reg(dev);
3008 if (reg == NULL)
3009 return -EDEADLK;
3010
3011 if (reg->obj) {
3012 struct drm_i915_gem_object *old = reg->obj;
3013
3014 ret = i915_gem_object_wait_fence(old);
3015 if (ret)
3016 return ret;
3017
3018 i915_gem_object_fence_lost(old);
3019 }
3020 } else
3021 return 0;
3022
3023 i915_gem_object_update_fence(obj, reg, enable);
3024
3025 return 0;
3026 }
3027
3028 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3029 struct drm_mm_node *gtt_space,
3030 unsigned long cache_level)
3031 {
3032 struct drm_mm_node *other;
3033
3034 /* On non-LLC machines we have to be careful when putting differing
3035 * types of snoopable memory together to avoid the prefetcher
3036 * crossing memory domains and dying.
3037 */
3038 if (HAS_LLC(dev))
3039 return true;
3040
3041 if (!drm_mm_node_allocated(gtt_space))
3042 return true;
3043
3044 if (list_empty(&gtt_space->node_list))
3045 return true;
3046
3047 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3048 if (other->allocated && !other->hole_follows && other->color != cache_level)
3049 return false;
3050
3051 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3052 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3053 return false;
3054
3055 return true;
3056 }
3057
3058 static void i915_gem_verify_gtt(struct drm_device *dev)
3059 {
3060 #if WATCH_GTT
3061 struct drm_i915_private *dev_priv = dev->dev_private;
3062 struct drm_i915_gem_object *obj;
3063 int err = 0;
3064
3065 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) {
3066 if (obj->gtt_space == NULL) {
3067 printk(KERN_ERR "object found on GTT list with no space reserved\n");
3068 err++;
3069 continue;
3070 }
3071
3072 if (obj->cache_level != obj->gtt_space->color) {
3073 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
3074 i915_gem_obj_ggtt_offset(obj),
3075 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3076 obj->cache_level,
3077 obj->gtt_space->color);
3078 err++;
3079 continue;
3080 }
3081
3082 if (!i915_gem_valid_gtt_space(dev,
3083 obj->gtt_space,
3084 obj->cache_level)) {
3085 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
3086 i915_gem_obj_ggtt_offset(obj),
3087 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
3088 obj->cache_level);
3089 err++;
3090 continue;
3091 }
3092 }
3093
3094 WARN_ON(err);
3095 #endif
3096 }
3097
3098 /**
3099 * Finds free space in the GTT aperture and binds the object there.
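 * Returns -E2BIG if the object can never fit in the requested space; if
 * the allocator cannot find room, eviction is attempted before the
 * failure is propagated to the caller.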
3100 */ 3101 static int 3102 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3103 struct i915_address_space *vm, 3104 unsigned alignment, 3105 bool map_and_fenceable, 3106 bool nonblocking) 3107 { 3108 struct drm_device *dev = obj->base.dev; 3109 drm_i915_private_t *dev_priv = dev->dev_private; 3110 u32 size, fence_size, fence_alignment, unfenced_alignment; 3111 size_t gtt_max = 3112 map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total; 3113 struct i915_vma *vma; 3114 int ret; 3115 3116 fence_size = i915_gem_get_gtt_size(dev, 3117 obj->base.size, 3118 obj->tiling_mode); 3119 fence_alignment = i915_gem_get_gtt_alignment(dev, 3120 obj->base.size, 3121 obj->tiling_mode, true); 3122 unfenced_alignment = 3123 i915_gem_get_gtt_alignment(dev, 3124 obj->base.size, 3125 obj->tiling_mode, false); 3126 3127 if (alignment == 0) 3128 alignment = map_and_fenceable ? fence_alignment : 3129 unfenced_alignment; 3130 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 3131 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 3132 return -EINVAL; 3133 } 3134 3135 size = map_and_fenceable ? fence_size : obj->base.size; 3136 3137 /* If the object is bigger than the entire aperture, reject it early 3138 * before evicting everything in a vain attempt to find space. 3139 */ 3140 if (obj->base.size > gtt_max) { 3141 DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", 3142 obj->base.size, 3143 map_and_fenceable ? "mappable" : "total", 3144 gtt_max); 3145 return -E2BIG; 3146 } 3147 3148 ret = i915_gem_object_get_pages(obj); 3149 if (ret) 3150 return ret; 3151 3152 i915_gem_object_pin_pages(obj); 3153 3154 BUG_ON(!i915_is_ggtt(vm)); 3155 3156 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3157 if (IS_ERR(vma)) { 3158 ret = PTR_ERR(vma); 3159 goto err_unpin; 3160 } 3161 3162 /* For now we only ever use 1 vma per object */ 3163 WARN_ON(!list_is_singular(&obj->vma_list)); 3164 3165 search_free: 3166 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3167 size, alignment, 3168 obj->cache_level, 0, gtt_max, 3169 DRM_MM_SEARCH_DEFAULT); 3170 if (ret) { 3171 ret = i915_gem_evict_something(dev, vm, size, alignment, 3172 obj->cache_level, 3173 map_and_fenceable, 3174 nonblocking); 3175 if (ret == 0) 3176 goto search_free; 3177 3178 goto err_free_vma; 3179 } 3180 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node, 3181 obj->cache_level))) { 3182 ret = -EINVAL; 3183 goto err_remove_node; 3184 } 3185 3186 ret = i915_gem_gtt_prepare_object(obj); 3187 if (ret) 3188 goto err_remove_node; 3189 3190 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3191 list_add_tail(&vma->mm_list, &vm->inactive_list); 3192 3193 if (i915_is_ggtt(vm)) { 3194 bool mappable, fenceable; 3195 3196 fenceable = (vma->node.size == fence_size && 3197 (vma->node.start & (fence_alignment - 1)) == 0); 3198 3199 mappable = (vma->node.start + obj->base.size <= 3200 dev_priv->gtt.mappable_end); 3201 3202 obj->map_and_fenceable = mappable && fenceable; 3203 } 3204 3205 WARN_ON(map_and_fenceable && !obj->map_and_fenceable); 3206 3207 trace_i915_vma_bind(vma, map_and_fenceable); 3208 i915_gem_verify_gtt(dev); 3209 return 0; 3210 3211 err_remove_node: 3212 drm_mm_remove_node(&vma->node); 3213 err_free_vma: 3214 i915_gem_vma_destroy(vma); 3215 err_unpin: 3216 i915_gem_object_unpin_pages(obj); 3217 return ret; 3218 } 3219 3220 bool 3221 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3222 bool force) 3223 { 3224 /* If we don't have a page list set 
up, then we're not pinned 3225 * to GPU, and we can ignore the cache flush because it'll happen 3226 * again at bind time. 3227 */ 3228 if (obj->pages == NULL) 3229 return false; 3230 3231 /* 3232 * Stolen memory is always coherent with the GPU as it is explicitly 3233 * marked as wc by the system, or the system is cache-coherent. 3234 */ 3235 if (obj->stolen) 3236 return false; 3237 3238 /* If the GPU is snooping the contents of the CPU cache, 3239 * we do not need to manually clear the CPU cache lines. However, 3240 * the caches are only snooped when the render cache is 3241 * flushed/invalidated. As we always have to emit invalidations 3242 * and flushes when moving into and out of the RENDER domain, correct 3243 * snooping behaviour occurs naturally as the result of our domain 3244 * tracking. 3245 */ 3246 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3247 return false; 3248 3249 trace_i915_gem_object_clflush(obj); 3250 drm_clflush_sg(obj->pages); 3251 3252 return true; 3253 } 3254 3255 /** Flushes the GTT write domain for the object if it's dirty. */ 3256 static void 3257 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3258 { 3259 uint32_t old_write_domain; 3260 3261 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3262 return; 3263 3264 /* No actual flushing is required for the GTT write domain. Writes 3265 * to it immediately go to main memory as far as we know, so there's 3266 * no chipset flush. It also doesn't land in render cache. 3267 * 3268 * However, we do have to enforce the order so that all writes through 3269 * the GTT land before any writes to the device, such as updates to 3270 * the GATT itself. 3271 */ 3272 wmb(); 3273 3274 old_write_domain = obj->base.write_domain; 3275 obj->base.write_domain = 0; 3276 3277 trace_i915_gem_object_change_domain(obj, 3278 obj->base.read_domains, 3279 old_write_domain); 3280 } 3281 3282 /** Flushes the CPU write domain for the object if it's dirty. */ 3283 static void 3284 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3285 bool force) 3286 { 3287 uint32_t old_write_domain; 3288 3289 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3290 return; 3291 3292 if (i915_gem_clflush_object(obj, force)) 3293 i915_gem_chipset_flush(obj->base.dev); 3294 3295 old_write_domain = obj->base.write_domain; 3296 obj->base.write_domain = 0; 3297 3298 trace_i915_gem_object_change_domain(obj, 3299 obj->base.read_domains, 3300 old_write_domain); 3301 } 3302 3303 /** 3304 * Moves a single object to the GTT read, and possibly write domain. 3305 * 3306 * This function returns when the move is complete, including waiting on 3307 * flushes to occur. 3308 */ 3309 int 3310 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3311 { 3312 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3313 uint32_t old_write_domain, old_read_domains; 3314 int ret; 3315 3316 /* Not valid to be called on unbound objects. */ 3317 if (!i915_gem_obj_bound_any(obj)) 3318 return -EINVAL; 3319 3320 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3321 return 0; 3322 3323 ret = i915_gem_object_wait_rendering(obj, !write); 3324 if (ret) 3325 return ret; 3326 3327 i915_gem_object_flush_cpu_write_domain(obj, false); 3328 3329 /* Serialise direct access to this object with the barriers for 3330 * coherent writes from the GPU, by effectively invalidating the 3331 * GTT domain upon first access. 
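 * (In practice this boils down to the mb() below, issued only when the
 * object is entering the GTT read domain again after having been flushed
 * out of it.)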
3332 */ 3333 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3334 mb(); 3335 3336 old_write_domain = obj->base.write_domain; 3337 old_read_domains = obj->base.read_domains; 3338 3339 /* It should now be out of any other write domains, and we can update 3340 * the domain values for our changes. 3341 */ 3342 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3343 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3344 if (write) { 3345 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3346 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3347 obj->dirty = 1; 3348 } 3349 3350 trace_i915_gem_object_change_domain(obj, 3351 old_read_domains, 3352 old_write_domain); 3353 3354 /* And bump the LRU for this access */ 3355 if (i915_gem_object_is_inactive(obj)) { 3356 struct i915_vma *vma = i915_gem_obj_to_vma(obj, 3357 &dev_priv->gtt.base); 3358 if (vma) 3359 list_move_tail(&vma->mm_list, 3360 &dev_priv->gtt.base.inactive_list); 3361 3362 } 3363 3364 return 0; 3365 } 3366 3367 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3368 enum i915_cache_level cache_level) 3369 { 3370 struct drm_device *dev = obj->base.dev; 3371 drm_i915_private_t *dev_priv = dev->dev_private; 3372 struct i915_vma *vma; 3373 int ret; 3374 3375 if (obj->cache_level == cache_level) 3376 return 0; 3377 3378 if (obj->pin_count) { 3379 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3380 return -EBUSY; 3381 } 3382 3383 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3384 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { 3385 ret = i915_vma_unbind(vma); 3386 if (ret) 3387 return ret; 3388 3389 break; 3390 } 3391 } 3392 3393 if (i915_gem_obj_bound_any(obj)) { 3394 ret = i915_gem_object_finish_gpu(obj); 3395 if (ret) 3396 return ret; 3397 3398 i915_gem_object_finish_gtt(obj); 3399 3400 /* Before SandyBridge, you could not use tiling or fence 3401 * registers with snooped memory, so relinquish any fences 3402 * currently pointing to our region in the aperture. 3403 */ 3404 if (INTEL_INFO(dev)->gen < 6) { 3405 ret = i915_gem_object_put_fence(obj); 3406 if (ret) 3407 return ret; 3408 } 3409 3410 if (obj->has_global_gtt_mapping) 3411 i915_gem_gtt_bind_object(obj, cache_level); 3412 if (obj->has_aliasing_ppgtt_mapping) 3413 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3414 obj, cache_level); 3415 } 3416 3417 list_for_each_entry(vma, &obj->vma_list, vma_link) 3418 vma->node.color = cache_level; 3419 obj->cache_level = cache_level; 3420 3421 if (cpu_write_needs_clflush(obj)) { 3422 u32 old_read_domains, old_write_domain; 3423 3424 /* If we're coming from LLC cached, then we haven't 3425 * actually been tracking whether the data is in the 3426 * CPU cache or not, since we only allow one bit set 3427 * in obj->write_domain and have been skipping the clflushes. 3428 * Just set it to the CPU cache for now. 
3429 */ 3430 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3431 3432 old_read_domains = obj->base.read_domains; 3433 old_write_domain = obj->base.write_domain; 3434 3435 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3436 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3437 3438 trace_i915_gem_object_change_domain(obj, 3439 old_read_domains, 3440 old_write_domain); 3441 } 3442 3443 i915_gem_verify_gtt(dev); 3444 return 0; 3445 } 3446 3447 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3448 struct drm_file *file) 3449 { 3450 struct drm_i915_gem_caching *args = data; 3451 struct drm_i915_gem_object *obj; 3452 int ret; 3453 3454 ret = i915_mutex_lock_interruptible(dev); 3455 if (ret) 3456 return ret; 3457 3458 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3459 if (&obj->base == NULL) { 3460 ret = -ENOENT; 3461 goto unlock; 3462 } 3463 3464 switch (obj->cache_level) { 3465 case I915_CACHE_LLC: 3466 case I915_CACHE_L3_LLC: 3467 args->caching = I915_CACHING_CACHED; 3468 break; 3469 3470 case I915_CACHE_WT: 3471 args->caching = I915_CACHING_DISPLAY; 3472 break; 3473 3474 default: 3475 args->caching = I915_CACHING_NONE; 3476 break; 3477 } 3478 3479 drm_gem_object_unreference(&obj->base); 3480 unlock: 3481 mutex_unlock(&dev->struct_mutex); 3482 return ret; 3483 } 3484 3485 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3486 struct drm_file *file) 3487 { 3488 struct drm_i915_gem_caching *args = data; 3489 struct drm_i915_gem_object *obj; 3490 enum i915_cache_level level; 3491 int ret; 3492 3493 switch (args->caching) { 3494 case I915_CACHING_NONE: 3495 level = I915_CACHE_NONE; 3496 break; 3497 case I915_CACHING_CACHED: 3498 level = I915_CACHE_LLC; 3499 break; 3500 case I915_CACHING_DISPLAY: 3501 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3502 break; 3503 default: 3504 return -EINVAL; 3505 } 3506 3507 ret = i915_mutex_lock_interruptible(dev); 3508 if (ret) 3509 return ret; 3510 3511 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3512 if (&obj->base == NULL) { 3513 ret = -ENOENT; 3514 goto unlock; 3515 } 3516 3517 ret = i915_gem_object_set_cache_level(obj, level); 3518 3519 drm_gem_object_unreference(&obj->base); 3520 unlock: 3521 mutex_unlock(&dev->struct_mutex); 3522 return ret; 3523 } 3524 3525 static bool is_pin_display(struct drm_i915_gem_object *obj) 3526 { 3527 /* There are 3 sources that pin objects: 3528 * 1. The display engine (scanouts, sprites, cursors); 3529 * 2. Reservations for execbuffer; 3530 * 3. The user. 3531 * 3532 * We can ignore reservations as we hold the struct_mutex and 3533 * are only called outside of the reservation path. The user 3534 * can only increment pin_count once, and so if after 3535 * subtracting the potential reference by the user, any pin_count 3536 * remains, it must be due to another use by the display engine. 3537 */ 3538 return obj->pin_count - !!obj->user_pin_count; 3539 } 3540 3541 /* 3542 * Prepare buffer for display plane (scanout, cursors, etc). 3543 * Can be called from an uninterruptible phase (modesetting) and allows 3544 * any flushes to be pipelined (for pageflips). 
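 * A display-plane pin is expected to be balanced by a later unpin, e.g.
 * (an illustrative sketch only):
 *
 *      ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *      if (ret)
 *              return ret;
 *      ... scan out from the object ...
 *      i915_gem_object_unpin_from_display_plane(obj);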
3545 */ 3546 int 3547 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3548 u32 alignment, 3549 struct intel_ring_buffer *pipelined) 3550 { 3551 u32 old_read_domains, old_write_domain; 3552 int ret; 3553 3554 if (pipelined != obj->ring) { 3555 ret = i915_gem_object_sync(obj, pipelined); 3556 if (ret) 3557 return ret; 3558 } 3559 3560 /* Mark the pin_display early so that we account for the 3561 * display coherency whilst setting up the cache domains. 3562 */ 3563 obj->pin_display = true; 3564 3565 /* The display engine is not coherent with the LLC cache on gen6. As 3566 * a result, we make sure that the pinning that is about to occur is 3567 * done with uncached PTEs. This is lowest common denominator for all 3568 * chipsets. 3569 * 3570 * However for gen6+, we could do better by using the GFDT bit instead 3571 * of uncaching, which would allow us to flush all the LLC-cached data 3572 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3573 */ 3574 ret = i915_gem_object_set_cache_level(obj, 3575 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3576 if (ret) 3577 goto err_unpin_display; 3578 3579 /* As the user may map the buffer once pinned in the display plane 3580 * (e.g. libkms for the bootup splash), we have to ensure that we 3581 * always use map_and_fenceable for all scanout buffers. 3582 */ 3583 ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false); 3584 if (ret) 3585 goto err_unpin_display; 3586 3587 i915_gem_object_flush_cpu_write_domain(obj, true); 3588 3589 old_write_domain = obj->base.write_domain; 3590 old_read_domains = obj->base.read_domains; 3591 3592 /* It should now be out of any other write domains, and we can update 3593 * the domain values for our changes. 3594 */ 3595 obj->base.write_domain = 0; 3596 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3597 3598 trace_i915_gem_object_change_domain(obj, 3599 old_read_domains, 3600 old_write_domain); 3601 3602 return 0; 3603 3604 err_unpin_display: 3605 obj->pin_display = is_pin_display(obj); 3606 return ret; 3607 } 3608 3609 void 3610 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) 3611 { 3612 i915_gem_object_unpin(obj); 3613 obj->pin_display = is_pin_display(obj); 3614 } 3615 3616 int 3617 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3618 { 3619 int ret; 3620 3621 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3622 return 0; 3623 3624 ret = i915_gem_object_wait_rendering(obj, false); 3625 if (ret) 3626 return ret; 3627 3628 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3629 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3630 return 0; 3631 } 3632 3633 /** 3634 * Moves a single object to the CPU read, and possibly write domain. 3635 * 3636 * This function returns when the move is complete, including waiting on 3637 * flushes to occur. 3638 */ 3639 int 3640 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3641 { 3642 uint32_t old_write_domain, old_read_domains; 3643 int ret; 3644 3645 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3646 return 0; 3647 3648 ret = i915_gem_object_wait_rendering(obj, !write); 3649 if (ret) 3650 return ret; 3651 3652 i915_gem_object_flush_gtt_write_domain(obj); 3653 3654 old_write_domain = obj->base.write_domain; 3655 old_read_domains = obj->base.read_domains; 3656 3657 /* Flush the CPU cache if it's still invalid. 
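 * (The clflush below discards any stale cachelines so that subsequent CPU
 * reads see the current contents of the backing pages.)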
*/ 3658 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3659 i915_gem_clflush_object(obj, false); 3660 3661 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3662 } 3663 3664 /* It should now be out of any other write domains, and we can update 3665 * the domain values for our changes. 3666 */ 3667 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3668 3669 /* If we're writing through the CPU, then the GPU read domains will 3670 * need to be invalidated at next use. 3671 */ 3672 if (write) { 3673 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3674 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3675 } 3676 3677 trace_i915_gem_object_change_domain(obj, 3678 old_read_domains, 3679 old_write_domain); 3680 3681 return 0; 3682 } 3683 3684 /* Throttle our rendering by waiting until the ring has completed our requests 3685 * emitted over 20 msec ago. 3686 * 3687 * Note that if we were to use the current jiffies each time around the loop, 3688 * we wouldn't escape the function with any frames outstanding if the time to 3689 * render a frame was over 20ms. 3690 * 3691 * This should get us reasonable parallelism between CPU and GPU but also 3692 * relatively low latency when blocking on a particular request to finish. 3693 */ 3694 static int 3695 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3696 { 3697 struct drm_i915_private *dev_priv = dev->dev_private; 3698 struct drm_i915_file_private *file_priv = file->driver_priv; 3699 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3700 struct drm_i915_gem_request *request; 3701 struct intel_ring_buffer *ring = NULL; 3702 unsigned reset_counter; 3703 u32 seqno = 0; 3704 int ret; 3705 3706 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 3707 if (ret) 3708 return ret; 3709 3710 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 3711 if (ret) 3712 return ret; 3713 3714 spin_lock(&file_priv->mm.lock); 3715 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3716 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3717 break; 3718 3719 ring = request->ring; 3720 seqno = request->seqno; 3721 } 3722 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3723 spin_unlock(&file_priv->mm.lock); 3724 3725 if (seqno == 0) 3726 return 0; 3727 3728 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); 3729 if (ret == 0) 3730 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3731 3732 return ret; 3733 } 3734 3735 int 3736 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3737 struct i915_address_space *vm, 3738 uint32_t alignment, 3739 bool map_and_fenceable, 3740 bool nonblocking) 3741 { 3742 struct i915_vma *vma; 3743 int ret; 3744 3745 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3746 return -EBUSY; 3747 3748 WARN_ON(map_and_fenceable && !i915_is_ggtt(vm)); 3749 3750 vma = i915_gem_obj_to_vma(obj, vm); 3751 3752 if (vma) { 3753 if ((alignment && 3754 vma->node.start & (alignment - 1)) || 3755 (map_and_fenceable && !obj->map_and_fenceable)) { 3756 WARN(obj->pin_count, 3757 "bo is already pinned with incorrect alignment:" 3758 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 3759 " obj->map_and_fenceable=%d\n", 3760 i915_gem_obj_offset(obj, vm), alignment, 3761 map_and_fenceable, 3762 obj->map_and_fenceable); 3763 ret = i915_vma_unbind(vma); 3764 if (ret) 3765 return ret; 3766 } 3767 } 3768 3769 if (!i915_gem_obj_bound(obj, vm)) { 3770 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3771 3772 ret = 
i915_gem_object_bind_to_vm(obj, vm, alignment, 3773 map_and_fenceable, 3774 nonblocking); 3775 if (ret) 3776 return ret; 3777 3778 if (!dev_priv->mm.aliasing_ppgtt) 3779 i915_gem_gtt_bind_object(obj, obj->cache_level); 3780 } 3781 3782 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3783 i915_gem_gtt_bind_object(obj, obj->cache_level); 3784 3785 obj->pin_count++; 3786 obj->pin_mappable |= map_and_fenceable; 3787 3788 return 0; 3789 } 3790 3791 void 3792 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3793 { 3794 BUG_ON(obj->pin_count == 0); 3795 BUG_ON(!i915_gem_obj_bound_any(obj)); 3796 3797 if (--obj->pin_count == 0) 3798 obj->pin_mappable = false; 3799 } 3800 3801 int 3802 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3803 struct drm_file *file) 3804 { 3805 struct drm_i915_gem_pin *args = data; 3806 struct drm_i915_gem_object *obj; 3807 int ret; 3808 3809 ret = i915_mutex_lock_interruptible(dev); 3810 if (ret) 3811 return ret; 3812 3813 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3814 if (&obj->base == NULL) { 3815 ret = -ENOENT; 3816 goto unlock; 3817 } 3818 3819 if (obj->madv != I915_MADV_WILLNEED) { 3820 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3821 ret = -EINVAL; 3822 goto out; 3823 } 3824 3825 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3826 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3827 args->handle); 3828 ret = -EINVAL; 3829 goto out; 3830 } 3831 3832 if (obj->user_pin_count == 0) { 3833 ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false); 3834 if (ret) 3835 goto out; 3836 } 3837 3838 obj->user_pin_count++; 3839 obj->pin_filp = file; 3840 3841 args->offset = i915_gem_obj_ggtt_offset(obj); 3842 out: 3843 drm_gem_object_unreference(&obj->base); 3844 unlock: 3845 mutex_unlock(&dev->struct_mutex); 3846 return ret; 3847 } 3848 3849 int 3850 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3851 struct drm_file *file) 3852 { 3853 struct drm_i915_gem_pin *args = data; 3854 struct drm_i915_gem_object *obj; 3855 int ret; 3856 3857 ret = i915_mutex_lock_interruptible(dev); 3858 if (ret) 3859 return ret; 3860 3861 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3862 if (&obj->base == NULL) { 3863 ret = -ENOENT; 3864 goto unlock; 3865 } 3866 3867 if (obj->pin_filp != file) { 3868 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3869 args->handle); 3870 ret = -EINVAL; 3871 goto out; 3872 } 3873 obj->user_pin_count--; 3874 if (obj->user_pin_count == 0) { 3875 obj->pin_filp = NULL; 3876 i915_gem_object_unpin(obj); 3877 } 3878 3879 out: 3880 drm_gem_object_unreference(&obj->base); 3881 unlock: 3882 mutex_unlock(&dev->struct_mutex); 3883 return ret; 3884 } 3885 3886 int 3887 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3888 struct drm_file *file) 3889 { 3890 struct drm_i915_gem_busy *args = data; 3891 struct drm_i915_gem_object *obj; 3892 int ret; 3893 3894 ret = i915_mutex_lock_interruptible(dev); 3895 if (ret) 3896 return ret; 3897 3898 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3899 if (&obj->base == NULL) { 3900 ret = -ENOENT; 3901 goto unlock; 3902 } 3903 3904 /* Count all active objects as busy, even if they are currently not used 3905 * by the gpu. Users of this interface expect objects to eventually 3906 * become non-busy without any further actions, therefore emit any 3907 * necessary flushes here. 
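 * (i915_gem_object_flush_active() below does exactly that: it checks for
 * an outstanding lazy request and retires whatever has already completed
 * on the object's ring.)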
3908 */ 3909 ret = i915_gem_object_flush_active(obj); 3910 3911 args->busy = obj->active; 3912 if (obj->ring) { 3913 BUILD_BUG_ON(I915_NUM_RINGS > 16); 3914 args->busy |= intel_ring_flag(obj->ring) << 16; 3915 } 3916 3917 drm_gem_object_unreference(&obj->base); 3918 unlock: 3919 mutex_unlock(&dev->struct_mutex); 3920 return ret; 3921 } 3922 3923 int 3924 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3925 struct drm_file *file_priv) 3926 { 3927 return i915_gem_ring_throttle(dev, file_priv); 3928 } 3929 3930 int 3931 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3932 struct drm_file *file_priv) 3933 { 3934 struct drm_i915_gem_madvise *args = data; 3935 struct drm_i915_gem_object *obj; 3936 int ret; 3937 3938 switch (args->madv) { 3939 case I915_MADV_DONTNEED: 3940 case I915_MADV_WILLNEED: 3941 break; 3942 default: 3943 return -EINVAL; 3944 } 3945 3946 ret = i915_mutex_lock_interruptible(dev); 3947 if (ret) 3948 return ret; 3949 3950 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3951 if (&obj->base == NULL) { 3952 ret = -ENOENT; 3953 goto unlock; 3954 } 3955 3956 if (obj->pin_count) { 3957 ret = -EINVAL; 3958 goto out; 3959 } 3960 3961 if (obj->madv != __I915_MADV_PURGED) 3962 obj->madv = args->madv; 3963 3964 /* if the object is no longer attached, discard its backing storage */ 3965 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3966 i915_gem_object_truncate(obj); 3967 3968 args->retained = obj->madv != __I915_MADV_PURGED; 3969 3970 out: 3971 drm_gem_object_unreference(&obj->base); 3972 unlock: 3973 mutex_unlock(&dev->struct_mutex); 3974 return ret; 3975 } 3976 3977 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3978 const struct drm_i915_gem_object_ops *ops) 3979 { 3980 INIT_LIST_HEAD(&obj->global_list); 3981 INIT_LIST_HEAD(&obj->ring_list); 3982 INIT_LIST_HEAD(&obj->exec_list); 3983 INIT_LIST_HEAD(&obj->obj_exec_link); 3984 INIT_LIST_HEAD(&obj->vma_list); 3985 3986 obj->ops = ops; 3987 3988 obj->fence_reg = I915_FENCE_REG_NONE; 3989 obj->madv = I915_MADV_WILLNEED; 3990 /* Avoid an unnecessary call to unbind on the first bind. */ 3991 obj->map_and_fenceable = true; 3992 3993 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3994 } 3995 3996 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3997 .get_pages = i915_gem_object_get_pages_gtt, 3998 .put_pages = i915_gem_object_put_pages_gtt, 3999 }; 4000 4001 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4002 size_t size) 4003 { 4004 struct drm_i915_gem_object *obj; 4005 struct address_space *mapping; 4006 gfp_t mask; 4007 4008 obj = i915_gem_object_alloc(dev); 4009 if (obj == NULL) 4010 return NULL; 4011 4012 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4013 i915_gem_object_free(obj); 4014 return NULL; 4015 } 4016 4017 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4018 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4019 /* 965gm cannot relocate objects above 4GiB. */ 4020 mask &= ~__GFP_HIGHMEM; 4021 mask |= __GFP_DMA32; 4022 } 4023 4024 mapping = file_inode(obj->base.filp)->i_mapping; 4025 mapping_set_gfp_mask(mapping, mask); 4026 4027 i915_gem_object_init(obj, &i915_gem_object_ops); 4028 4029 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4030 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4031 4032 if (HAS_LLC(dev)) { 4033 /* On some devices, we can have the GPU use the LLC (the CPU 4034 * cache) for about a 10% performance improvement 4035 * compared to uncached. 
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	return 0;
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct i915_vma *vma, *next;

	trace_i915_gem_object_destroy(obj);

	if (obj->phys_obj)
		i915_gem_detach_phys_object(dev, obj);

	obj->pin_count = 0;
	/* NB: 0 or 1 elements */
	WARN_ON(!list_empty(&obj->vma_list) &&
		!list_is_singular(&obj->vma_list));
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		int ret = i915_vma_unbind(vma);
		if (WARN_ON(ret == -ERESTARTSYS)) {
			bool was_interruptible;

			was_interruptible = dev_priv->mm.interruptible;
			dev_priv->mm.interruptible = false;

			WARN_ON(i915_vma_unbind(vma));

			dev_priv->mm.interruptible = was_interruptible;
		}
	}

	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing. */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);
	i915_gem_object_release_stolen(obj);

	BUG_ON(obj->pages);

	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);
}

struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
	WARN_ON(vma->node.allocated);
	list_del(&vma->vma_link);
	kfree(vma);
}

int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (dev_priv->ums.mm_suspended) {
		mutex_unlock(&dev->struct_mutex);
		return 0;
	}

	ret = i915_gpu_idle(dev);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	i915_gem_retire_requests(dev);

	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		i915_gem_evict_everything(dev);

	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);

	i915_kernel_lost_context(dev);
	i915_gem_cleanup_ringbuffer(dev);

	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);

	return 0;
}

void i915_gem_l3_remap(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 misccpctl;
	int i;

	if (!HAS_L3_GPU_CACHE(dev))
		return;

	if (!dev_priv->l3_parity.remap_info)
		return;

	misccpctl = I915_READ(GEN7_MISCCPCTL);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	POSTING_READ(GEN7_MISCCPCTL);

	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG("0x%x was already programmed to %x\n",
				  GEN7_L3LOG_BASE + i, remap);
		if (remap && !dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG_DRIVER("Clearing remapped register\n");
		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
	}

	/* Make sure all the writes land before disabling dop clock gating */
	POSTING_READ(GEN7_L3LOG_BASE);

	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
}

void i915_gem_init_swizzling(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else
		BUG();
}

static bool
intel_enable_blt(struct drm_device *dev)
{
	if (!HAS_BLT(dev))
		return false;

	/* The blitter was dysfunctional on early prototypes */
	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
		DRM_INFO("BLT not supported on this pre-production hardware;"
			 " graphics performance will be degraded.\n");
		return false;
	}

	return true;
}
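
/* Bring up the rings in a fixed order (render, then BSD, blitter and vebox
 * where present); on failure the cleanup labels below unwind them in reverse.
 * The initial seqno is seeded close to the 32-bit wrap point, presumably so
 * that seqno wraparound handling is exercised soon after boot rather than
 * only after days of uptime.
 */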
static int i915_gem_init_rings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_init_render_ring_buffer(dev);
	if (ret)
		return ret;

	if (HAS_BSD(dev)) {
		ret = intel_init_bsd_ring_buffer(dev);
		if (ret)
			goto cleanup_render_ring;
	}

	if (intel_enable_blt(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

	if (HAS_VEBOX(dev)) {
		ret = intel_init_vebox_ring_buffer(dev);
		if (ret)
			goto cleanup_blt_ring;
	}

	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
	if (ret)
		goto cleanup_vebox_ring;

	return 0;

cleanup_vebox_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (HAS_PCH_NOP(dev)) {
		u32 temp = I915_READ(GEN7_MSG_CTL);
		temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
		I915_WRITE(GEN7_MSG_CTL, temp);
	}

	i915_gem_l3_remap(dev);

	i915_gem_init_swizzling(dev);

	ret = i915_gem_init_rings(dev);
	if (ret)
		return ret;

	/*
	 * XXX: There was some w/a described somewhere suggesting loading
	 * contexts before PPGTT.
	 */
	i915_gem_context_init(dev);
	if (dev_priv->mm.aliasing_ppgtt) {
		ret = dev_priv->mm.aliasing_ppgtt->enable(dev);
		if (ret) {
			i915_gem_cleanup_aliasing_ppgtt(dev);
			DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n");
		}
	}

	return 0;
}

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	mutex_lock(&dev->struct_mutex);

	if (IS_VALLEYVIEW(dev)) {
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
		I915_WRITE(VLV_GTLC_WAKE_CTRL, 1);
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 1) == 1, 10))
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
	}

	i915_gem_init_global_gtt(dev);

	ret = i915_gem_init_hw(dev);
	mutex_unlock(&dev->struct_mutex);
	if (ret) {
		i915_gem_cleanup_aliasing_ppgtt(dev);
		return ret;
	}

	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->dri1.allow_batchbuffer = 1;
	return 0;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		intel_cleanup_ring_buffer(ring);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->gpu_error.reset_counter, 0);
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->ums.mm_suspended = 0;

	ret = i915_gem_init_hw(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
	mutex_unlock(&dev->struct_mutex);

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->ums.mm_suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_idle(dev);

	/* Hack! Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound ums.mm_suspended!
	 */
	if (ret != 0)
		dev_priv->ums.mm_suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
	mutex_unlock(&dev->struct_mutex);
}

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

static void i915_init_vm(struct drm_i915_private *dev_priv,
			 struct i915_address_space *vm)
{
	vm->dev = dev_priv->dev;
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
	INIT_LIST_HEAD(&vm->global_link);
	list_add(&vm->global_link, &dev_priv->vm_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i;

	dev_priv->slab =
		kmem_cache_create("i915_gem_object",
				  sizeof(struct drm_i915_gem_object), 0,
				  SLAB_HWCACHE_ALIGN,
				  NULL);

	INIT_LIST_HEAD(&dev_priv->vm_list);
	i915_init_vm(dev_priv, &dev_priv->gtt.base);

	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
}
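
/* The "phys object" helpers below keep a shmem-backed GEM object mirrored in
 * a physically contiguous drm_pci_alloc() buffer that the hardware addresses
 * directly. Attaching copies the current shmem contents into that buffer;
 * detaching copies the data back out, clflushing so the CPU mapping stays
 * coherent, before the object reverts to its normal shmem backing.
 */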

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, align);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->vaddr;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
	}
	i915_gem_chipset_flush(dev);

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	i915_gem_chipset_flush(dev);
	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}
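
/* The shrinker can be invoked from within our own allocation paths, i.e.
 * while struct_mutex is already held by the current task (typically via
 * direct reclaim during one of our allocations). mutex_is_locked_by() lets
 * us detect that case and proceed without the trylock instead of
 * deadlocking, while shrinker_no_lock_stealing opts a critical section out
 * of this trick entirely.
 */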
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
#else
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
	return false;
#endif
}

static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	int nr_to_scan = sc->nr_to_scan;
	bool unlock = true;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return 0;

		if (dev_priv->mm.shrinker_no_lock_stealing)
			return 0;

		unlock = false;
	}

	if (nr_to_scan) {
		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
		if (nr_to_scan > 0)
			nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
							false);
		if (nr_to_scan > 0)
			i915_gem_shrink_all(dev_priv);
	}

	cnt = 0;
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
		if (obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		if (obj->active)
			continue;

		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;
	}

	if (unlock)
		mutex_unlock(&dev->struct_mutex);
	return cnt;
}

/* All the new VM stuff */
unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
				  struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
		vm = &dev_priv->gtt.base;

	BUG_ON(list_empty(&o->vma_list));
	list_for_each_entry(vma, &o->vma_list, vma_link) {
		if (vma->vm == vm)
			return vma->node.start;
	}
	return -1;
}

bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
			struct i915_address_space *vm)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
			return true;

	return false;
}

bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_address_space *vm;

	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
		if (i915_gem_obj_bound(o, vm))
			return true;

	return false;
}

unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
				struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
	struct i915_vma *vma;

	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
		vm = &dev_priv->gtt.base;

	BUG_ON(list_empty(&o->vma_list));

	list_for_each_entry(vma, &o->vma_list, vma_link)
		if (vma->vm == vm)
			return vma->node.size;

	return 0;
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, vma_link)
		if (vma->vm == vm)
			return vma;

	return NULL;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = i915_gem_vma_create(obj, vm);

	return vma;
}