/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
							   bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
								   uint64_t offset,
								   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}
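/* Worked example (illustrative values, assuming the usual 4 KiB pages): for
 * a dumb buffer of width 1024 and bpp 32, (32 + 7) / 8 = 4 bytes per pixel,
 * so the unaligned pitch is 4096 bytes; ALIGN(4096, 64) leaves it at 4096,
 * and a height of 768 gives args->size = 4096 * 768 = 3 MiB, which
 * i915_gem_create() then rounds up to a whole number of pages.
 */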
int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}
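/* Worked example (illustrative, derived from the loop above): with bit 17
 * swizzling, each 64-byte cacheline of an affected page swaps places with
 * its neighbour 64 bytes away.  For gpu_offset 0 the copy uses offset 64
 * (0 ^ 64), for gpu_offset 64 it uses offset 0, and so on cacheline by
 * cacheline until `length` bytes have been transferred.
 */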
/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					page,
					shmem_page_offset,
					page_length);
		}

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}
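/* Worked example (illustrative values, assuming 4096-byte pages): the
 * chunking in both pread paths never lets a single copy cross a page
 * boundary on either side.  Reading 8000 bytes at object offset 4000 is
 * split into copies of 96 bytes (to the end of the first object page),
 * then 4096 bytes, then the remaining 3808 bytes.
 */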
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}
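/* Illustrative example (values made up, assuming 4 KiB pages): in the loop
 * above an aperture offset of 0x12345 is split into page_base = 0x12000
 * (offset & PAGE_MASK) and page_offset = 0x345 (offset_in_page(offset)),
 * so each atomic write-combining copy stays within a single GTT page
 * mapping.
 */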
/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin_pages;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}
/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(page,
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}
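/* Summary of the dispatch in the ioctl below (descriptive note, not from the
 * original source): pwrite picks one of three strategies.  Phys objects use
 * i915_gem_phys_pwrite(); objects bound in the GTT whose write domain is not
 * the CPU go through the GTT fast/slow pair; everything else falls back to
 * the shmem fast/slow pair, with the slow variant used whenever the fast one
 * faults.
 */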
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
	else if (obj->gtt_space &&
		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			goto out_unpin;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			goto out;

		ret = -EFAULT;
		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);
		return -E2BIG;
	}

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}
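/* Clarifying note (not from the original source): the mmap ioctl above maps
 * the object's shmem backing pages directly and involves no aperture space;
 * GTT access instead goes through the fake offset returned by the mmap_gtt
 * ioctl further below and is populated on demand by the fault handler that
 * follows.
 */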
/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret)
			goto unlock;
	}
	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
		if (ret)
			goto unlock;

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		goto unlock;

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
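/* Worked example (illustrative, following the helpers above): on a gen3
 * part, a 1.5 MiB tiled object starts from the 1 MiB minimum fence size and
 * doubles until it covers the object, giving a 2 MiB fence region;
 * i915_gem_get_gtt_alignment() then requires the same 2 MiB alignment,
 * whereas gen4+ or untiled objects only need 4 KiB.
 */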
/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two fence-region size computed above.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (!obj->base.map_list.map) {
		ret = drm_gem_create_mmap_offset(&obj->base);
		if (ret)
			goto out;
	}

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}


static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
		return -ENOMEM;

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	gfpmask |= mapping_gfp_mask(mapping);

	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
		if (IS_ERR(page))
			goto err_pages;

		obj->pages[i] = page;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
	obj->pages = NULL;
	return PTR_ERR(page);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);
	}
	obj->dirty = 0;

	drm_free_large(obj->pages);
	obj->pages = NULL;
}
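/* Clarifying note (not from the original source): get_pages_gtt and
 * put_pages_gtt are symmetric.  Pages pinned when binding an object are
 * released on unbind, written back with set_page_dirty() only if the object
 * is still dirty (madvise DONTNEED clears the dirty flag first), and a
 * purged object must never reach put_pages_gtt, hence the BUG_ON above.
 */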
void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring,
			       u32 seqno)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}
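/* Summary (descriptive note, not from the original source): an object's
 * lifecycle on the GPU lists runs active -> flushing (if it still has a GPU
 * write domain when its last request retires) -> inactive, or straight to
 * inactive when it has no pending write.  Entering the active list takes a
 * reference on the object and leaving it drops that reference again.
 */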
/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	shmem_truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
				 gpu_write_list) {
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring,
						       i915_gem_next_request_seqno(ring));

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
		}
	}
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t seqno;
	int was_empty;
	int ret;

	BUG_ON(request == NULL);

	ret = ring->add_request(ring, &seqno);
	if (ret)
		return ret;

	trace_i915_gem_request_add(ring, seqno);

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = false;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  jiffies +
				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		}
		if (was_empty)
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
	}
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}
static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct drm_i915_gem_object *obj = reg->obj;

		if (!obj)
			continue;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
static void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;
	int i;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring);

	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
		if (seqno >= ring->sync_seqno[i])
			ring->sync_seqno[i] = 0;

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		if (!i915_seqno_passed(seqno, request->seqno))
			break;

		trace_i915_gem_request_retire(ring, request->seqno);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
			break;

		if (obj->base.write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
	}

	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
		ring->irq_put(ring);
		ring->trace_irq_seqno = 0;
	}

	WARN_ON(i915_verify_lists(ring->dev));
}

void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i;

	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
		struct drm_i915_gem_object *obj, *next;

		/* We must be careful that during unbind() we do not
		 * accidentally infinitely recurse into retire requests.
		 * Currently:
		 *   retire -> free -> unbind -> wait -> retire_ring
		 */
		list_for_each_entry_safe(obj, next,
					 &dev_priv->mm.deferred_free_list,
					 mm_list)
			i915_gem_free_object_tail(obj);
	}

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
}

static void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
	bool idle;
	int i;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

	i915_gem_retire_requests(dev);

	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_ring_buffer *ring = &dev_priv->ring[i];

		if (!list_empty(&ring->gpu_write_list)) {
			struct drm_i915_gem_request *request;
			int ret;

			ret = i915_gem_flush_ring(ring,
						  0, I915_GEM_GPU_DOMAINS);
			request = kzalloc(sizeof(*request), GFP_KERNEL);
			if (ret || request == NULL ||
			    i915_add_request(ring, NULL, request))
				kfree(request);
		}

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);

	mutex_unlock(&dev->struct_mutex);
}
/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct intel_ring_buffer *ring,
		  uint32_t seqno)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		return recovery_complete ? -EIO : -EAGAIN;
	}

	if (seqno == ring->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ret = i915_add_request(ring, NULL, request);
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
	}

	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
		if (HAS_PCH_SPLIT(ring->dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			ring->dev->driver->irq_preinstall(ring->dev);
			ring->dev->driver->irq_postinstall(ring->dev);
		}

		trace_i915_gem_request_wait_begin(ring, seqno);

		ring->waiting_seqno = seqno;
		if (ring->irq_get(ring)) {
			if (dev_priv->mm.interruptible)
				ret = wait_event_interruptible(ring->irq_queue,
							       i915_seqno_passed(ring->get_seqno(ring), seqno)
							       || atomic_read(&dev_priv->mm.wedged));
			else
				wait_event(ring->irq_queue,
					   i915_seqno_passed(ring->get_seqno(ring), seqno)
					   || atomic_read(&dev_priv->mm.wedged));

			ring->irq_put(ring);
		} else if (wait_for(i915_seqno_passed(ring->get_seqno(ring),
						      seqno) ||
				    atomic_read(&dev_priv->mm.wedged), 3000))
			ret = -EBUSY;
		ring->waiting_seqno = 0;

		trace_i915_gem_request_wait_end(ring, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EAGAIN;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
			  __func__, ret, seqno, ring->get_seqno(ring),
			  dev_priv->next_seqno);

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests_ring(ring);

	return ret;
}
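/* Clarifying note (not from the original source): when the caller waits on
 * the ring's outstanding lazy request, i915_wait_request() first emits a
 * real request via i915_add_request() and waits on that request's seqno, so
 * the value being waited for is guaranteed to eventually be written to the
 * ring rather than remaining an unsignalled placeholder.
 */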
 */
	if (obj->active) {
		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Act as a barrier for all accesses through the GTT */
	mb();

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
}

/**
 * Unbinds an object from the GTT aperture.
 */
int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	int ret = 0;

	if (obj->gtt_space == NULL)
		return 0;

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	ret = i915_gem_object_finish_gpu(obj);
	if (ret == -ERESTARTSYS)
		return ret;
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to clean up or else we might
	 * cause memory corruption through use-after-free.
	 */

	i915_gem_object_finish_gtt(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it.
	 */
	if (ret == 0)
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTARTSYS)
		return ret;
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	/* release the fence reg _after_ flushing */
	ret = i915_gem_object_put_fence(obj);
	if (ret == -ERESTARTSYS)
		return ret;

	trace_i915_gem_object_unbind(obj);

	i915_gem_gtt_unbind_object(obj);
	i915_gem_object_put_pages_gtt(obj);

	list_del_init(&obj->gtt_list);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind.
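	 * (This matches the initial state set in i915_gem_alloc_object(),
	 * so a freshly created object likewise skips the unbind path on
	 * its first bind.)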
*/ 2141 obj->map_and_fenceable = true; 2142 2143 drm_mm_put_block(obj->gtt_space); 2144 obj->gtt_space = NULL; 2145 obj->gtt_offset = 0; 2146 2147 if (i915_gem_object_is_purgeable(obj)) 2148 i915_gem_object_truncate(obj); 2149 2150 return ret; 2151 } 2152 2153 int 2154 i915_gem_flush_ring(struct intel_ring_buffer *ring, 2155 uint32_t invalidate_domains, 2156 uint32_t flush_domains) 2157 { 2158 int ret; 2159 2160 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) 2161 return 0; 2162 2163 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); 2164 2165 ret = ring->flush(ring, invalidate_domains, flush_domains); 2166 if (ret) 2167 return ret; 2168 2169 if (flush_domains & I915_GEM_GPU_DOMAINS) 2170 i915_gem_process_flushing_list(ring, flush_domains); 2171 2172 return 0; 2173 } 2174 2175 static int i915_ring_idle(struct intel_ring_buffer *ring) 2176 { 2177 int ret; 2178 2179 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) 2180 return 0; 2181 2182 if (!list_empty(&ring->gpu_write_list)) { 2183 ret = i915_gem_flush_ring(ring, 2184 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2185 if (ret) 2186 return ret; 2187 } 2188 2189 return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); 2190 } 2191 2192 int 2193 i915_gpu_idle(struct drm_device *dev) 2194 { 2195 drm_i915_private_t *dev_priv = dev->dev_private; 2196 int ret, i; 2197 2198 /* Flush everything onto the inactive list. */ 2199 for (i = 0; i < I915_NUM_RINGS; i++) { 2200 ret = i915_ring_idle(&dev_priv->ring[i]); 2201 if (ret) 2202 return ret; 2203 } 2204 2205 return 0; 2206 } 2207 2208 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2209 struct intel_ring_buffer *pipelined) 2210 { 2211 struct drm_device *dev = obj->base.dev; 2212 drm_i915_private_t *dev_priv = dev->dev_private; 2213 u32 size = obj->gtt_space->size; 2214 int regnum = obj->fence_reg; 2215 uint64_t val; 2216 2217 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2218 0xfffff000) << 32; 2219 val |= obj->gtt_offset & 0xfffff000; 2220 val |= (uint64_t)((obj->stride / 128) - 1) << 2221 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2222 2223 if (obj->tiling_mode == I915_TILING_Y) 2224 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2225 val |= I965_FENCE_REG_VALID; 2226 2227 if (pipelined) { 2228 int ret = intel_ring_begin(pipelined, 6); 2229 if (ret) 2230 return ret; 2231 2232 intel_ring_emit(pipelined, MI_NOOP); 2233 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2234 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); 2235 intel_ring_emit(pipelined, (u32)val); 2236 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); 2237 intel_ring_emit(pipelined, (u32)(val >> 32)); 2238 intel_ring_advance(pipelined); 2239 } else 2240 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2241 2242 return 0; 2243 } 2244 2245 static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2246 struct intel_ring_buffer *pipelined) 2247 { 2248 struct drm_device *dev = obj->base.dev; 2249 drm_i915_private_t *dev_priv = dev->dev_private; 2250 u32 size = obj->gtt_space->size; 2251 int regnum = obj->fence_reg; 2252 uint64_t val; 2253 2254 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2255 0xfffff000) << 32; 2256 val |= obj->gtt_offset & 0xfffff000; 2257 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2258 if (obj->tiling_mode == I915_TILING_Y) 2259 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2260 val |= I965_FENCE_REG_VALID; 2261 2262 if (pipelined) { 2263 int ret = 
intel_ring_begin(pipelined, 6); 2264 if (ret) 2265 return ret; 2266 2267 intel_ring_emit(pipelined, MI_NOOP); 2268 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2269 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); 2270 intel_ring_emit(pipelined, (u32)val); 2271 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); 2272 intel_ring_emit(pipelined, (u32)(val >> 32)); 2273 intel_ring_advance(pipelined); 2274 } else 2275 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2276 2277 return 0; 2278 } 2279 2280 static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2281 struct intel_ring_buffer *pipelined) 2282 { 2283 struct drm_device *dev = obj->base.dev; 2284 drm_i915_private_t *dev_priv = dev->dev_private; 2285 u32 size = obj->gtt_space->size; 2286 u32 fence_reg, val, pitch_val; 2287 int tile_width; 2288 2289 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2290 (size & -size) != size || 2291 (obj->gtt_offset & (size - 1)), 2292 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", 2293 obj->gtt_offset, obj->map_and_fenceable, size)) 2294 return -EINVAL; 2295 2296 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2297 tile_width = 128; 2298 else 2299 tile_width = 512; 2300 2301 /* Note: pitch better be a power of two tile widths */ 2302 pitch_val = obj->stride / tile_width; 2303 pitch_val = ffs(pitch_val) - 1; 2304 2305 val = obj->gtt_offset; 2306 if (obj->tiling_mode == I915_TILING_Y) 2307 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2308 val |= I915_FENCE_SIZE_BITS(size); 2309 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2310 val |= I830_FENCE_REG_VALID; 2311 2312 fence_reg = obj->fence_reg; 2313 if (fence_reg < 8) 2314 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2315 else 2316 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2317 2318 if (pipelined) { 2319 int ret = intel_ring_begin(pipelined, 4); 2320 if (ret) 2321 return ret; 2322 2323 intel_ring_emit(pipelined, MI_NOOP); 2324 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2325 intel_ring_emit(pipelined, fence_reg); 2326 intel_ring_emit(pipelined, val); 2327 intel_ring_advance(pipelined); 2328 } else 2329 I915_WRITE(fence_reg, val); 2330 2331 return 0; 2332 } 2333 2334 static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2335 struct intel_ring_buffer *pipelined) 2336 { 2337 struct drm_device *dev = obj->base.dev; 2338 drm_i915_private_t *dev_priv = dev->dev_private; 2339 u32 size = obj->gtt_space->size; 2340 int regnum = obj->fence_reg; 2341 uint32_t val; 2342 uint32_t pitch_val; 2343 2344 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2345 (size & -size) != size || 2346 (obj->gtt_offset & (size - 1)), 2347 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2348 obj->gtt_offset, size)) 2349 return -EINVAL; 2350 2351 pitch_val = obj->stride / 128; 2352 pitch_val = ffs(pitch_val) - 1; 2353 2354 val = obj->gtt_offset; 2355 if (obj->tiling_mode == I915_TILING_Y) 2356 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2357 val |= I830_FENCE_SIZE_BITS(size); 2358 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2359 val |= I830_FENCE_REG_VALID; 2360 2361 if (pipelined) { 2362 int ret = intel_ring_begin(pipelined, 4); 2363 if (ret) 2364 return ret; 2365 2366 intel_ring_emit(pipelined, MI_NOOP); 2367 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2368 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 2369 intel_ring_emit(pipelined, val); 2370 intel_ring_advance(pipelined); 2371 } else 2372 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2373 2374 return 0; 
2375 } 2376 2377 static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2378 { 2379 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2380 } 2381 2382 static int 2383 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2384 struct intel_ring_buffer *pipelined) 2385 { 2386 int ret; 2387 2388 if (obj->fenced_gpu_access) { 2389 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2390 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2391 0, obj->base.write_domain); 2392 if (ret) 2393 return ret; 2394 } 2395 2396 obj->fenced_gpu_access = false; 2397 } 2398 2399 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2400 if (!ring_passed_seqno(obj->last_fenced_ring, 2401 obj->last_fenced_seqno)) { 2402 ret = i915_wait_request(obj->last_fenced_ring, 2403 obj->last_fenced_seqno); 2404 if (ret) 2405 return ret; 2406 } 2407 2408 obj->last_fenced_seqno = 0; 2409 obj->last_fenced_ring = NULL; 2410 } 2411 2412 /* Ensure that all CPU reads are completed before installing a fence 2413 * and all writes before removing the fence. 2414 */ 2415 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2416 mb(); 2417 2418 return 0; 2419 } 2420 2421 int 2422 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2423 { 2424 int ret; 2425 2426 if (obj->tiling_mode) 2427 i915_gem_release_mmap(obj); 2428 2429 ret = i915_gem_object_flush_fence(obj, NULL); 2430 if (ret) 2431 return ret; 2432 2433 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2434 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2435 i915_gem_clear_fence_reg(obj->base.dev, 2436 &dev_priv->fence_regs[obj->fence_reg]); 2437 2438 obj->fence_reg = I915_FENCE_REG_NONE; 2439 } 2440 2441 return 0; 2442 } 2443 2444 static struct drm_i915_fence_reg * 2445 i915_find_fence_reg(struct drm_device *dev, 2446 struct intel_ring_buffer *pipelined) 2447 { 2448 struct drm_i915_private *dev_priv = dev->dev_private; 2449 struct drm_i915_fence_reg *reg, *first, *avail; 2450 int i; 2451 2452 /* First try to find a free reg */ 2453 avail = NULL; 2454 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2455 reg = &dev_priv->fence_regs[i]; 2456 if (!reg->obj) 2457 return reg; 2458 2459 if (!reg->obj->pin_count) 2460 avail = reg; 2461 } 2462 2463 if (avail == NULL) 2464 return NULL; 2465 2466 /* None available, try to steal one or wait for a user to finish */ 2467 avail = first = NULL; 2468 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2469 if (reg->obj->pin_count) 2470 continue; 2471 2472 if (first == NULL) 2473 first = reg; 2474 2475 if (!pipelined || 2476 !reg->obj->last_fenced_ring || 2477 reg->obj->last_fenced_ring == pipelined) { 2478 avail = reg; 2479 break; 2480 } 2481 } 2482 2483 if (avail == NULL) 2484 avail = first; 2485 2486 return avail; 2487 } 2488 2489 /** 2490 * i915_gem_object_get_fence - set up a fence reg for an object 2491 * @obj: object to map through a fence reg 2492 * @pipelined: ring on which to queue the change, or NULL for CPU access 2493 * @interruptible: must we wait uninterruptibly for the register to retire? 2494 * 2495 * When mapping objects through the GTT, userspace wants to be able to write 2496 * to them without having to worry about swizzling if the object is tiled. 2497 * 2498 * This function walks the fence regs looking for a free one for @obj, 2499 * stealing one if it can't find any. 2500 * 2501 * It then sets up the reg based on the object's properties: address, pitch 2502 * and tiling format. 
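 *
 * A rough usage sketch (error handling elided): the caller pins the
 * object and then, still holding struct_mutex, requests a fence before
 * performing tiled access through the GTT:
 *
 *      ret = i915_gem_object_pin(obj, 0, true);
 *      if (ret == 0)
 *              ret = i915_gem_object_get_fence(obj, NULL);
 *
 * Returns 0 on success, or a negative error code (-ENOSPC when no
 * fence register can be found or stolen).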
2503 */ 2504 int 2505 i915_gem_object_get_fence(struct drm_i915_gem_object *obj, 2506 struct intel_ring_buffer *pipelined) 2507 { 2508 struct drm_device *dev = obj->base.dev; 2509 struct drm_i915_private *dev_priv = dev->dev_private; 2510 struct drm_i915_fence_reg *reg; 2511 int ret; 2512 2513 /* XXX disable pipelining. There are bugs. Shocking. */ 2514 pipelined = NULL; 2515 2516 /* Just update our place in the LRU if our fence is getting reused. */ 2517 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2518 reg = &dev_priv->fence_regs[obj->fence_reg]; 2519 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2520 2521 if (obj->tiling_changed) { 2522 ret = i915_gem_object_flush_fence(obj, pipelined); 2523 if (ret) 2524 return ret; 2525 2526 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) 2527 pipelined = NULL; 2528 2529 if (pipelined) { 2530 reg->setup_seqno = 2531 i915_gem_next_request_seqno(pipelined); 2532 obj->last_fenced_seqno = reg->setup_seqno; 2533 obj->last_fenced_ring = pipelined; 2534 } 2535 2536 goto update; 2537 } 2538 2539 if (!pipelined) { 2540 if (reg->setup_seqno) { 2541 if (!ring_passed_seqno(obj->last_fenced_ring, 2542 reg->setup_seqno)) { 2543 ret = i915_wait_request(obj->last_fenced_ring, 2544 reg->setup_seqno); 2545 if (ret) 2546 return ret; 2547 } 2548 2549 reg->setup_seqno = 0; 2550 } 2551 } else if (obj->last_fenced_ring && 2552 obj->last_fenced_ring != pipelined) { 2553 ret = i915_gem_object_flush_fence(obj, pipelined); 2554 if (ret) 2555 return ret; 2556 } 2557 2558 return 0; 2559 } 2560 2561 reg = i915_find_fence_reg(dev, pipelined); 2562 if (reg == NULL) 2563 return -ENOSPC; 2564 2565 ret = i915_gem_object_flush_fence(obj, pipelined); 2566 if (ret) 2567 return ret; 2568 2569 if (reg->obj) { 2570 struct drm_i915_gem_object *old = reg->obj; 2571 2572 drm_gem_object_reference(&old->base); 2573 2574 if (old->tiling_mode) 2575 i915_gem_release_mmap(old); 2576 2577 ret = i915_gem_object_flush_fence(old, pipelined); 2578 if (ret) { 2579 drm_gem_object_unreference(&old->base); 2580 return ret; 2581 } 2582 2583 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) 2584 pipelined = NULL; 2585 2586 old->fence_reg = I915_FENCE_REG_NONE; 2587 old->last_fenced_ring = pipelined; 2588 old->last_fenced_seqno = 2589 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2590 2591 drm_gem_object_unreference(&old->base); 2592 } else if (obj->last_fenced_seqno == 0) 2593 pipelined = NULL; 2594 2595 reg->obj = obj; 2596 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2597 obj->fence_reg = reg - dev_priv->fence_regs; 2598 obj->last_fenced_ring = pipelined; 2599 2600 reg->setup_seqno = 2601 pipelined ? i915_gem_next_request_seqno(pipelined) : 0; 2602 obj->last_fenced_seqno = reg->setup_seqno; 2603 2604 update: 2605 obj->tiling_changed = false; 2606 switch (INTEL_INFO(dev)->gen) { 2607 case 7: 2608 case 6: 2609 ret = sandybridge_write_fence_reg(obj, pipelined); 2610 break; 2611 case 5: 2612 case 4: 2613 ret = i965_write_fence_reg(obj, pipelined); 2614 break; 2615 case 3: 2616 ret = i915_write_fence_reg(obj, pipelined); 2617 break; 2618 case 2: 2619 ret = i830_write_fence_reg(obj, pipelined); 2620 break; 2621 } 2622 2623 return ret; 2624 } 2625 2626 /** 2627 * i915_gem_clear_fence_reg - clear out fence register info 2628 * @obj: object to clear 2629 * 2630 * Zeroes out the fence register itself and clears out the associated 2631 * data structures in dev_priv and obj. 
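 *
 * Note that gen4 and later use 64-bit fence registers (written with
 * I915_WRITE64 below), while gen2/3 fences are 32-bit and live in the
 * 830/945 register ranges, mirroring the *_write_fence_reg() helpers
 * above.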
2632 */ 2633 static void 2634 i915_gem_clear_fence_reg(struct drm_device *dev, 2635 struct drm_i915_fence_reg *reg) 2636 { 2637 drm_i915_private_t *dev_priv = dev->dev_private; 2638 uint32_t fence_reg = reg - dev_priv->fence_regs; 2639 2640 switch (INTEL_INFO(dev)->gen) { 2641 case 7: 2642 case 6: 2643 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0); 2644 break; 2645 case 5: 2646 case 4: 2647 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0); 2648 break; 2649 case 3: 2650 if (fence_reg >= 8) 2651 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2652 else 2653 case 2: 2654 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2655 2656 I915_WRITE(fence_reg, 0); 2657 break; 2658 } 2659 2660 list_del_init(®->lru_list); 2661 reg->obj = NULL; 2662 reg->setup_seqno = 0; 2663 } 2664 2665 /** 2666 * Finds free space in the GTT aperture and binds the object there. 2667 */ 2668 static int 2669 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2670 unsigned alignment, 2671 bool map_and_fenceable) 2672 { 2673 struct drm_device *dev = obj->base.dev; 2674 drm_i915_private_t *dev_priv = dev->dev_private; 2675 struct drm_mm_node *free_space; 2676 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2677 u32 size, fence_size, fence_alignment, unfenced_alignment; 2678 bool mappable, fenceable; 2679 int ret; 2680 2681 if (obj->madv != I915_MADV_WILLNEED) { 2682 DRM_ERROR("Attempting to bind a purgeable object\n"); 2683 return -EINVAL; 2684 } 2685 2686 fence_size = i915_gem_get_gtt_size(dev, 2687 obj->base.size, 2688 obj->tiling_mode); 2689 fence_alignment = i915_gem_get_gtt_alignment(dev, 2690 obj->base.size, 2691 obj->tiling_mode); 2692 unfenced_alignment = 2693 i915_gem_get_unfenced_gtt_alignment(dev, 2694 obj->base.size, 2695 obj->tiling_mode); 2696 2697 if (alignment == 0) 2698 alignment = map_and_fenceable ? fence_alignment : 2699 unfenced_alignment; 2700 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 2701 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2702 return -EINVAL; 2703 } 2704 2705 size = map_and_fenceable ? fence_size : obj->base.size; 2706 2707 /* If the object is bigger than the entire aperture, reject it early 2708 * before evicting everything in a vain attempt to find space. 2709 */ 2710 if (obj->base.size > 2711 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2712 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2713 return -E2BIG; 2714 } 2715 2716 search_free: 2717 if (map_and_fenceable) 2718 free_space = 2719 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2720 size, alignment, 0, 2721 dev_priv->mm.gtt_mappable_end, 2722 0); 2723 else 2724 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2725 size, alignment, 0); 2726 2727 if (free_space != NULL) { 2728 if (map_and_fenceable) 2729 obj->gtt_space = 2730 drm_mm_get_block_range_generic(free_space, 2731 size, alignment, 0, 2732 dev_priv->mm.gtt_mappable_end, 2733 0); 2734 else 2735 obj->gtt_space = 2736 drm_mm_get_block(free_space, size, alignment); 2737 } 2738 if (obj->gtt_space == NULL) { 2739 /* If the gtt is empty and we're still having trouble 2740 * fitting our object in, we're out of memory. 
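 *
 * Otherwise evict just enough and retry. The fallback ladder below is
 * roughly:
 *
 *      drm_mm search fails        -> i915_gem_evict_something() and retry
 *      get_pages returns -ENOMEM  -> i915_gem_evict_everything() and retry,
 *                                    finally retrying with gfpmask = 0
 *      i915_gem_gtt_bind_object() fails -> evict everything and retry
 *
 * (a summary of the code that follows, not an extra code path)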
2741 */ 2742 ret = i915_gem_evict_something(dev, size, alignment, 2743 map_and_fenceable); 2744 if (ret) 2745 return ret; 2746 2747 goto search_free; 2748 } 2749 2750 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2751 if (ret) { 2752 drm_mm_put_block(obj->gtt_space); 2753 obj->gtt_space = NULL; 2754 2755 if (ret == -ENOMEM) { 2756 /* first try to reclaim some memory by clearing the GTT */ 2757 ret = i915_gem_evict_everything(dev, false); 2758 if (ret) { 2759 /* now try to shrink everyone else */ 2760 if (gfpmask) { 2761 gfpmask = 0; 2762 goto search_free; 2763 } 2764 2765 return -ENOMEM; 2766 } 2767 2768 goto search_free; 2769 } 2770 2771 return ret; 2772 } 2773 2774 ret = i915_gem_gtt_bind_object(obj); 2775 if (ret) { 2776 i915_gem_object_put_pages_gtt(obj); 2777 drm_mm_put_block(obj->gtt_space); 2778 obj->gtt_space = NULL; 2779 2780 if (i915_gem_evict_everything(dev, false)) 2781 return ret; 2782 2783 goto search_free; 2784 } 2785 2786 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2787 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2788 2789 /* Assert that the object is not currently in any GPU domain. As it 2790 * wasn't in the GTT, there shouldn't be any way it could have been in 2791 * a GPU cache 2792 */ 2793 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2794 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2795 2796 obj->gtt_offset = obj->gtt_space->start; 2797 2798 fenceable = 2799 obj->gtt_space->size == fence_size && 2800 (obj->gtt_space->start & (fence_alignment - 1)) == 0; 2801 2802 mappable = 2803 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2804 2805 obj->map_and_fenceable = mappable && fenceable; 2806 2807 trace_i915_gem_object_bind(obj, map_and_fenceable); 2808 return 0; 2809 } 2810 2811 void 2812 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2813 { 2814 /* If we don't have a page list set up, then we're not pinned 2815 * to GPU, and we can ignore the cache flush because it'll happen 2816 * again at bind time. 2817 */ 2818 if (obj->pages == NULL) 2819 return; 2820 2821 /* If the GPU is snooping the contents of the CPU cache, 2822 * we do not need to manually clear the CPU cache lines. However, 2823 * the caches are only snooped when the render cache is 2824 * flushed/invalidated. As we always have to emit invalidations 2825 * and flushes when moving into and out of the RENDER domain, correct 2826 * snooping behaviour occurs naturally as the result of our domain 2827 * tracking. 2828 */ 2829 if (obj->cache_level != I915_CACHE_NONE) 2830 return; 2831 2832 trace_i915_gem_object_clflush(obj); 2833 2834 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2835 } 2836 2837 /** Flushes any GPU write domain for the object if it's dirty. */ 2838 static int 2839 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) 2840 { 2841 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) 2842 return 0; 2843 2844 /* Queue the GPU write cache flushing we need. */ 2845 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 2846 } 2847 2848 /** Flushes the GTT write domain for the object if it's dirty. */ 2849 static void 2850 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2851 { 2852 uint32_t old_write_domain; 2853 2854 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2855 return; 2856 2857 /* No actual flushing is required for the GTT write domain. Writes 2858 * to it immediately go to main memory as far as we know, so there's 2859 * no chipset flush. 
It also doesn't land in render cache. 2860 * 2861 * However, we do have to enforce the order so that all writes through 2862 * the GTT land before any writes to the device, such as updates to 2863 * the GATT itself. 2864 */ 2865 wmb(); 2866 2867 old_write_domain = obj->base.write_domain; 2868 obj->base.write_domain = 0; 2869 2870 trace_i915_gem_object_change_domain(obj, 2871 obj->base.read_domains, 2872 old_write_domain); 2873 } 2874 2875 /** Flushes the CPU write domain for the object if it's dirty. */ 2876 static void 2877 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2878 { 2879 uint32_t old_write_domain; 2880 2881 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2882 return; 2883 2884 i915_gem_clflush_object(obj); 2885 intel_gtt_chipset_flush(); 2886 old_write_domain = obj->base.write_domain; 2887 obj->base.write_domain = 0; 2888 2889 trace_i915_gem_object_change_domain(obj, 2890 obj->base.read_domains, 2891 old_write_domain); 2892 } 2893 2894 /** 2895 * Moves a single object to the GTT read, and possibly write domain. 2896 * 2897 * This function returns when the move is complete, including waiting on 2898 * flushes to occur. 2899 */ 2900 int 2901 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2902 { 2903 uint32_t old_write_domain, old_read_domains; 2904 int ret; 2905 2906 /* Not valid to be called on unbound objects. */ 2907 if (obj->gtt_space == NULL) 2908 return -EINVAL; 2909 2910 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2911 return 0; 2912 2913 ret = i915_gem_object_flush_gpu_write_domain(obj); 2914 if (ret) 2915 return ret; 2916 2917 if (obj->pending_gpu_write || write) { 2918 ret = i915_gem_object_wait_rendering(obj); 2919 if (ret) 2920 return ret; 2921 } 2922 2923 i915_gem_object_flush_cpu_write_domain(obj); 2924 2925 old_write_domain = obj->base.write_domain; 2926 old_read_domains = obj->base.read_domains; 2927 2928 /* It should now be out of any other write domains, and we can update 2929 * the domain values for our changes. 2930 */ 2931 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2932 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2933 if (write) { 2934 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2935 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2936 obj->dirty = 1; 2937 } 2938 2939 trace_i915_gem_object_change_domain(obj, 2940 old_read_domains, 2941 old_write_domain); 2942 2943 return 0; 2944 } 2945 2946 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2947 enum i915_cache_level cache_level) 2948 { 2949 int ret; 2950 2951 if (obj->cache_level == cache_level) 2952 return 0; 2953 2954 if (obj->pin_count) { 2955 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2956 return -EBUSY; 2957 } 2958 2959 if (obj->gtt_space) { 2960 ret = i915_gem_object_finish_gpu(obj); 2961 if (ret) 2962 return ret; 2963 2964 i915_gem_object_finish_gtt(obj); 2965 2966 /* Before SandyBridge, you could not use tiling or fence 2967 * registers with snooped memory, so relinquish any fences 2968 * currently pointing to our region in the aperture. 
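 * (i915_gem_object_put_fence() below drops such a fence before the
 * PTEs are rewritten with the new cache level by
 * i915_gem_gtt_rebind_object().)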
2969 */ 2970 if (INTEL_INFO(obj->base.dev)->gen < 6) { 2971 ret = i915_gem_object_put_fence(obj); 2972 if (ret) 2973 return ret; 2974 } 2975 2976 i915_gem_gtt_rebind_object(obj, cache_level); 2977 } 2978 2979 if (cache_level == I915_CACHE_NONE) { 2980 u32 old_read_domains, old_write_domain; 2981 2982 /* If we're coming from LLC cached, then we haven't 2983 * actually been tracking whether the data is in the 2984 * CPU cache or not, since we only allow one bit set 2985 * in obj->write_domain and have been skipping the clflushes. 2986 * Just set it to the CPU cache for now. 2987 */ 2988 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 2989 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 2990 2991 old_read_domains = obj->base.read_domains; 2992 old_write_domain = obj->base.write_domain; 2993 2994 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2995 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2996 2997 trace_i915_gem_object_change_domain(obj, 2998 old_read_domains, 2999 old_write_domain); 3000 } 3001 3002 obj->cache_level = cache_level; 3003 return 0; 3004 } 3005 3006 /* 3007 * Prepare buffer for display plane (scanout, cursors, etc). 3008 * Can be called from an uninterruptible phase (modesetting) and allows 3009 * any flushes to be pipelined (for pageflips). 3010 * 3011 * For the display plane, we want to be in the GTT but out of any write 3012 * domains. So in many ways this looks like set_to_gtt_domain() apart from the 3013 * ability to pipeline the waits, pinning and any additional subtleties 3014 * that may differentiate the display plane from ordinary buffers. 3015 */ 3016 int 3017 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3018 u32 alignment, 3019 struct intel_ring_buffer *pipelined) 3020 { 3021 u32 old_read_domains, old_write_domain; 3022 int ret; 3023 3024 ret = i915_gem_object_flush_gpu_write_domain(obj); 3025 if (ret) 3026 return ret; 3027 3028 if (pipelined != obj->ring) { 3029 ret = i915_gem_object_wait_rendering(obj); 3030 if (ret == -ERESTARTSYS) 3031 return ret; 3032 } 3033 3034 /* The display engine is not coherent with the LLC cache on gen6. As 3035 * a result, we make sure that the pinning that is about to occur is 3036 * done with uncached PTEs. This is lowest common denominator for all 3037 * chipsets. 3038 * 3039 * However for gen6+, we could do better by using the GFDT bit instead 3040 * of uncaching, which would allow us to flush all the LLC-cached data 3041 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3042 */ 3043 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3044 if (ret) 3045 return ret; 3046 3047 /* As the user may map the buffer once pinned in the display plane 3048 * (e.g. libkms for the bootup splash), we have to ensure that we 3049 * always use map_and_fenceable for all scanout buffers. 3050 */ 3051 ret = i915_gem_object_pin(obj, alignment, true); 3052 if (ret) 3053 return ret; 3054 3055 i915_gem_object_flush_cpu_write_domain(obj); 3056 3057 old_write_domain = obj->base.write_domain; 3058 old_read_domains = obj->base.read_domains; 3059 3060 /* It should now be out of any other write domains, and we can update 3061 * the domain values for our changes. 
3062 */ 3063 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3064 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3065 3066 trace_i915_gem_object_change_domain(obj, 3067 old_read_domains, 3068 old_write_domain); 3069 3070 return 0; 3071 } 3072 3073 int 3074 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3075 { 3076 int ret; 3077 3078 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3079 return 0; 3080 3081 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3082 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); 3083 if (ret) 3084 return ret; 3085 } 3086 3087 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3088 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3089 3090 return i915_gem_object_wait_rendering(obj); 3091 } 3092 3093 /** 3094 * Moves a single object to the CPU read, and possibly write domain. 3095 * 3096 * This function returns when the move is complete, including waiting on 3097 * flushes to occur. 3098 */ 3099 static int 3100 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3101 { 3102 uint32_t old_write_domain, old_read_domains; 3103 int ret; 3104 3105 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3106 return 0; 3107 3108 ret = i915_gem_object_flush_gpu_write_domain(obj); 3109 if (ret) 3110 return ret; 3111 3112 ret = i915_gem_object_wait_rendering(obj); 3113 if (ret) 3114 return ret; 3115 3116 i915_gem_object_flush_gtt_write_domain(obj); 3117 3118 /* If we have a partially-valid cache of the object in the CPU, 3119 * finish invalidating it and free the per-page flags. 3120 */ 3121 i915_gem_object_set_to_full_cpu_read_domain(obj); 3122 3123 old_write_domain = obj->base.write_domain; 3124 old_read_domains = obj->base.read_domains; 3125 3126 /* Flush the CPU cache if it's still invalid. */ 3127 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3128 i915_gem_clflush_object(obj); 3129 3130 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3131 } 3132 3133 /* It should now be out of any other write domains, and we can update 3134 * the domain values for our changes. 3135 */ 3136 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3137 3138 /* If we're writing through the CPU, then the GPU read domains will 3139 * need to be invalidated at next use. 3140 */ 3141 if (write) { 3142 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3143 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3144 } 3145 3146 trace_i915_gem_object_change_domain(obj, 3147 old_read_domains, 3148 old_write_domain); 3149 3150 return 0; 3151 } 3152 3153 /** 3154 * Moves the object from a partially CPU read to a full one. 3155 * 3156 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3157 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3158 */ 3159 static void 3160 i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) 3161 { 3162 if (!obj->page_cpu_valid) 3163 return; 3164 3165 /* If we're partially in the CPU read domain, finish moving it in. 3166 */ 3167 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { 3168 int i; 3169 3170 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { 3171 if (obj->page_cpu_valid[i]) 3172 continue; 3173 drm_clflush_pages(obj->pages + i, 1); 3174 } 3175 } 3176 3177 /* Free the page_cpu_valid mappings which are now stale, whether 3178 * or not we've got I915_GEM_DOMAIN_CPU. 
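 * (page_cpu_valid is one byte per page; a non-zero entry marks a page
 * that has already been clflushed into the CPU domain, see
 * i915_gem_object_set_cpu_read_domain_range() below.)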
3179 */ 3180 kfree(obj->page_cpu_valid); 3181 obj->page_cpu_valid = NULL; 3182 } 3183 3184 /** 3185 * Set the CPU read domain on a range of the object. 3186 * 3187 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3188 * not entirely valid. The page_cpu_valid member of the object flags which 3189 * pages have been flushed, and will be respected by 3190 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3191 * of the whole object. 3192 * 3193 * This function returns when the move is complete, including waiting on 3194 * flushes to occur. 3195 */ 3196 static int 3197 i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, 3198 uint64_t offset, uint64_t size) 3199 { 3200 uint32_t old_read_domains; 3201 int i, ret; 3202 3203 if (offset == 0 && size == obj->base.size) 3204 return i915_gem_object_set_to_cpu_domain(obj, 0); 3205 3206 ret = i915_gem_object_flush_gpu_write_domain(obj); 3207 if (ret) 3208 return ret; 3209 3210 ret = i915_gem_object_wait_rendering(obj); 3211 if (ret) 3212 return ret; 3213 3214 i915_gem_object_flush_gtt_write_domain(obj); 3215 3216 /* If we're already fully in the CPU read domain, we're done. */ 3217 if (obj->page_cpu_valid == NULL && 3218 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) 3219 return 0; 3220 3221 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3222 * newly adding I915_GEM_DOMAIN_CPU 3223 */ 3224 if (obj->page_cpu_valid == NULL) { 3225 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, 3226 GFP_KERNEL); 3227 if (obj->page_cpu_valid == NULL) 3228 return -ENOMEM; 3229 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 3230 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); 3231 3232 /* Flush the cache on any pages that are still invalid from the CPU's 3233 * perspective. 3234 */ 3235 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3236 i++) { 3237 if (obj->page_cpu_valid[i]) 3238 continue; 3239 3240 drm_clflush_pages(obj->pages + i, 1); 3241 3242 obj->page_cpu_valid[i] = 1; 3243 } 3244 3245 /* It should now be out of any other write domains, and we can update 3246 * the domain values for our changes. 3247 */ 3248 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3249 3250 old_read_domains = obj->base.read_domains; 3251 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3252 3253 trace_i915_gem_object_change_domain(obj, 3254 old_read_domains, 3255 obj->base.write_domain); 3256 3257 return 0; 3258 } 3259 3260 /* Throttle our rendering by waiting until the ring has completed our requests 3261 * emitted over 20 msec ago. 3262 * 3263 * Note that if we were to use the current jiffies each time around the loop, 3264 * we wouldn't escape the function with any frames outstanding if the time to 3265 * render a frame was over 20ms. 3266 * 3267 * This should get us reasonable parallelism between CPU and GPU but also 3268 * relatively low latency when blocking on a particular request to finish. 
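 *
 * As a concrete illustration (the numbers depend on CONFIG_HZ): with
 * HZ=250 a jiffy is 4ms, so msecs_to_jiffies(20) == 5 and
 *
 *      recent_enough = jiffies - msecs_to_jiffies(20);
 *
 * The request-list walk below stops at the first request emitted
 * inside that window and waits on the newest request that is at
 * least ~20ms old, if any.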
3269 */ 3270 static int 3271 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3272 { 3273 struct drm_i915_private *dev_priv = dev->dev_private; 3274 struct drm_i915_file_private *file_priv = file->driver_priv; 3275 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3276 struct drm_i915_gem_request *request; 3277 struct intel_ring_buffer *ring = NULL; 3278 u32 seqno = 0; 3279 int ret; 3280 3281 if (atomic_read(&dev_priv->mm.wedged)) 3282 return -EIO; 3283 3284 spin_lock(&file_priv->mm.lock); 3285 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3286 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3287 break; 3288 3289 ring = request->ring; 3290 seqno = request->seqno; 3291 } 3292 spin_unlock(&file_priv->mm.lock); 3293 3294 if (seqno == 0) 3295 return 0; 3296 3297 ret = 0; 3298 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 3299 /* And wait for the seqno passing without holding any locks and 3300 * causing extra latency for others. This is safe as the irq 3301 * generation is designed to be run atomically and so is 3302 * lockless. 3303 */ 3304 if (ring->irq_get(ring)) { 3305 ret = wait_event_interruptible(ring->irq_queue, 3306 i915_seqno_passed(ring->get_seqno(ring), seqno) 3307 || atomic_read(&dev_priv->mm.wedged)); 3308 ring->irq_put(ring); 3309 3310 if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) 3311 ret = -EIO; 3312 } 3313 } 3314 3315 if (ret == 0) 3316 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3317 3318 return ret; 3319 } 3320 3321 int 3322 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3323 uint32_t alignment, 3324 bool map_and_fenceable) 3325 { 3326 struct drm_device *dev = obj->base.dev; 3327 struct drm_i915_private *dev_priv = dev->dev_private; 3328 int ret; 3329 3330 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3331 WARN_ON(i915_verify_lists(dev)); 3332 3333 if (obj->gtt_space != NULL) { 3334 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3335 (map_and_fenceable && !obj->map_and_fenceable)) { 3336 WARN(obj->pin_count, 3337 "bo is already pinned with incorrect alignment:" 3338 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3339 " obj->map_and_fenceable=%d\n", 3340 obj->gtt_offset, alignment, 3341 map_and_fenceable, 3342 obj->map_and_fenceable); 3343 ret = i915_gem_object_unbind(obj); 3344 if (ret) 3345 return ret; 3346 } 3347 } 3348 3349 if (obj->gtt_space == NULL) { 3350 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3351 map_and_fenceable); 3352 if (ret) 3353 return ret; 3354 } 3355 3356 if (obj->pin_count++ == 0) { 3357 if (!obj->active) 3358 list_move_tail(&obj->mm_list, 3359 &dev_priv->mm.pinned_list); 3360 } 3361 obj->pin_mappable |= map_and_fenceable; 3362 3363 WARN_ON(i915_verify_lists(dev)); 3364 return 0; 3365 } 3366 3367 void 3368 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3369 { 3370 struct drm_device *dev = obj->base.dev; 3371 drm_i915_private_t *dev_priv = dev->dev_private; 3372 3373 WARN_ON(i915_verify_lists(dev)); 3374 BUG_ON(obj->pin_count == 0); 3375 BUG_ON(obj->gtt_space == NULL); 3376 3377 if (--obj->pin_count == 0) { 3378 if (!obj->active) 3379 list_move_tail(&obj->mm_list, 3380 &dev_priv->mm.inactive_list); 3381 obj->pin_mappable = false; 3382 } 3383 WARN_ON(i915_verify_lists(dev)); 3384 } 3385 3386 int 3387 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3388 struct drm_file *file) 3389 { 3390 struct drm_i915_gem_pin *args = data; 3391 struct drm_i915_gem_object *obj; 3392 int ret; 3393 3394 ret 
= i915_mutex_lock_interruptible(dev); 3395 if (ret) 3396 return ret; 3397 3398 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3399 if (&obj->base == NULL) { 3400 ret = -ENOENT; 3401 goto unlock; 3402 } 3403 3404 if (obj->madv != I915_MADV_WILLNEED) { 3405 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3406 ret = -EINVAL; 3407 goto out; 3408 } 3409 3410 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3411 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3412 args->handle); 3413 ret = -EINVAL; 3414 goto out; 3415 } 3416 3417 obj->user_pin_count++; 3418 obj->pin_filp = file; 3419 if (obj->user_pin_count == 1) { 3420 ret = i915_gem_object_pin(obj, args->alignment, true); 3421 if (ret) 3422 goto out; 3423 } 3424 3425 /* XXX - flush the CPU caches for pinned objects 3426 * as the X server doesn't manage domains yet 3427 */ 3428 i915_gem_object_flush_cpu_write_domain(obj); 3429 args->offset = obj->gtt_offset; 3430 out: 3431 drm_gem_object_unreference(&obj->base); 3432 unlock: 3433 mutex_unlock(&dev->struct_mutex); 3434 return ret; 3435 } 3436 3437 int 3438 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3439 struct drm_file *file) 3440 { 3441 struct drm_i915_gem_pin *args = data; 3442 struct drm_i915_gem_object *obj; 3443 int ret; 3444 3445 ret = i915_mutex_lock_interruptible(dev); 3446 if (ret) 3447 return ret; 3448 3449 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3450 if (&obj->base == NULL) { 3451 ret = -ENOENT; 3452 goto unlock; 3453 } 3454 3455 if (obj->pin_filp != file) { 3456 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3457 args->handle); 3458 ret = -EINVAL; 3459 goto out; 3460 } 3461 obj->user_pin_count--; 3462 if (obj->user_pin_count == 0) { 3463 obj->pin_filp = NULL; 3464 i915_gem_object_unpin(obj); 3465 } 3466 3467 out: 3468 drm_gem_object_unreference(&obj->base); 3469 unlock: 3470 mutex_unlock(&dev->struct_mutex); 3471 return ret; 3472 } 3473 3474 int 3475 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3476 struct drm_file *file) 3477 { 3478 struct drm_i915_gem_busy *args = data; 3479 struct drm_i915_gem_object *obj; 3480 int ret; 3481 3482 ret = i915_mutex_lock_interruptible(dev); 3483 if (ret) 3484 return ret; 3485 3486 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3487 if (&obj->base == NULL) { 3488 ret = -ENOENT; 3489 goto unlock; 3490 } 3491 3492 /* Count all active objects as busy, even if they are currently not used 3493 * by the gpu. Users of this interface expect objects to eventually 3494 * become non-busy without any further actions, therefore emit any 3495 * necessary flushes here. 3496 */ 3497 args->busy = obj->active; 3498 if (args->busy) { 3499 /* Unconditionally flush objects, even when the gpu still uses this 3500 * object. Userspace calling this function indicates that it wants to 3501 * use this buffer rather sooner than later, so issuing the required 3502 * flush earlier is beneficial. 3503 */ 3504 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3505 ret = i915_gem_flush_ring(obj->ring, 3506 0, obj->base.write_domain); 3507 } else if (obj->ring->outstanding_lazy_request == 3508 obj->last_rendering_seqno) { 3509 struct drm_i915_gem_request *request; 3510 3511 /* This ring is not being cleared by active usage, 3512 * so emit a request to do so. 
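 * Adding an (otherwise empty) request gives the ring a seqno that
 * will eventually signal, so a later retire pass can move this
 * object off the active list even if no further work is submitted.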
3513 */ 3514 request = kzalloc(sizeof(*request), GFP_KERNEL); 3515 if (request) 3516 ret = i915_add_request(obj->ring, NULL, request); 3517 else 3518 ret = -ENOMEM; 3519 } 3520 3521 /* Update the active list for the hardware's current position. 3522 * Otherwise this only updates on a delayed timer or when irqs 3523 * are actually unmasked, and our working set ends up being 3524 * larger than required. 3525 */ 3526 i915_gem_retire_requests_ring(obj->ring); 3527 3528 args->busy = obj->active; 3529 } 3530 3531 drm_gem_object_unreference(&obj->base); 3532 unlock: 3533 mutex_unlock(&dev->struct_mutex); 3534 return ret; 3535 } 3536 3537 int 3538 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3539 struct drm_file *file_priv) 3540 { 3541 return i915_gem_ring_throttle(dev, file_priv); 3542 } 3543 3544 int 3545 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3546 struct drm_file *file_priv) 3547 { 3548 struct drm_i915_gem_madvise *args = data; 3549 struct drm_i915_gem_object *obj; 3550 int ret; 3551 3552 switch (args->madv) { 3553 case I915_MADV_DONTNEED: 3554 case I915_MADV_WILLNEED: 3555 break; 3556 default: 3557 return -EINVAL; 3558 } 3559 3560 ret = i915_mutex_lock_interruptible(dev); 3561 if (ret) 3562 return ret; 3563 3564 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3565 if (&obj->base == NULL) { 3566 ret = -ENOENT; 3567 goto unlock; 3568 } 3569 3570 if (obj->pin_count) { 3571 ret = -EINVAL; 3572 goto out; 3573 } 3574 3575 if (obj->madv != __I915_MADV_PURGED) 3576 obj->madv = args->madv; 3577 3578 /* if the object is no longer bound, discard its backing storage */ 3579 if (i915_gem_object_is_purgeable(obj) && 3580 obj->gtt_space == NULL) 3581 i915_gem_object_truncate(obj); 3582 3583 args->retained = obj->madv != __I915_MADV_PURGED; 3584 3585 out: 3586 drm_gem_object_unreference(&obj->base); 3587 unlock: 3588 mutex_unlock(&dev->struct_mutex); 3589 return ret; 3590 } 3591 3592 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3593 size_t size) 3594 { 3595 struct drm_i915_private *dev_priv = dev->dev_private; 3596 struct drm_i915_gem_object *obj; 3597 struct address_space *mapping; 3598 3599 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 3600 if (obj == NULL) 3601 return NULL; 3602 3603 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3604 kfree(obj); 3605 return NULL; 3606 } 3607 3608 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3609 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); 3610 3611 i915_gem_info_add_obj(dev_priv, size); 3612 3613 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3614 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3615 3616 if (IS_GEN6(dev)) { 3617 /* On Gen6, we can have the GPU use the LLC (the CPU 3618 * cache) for about a 10% performance improvement 3619 * compared to uncached. Graphics requests other than 3620 * display scanout are coherent with the CPU in 3621 * accessing this cache. This means in this mode we 3622 * don't need to clflush on the CPU side, and on the 3623 * GPU side we only need to flush internal caches to 3624 * get data visible to the CPU. 3625 * 3626 * However, we maintain the display planes as UC, and so 3627 * need to rebind when first used as such. 
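 * (That rebind happens when the buffer is first pinned for scanout,
 * via i915_gem_object_set_cache_level(obj, I915_CACHE_NONE) in
 * i915_gem_object_pin_to_display_plane().)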
3628 */ 3629 obj->cache_level = I915_CACHE_LLC; 3630 } else 3631 obj->cache_level = I915_CACHE_NONE; 3632 3633 obj->base.driver_private = NULL; 3634 obj->fence_reg = I915_FENCE_REG_NONE; 3635 INIT_LIST_HEAD(&obj->mm_list); 3636 INIT_LIST_HEAD(&obj->gtt_list); 3637 INIT_LIST_HEAD(&obj->ring_list); 3638 INIT_LIST_HEAD(&obj->exec_list); 3639 INIT_LIST_HEAD(&obj->gpu_write_list); 3640 obj->madv = I915_MADV_WILLNEED; 3641 /* Avoid an unnecessary call to unbind on the first bind. */ 3642 obj->map_and_fenceable = true; 3643 3644 return obj; 3645 } 3646 3647 int i915_gem_init_object(struct drm_gem_object *obj) 3648 { 3649 BUG(); 3650 3651 return 0; 3652 } 3653 3654 static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3655 { 3656 struct drm_device *dev = obj->base.dev; 3657 drm_i915_private_t *dev_priv = dev->dev_private; 3658 int ret; 3659 3660 ret = i915_gem_object_unbind(obj); 3661 if (ret == -ERESTARTSYS) { 3662 list_move(&obj->mm_list, 3663 &dev_priv->mm.deferred_free_list); 3664 return; 3665 } 3666 3667 trace_i915_gem_object_destroy(obj); 3668 3669 if (obj->base.map_list.map) 3670 drm_gem_free_mmap_offset(&obj->base); 3671 3672 drm_gem_object_release(&obj->base); 3673 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3674 3675 kfree(obj->page_cpu_valid); 3676 kfree(obj->bit_17); 3677 kfree(obj); 3678 } 3679 3680 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3681 { 3682 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3683 struct drm_device *dev = obj->base.dev; 3684 3685 while (obj->pin_count > 0) 3686 i915_gem_object_unpin(obj); 3687 3688 if (obj->phys_obj) 3689 i915_gem_detach_phys_object(dev, obj); 3690 3691 i915_gem_free_object_tail(obj); 3692 } 3693 3694 int 3695 i915_gem_idle(struct drm_device *dev) 3696 { 3697 drm_i915_private_t *dev_priv = dev->dev_private; 3698 int ret; 3699 3700 mutex_lock(&dev->struct_mutex); 3701 3702 if (dev_priv->mm.suspended) { 3703 mutex_unlock(&dev->struct_mutex); 3704 return 0; 3705 } 3706 3707 ret = i915_gpu_idle(dev); 3708 if (ret) { 3709 mutex_unlock(&dev->struct_mutex); 3710 return ret; 3711 } 3712 3713 /* Under UMS, be paranoid and evict. */ 3714 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3715 ret = i915_gem_evict_inactive(dev, false); 3716 if (ret) { 3717 mutex_unlock(&dev->struct_mutex); 3718 return ret; 3719 } 3720 } 3721 3722 i915_gem_reset_fences(dev); 3723 3724 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3725 * We need to replace this with a semaphore, or something. 3726 * And not confound mm.suspended! 3727 */ 3728 dev_priv->mm.suspended = 1; 3729 del_timer_sync(&dev_priv->hangcheck_timer); 3730 3731 i915_kernel_lost_context(dev); 3732 i915_gem_cleanup_ringbuffer(dev); 3733 3734 mutex_unlock(&dev->struct_mutex); 3735 3736 /* Cancel the retire work handler, which should be idle now. 
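	 * (mm.suspended was set above while holding struct_mutex, so the
	 * handler will not requeue itself after this point.)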
*/ 3737 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3738 3739 return 0; 3740 } 3741 3742 int 3743 i915_gem_init_ringbuffer(struct drm_device *dev) 3744 { 3745 drm_i915_private_t *dev_priv = dev->dev_private; 3746 int ret; 3747 3748 ret = intel_init_render_ring_buffer(dev); 3749 if (ret) 3750 return ret; 3751 3752 if (HAS_BSD(dev)) { 3753 ret = intel_init_bsd_ring_buffer(dev); 3754 if (ret) 3755 goto cleanup_render_ring; 3756 } 3757 3758 if (HAS_BLT(dev)) { 3759 ret = intel_init_blt_ring_buffer(dev); 3760 if (ret) 3761 goto cleanup_bsd_ring; 3762 } 3763 3764 dev_priv->next_seqno = 1; 3765 3766 return 0; 3767 3768 cleanup_bsd_ring: 3769 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3770 cleanup_render_ring: 3771 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3772 return ret; 3773 } 3774 3775 void 3776 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3777 { 3778 drm_i915_private_t *dev_priv = dev->dev_private; 3779 int i; 3780 3781 for (i = 0; i < I915_NUM_RINGS; i++) 3782 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3783 } 3784 3785 int 3786 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3787 struct drm_file *file_priv) 3788 { 3789 drm_i915_private_t *dev_priv = dev->dev_private; 3790 int ret, i; 3791 3792 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3793 return 0; 3794 3795 if (atomic_read(&dev_priv->mm.wedged)) { 3796 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3797 atomic_set(&dev_priv->mm.wedged, 0); 3798 } 3799 3800 mutex_lock(&dev->struct_mutex); 3801 dev_priv->mm.suspended = 0; 3802 3803 ret = i915_gem_init_ringbuffer(dev); 3804 if (ret != 0) { 3805 mutex_unlock(&dev->struct_mutex); 3806 return ret; 3807 } 3808 3809 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3810 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3811 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3812 for (i = 0; i < I915_NUM_RINGS; i++) { 3813 BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); 3814 BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); 3815 } 3816 mutex_unlock(&dev->struct_mutex); 3817 3818 ret = drm_irq_install(dev); 3819 if (ret) 3820 goto cleanup_ringbuffer; 3821 3822 return 0; 3823 3824 cleanup_ringbuffer: 3825 mutex_lock(&dev->struct_mutex); 3826 i915_gem_cleanup_ringbuffer(dev); 3827 dev_priv->mm.suspended = 1; 3828 mutex_unlock(&dev->struct_mutex); 3829 3830 return ret; 3831 } 3832 3833 int 3834 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3835 struct drm_file *file_priv) 3836 { 3837 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3838 return 0; 3839 3840 drm_irq_uninstall(dev); 3841 return i915_gem_idle(dev); 3842 } 3843 3844 void 3845 i915_gem_lastclose(struct drm_device *dev) 3846 { 3847 int ret; 3848 3849 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3850 return; 3851 3852 ret = i915_gem_idle(dev); 3853 if (ret) 3854 DRM_ERROR("failed to idle hardware: %d\n", ret); 3855 } 3856 3857 static void 3858 init_ring_lists(struct intel_ring_buffer *ring) 3859 { 3860 INIT_LIST_HEAD(&ring->active_list); 3861 INIT_LIST_HEAD(&ring->request_list); 3862 INIT_LIST_HEAD(&ring->gpu_write_list); 3863 } 3864 3865 void 3866 i915_gem_load(struct drm_device *dev) 3867 { 3868 int i; 3869 drm_i915_private_t *dev_priv = dev->dev_private; 3870 3871 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3872 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3873 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3874 INIT_LIST_HEAD(&dev_priv->mm.pinned_list); 3875 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3876 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); 3877 
INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3878 for (i = 0; i < I915_NUM_RINGS; i++) 3879 init_ring_lists(&dev_priv->ring[i]); 3880 for (i = 0; i < 16; i++) 3881 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3882 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3883 i915_gem_retire_work_handler); 3884 init_completion(&dev_priv->error_completion); 3885 3886 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3887 if (IS_GEN3(dev)) { 3888 u32 tmp = I915_READ(MI_ARB_STATE); 3889 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3890 /* arb state is a masked write, so set bit + bit in mask */ 3891 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); 3892 I915_WRITE(MI_ARB_STATE, tmp); 3893 } 3894 } 3895 3896 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3897 3898 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3899 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3900 dev_priv->fence_reg_start = 3; 3901 3902 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3903 dev_priv->num_fence_regs = 16; 3904 else 3905 dev_priv->num_fence_regs = 8; 3906 3907 /* Initialize fence registers to zero */ 3908 for (i = 0; i < dev_priv->num_fence_regs; i++) { 3909 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]); 3910 } 3911 3912 i915_gem_detect_bit_6_swizzle(dev); 3913 init_waitqueue_head(&dev_priv->pending_flip_queue); 3914 3915 dev_priv->mm.interruptible = true; 3916 3917 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3918 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3919 register_shrinker(&dev_priv->mm.inactive_shrinker); 3920 } 3921 3922 /* 3923 * Create a physically contiguous memory object for this object 3924 * e.g. for cursor + overlay regs 3925 */ 3926 static int i915_gem_init_phys_object(struct drm_device *dev, 3927 int id, int size, int align) 3928 { 3929 drm_i915_private_t *dev_priv = dev->dev_private; 3930 struct drm_i915_gem_phys_object *phys_obj; 3931 int ret; 3932 3933 if (dev_priv->mm.phys_objs[id - 1] || !size) 3934 return 0; 3935 3936 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 3937 if (!phys_obj) 3938 return -ENOMEM; 3939 3940 phys_obj->id = id; 3941 3942 phys_obj->handle = drm_pci_alloc(dev, size, align); 3943 if (!phys_obj->handle) { 3944 ret = -ENOMEM; 3945 goto kfree_obj; 3946 } 3947 #ifdef CONFIG_X86 3948 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3949 #endif 3950 3951 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3952 3953 return 0; 3954 kfree_obj: 3955 kfree(phys_obj); 3956 return ret; 3957 } 3958 3959 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3960 { 3961 drm_i915_private_t *dev_priv = dev->dev_private; 3962 struct drm_i915_gem_phys_object *phys_obj; 3963 3964 if (!dev_priv->mm.phys_objs[id - 1]) 3965 return; 3966 3967 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3968 if (phys_obj->cur_obj) { 3969 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3970 } 3971 3972 #ifdef CONFIG_X86 3973 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 3974 #endif 3975 drm_pci_free(dev, phys_obj->handle); 3976 kfree(phys_obj); 3977 dev_priv->mm.phys_objs[id - 1] = NULL; 3978 } 3979 3980 void i915_gem_free_all_phys_object(struct drm_device *dev) 3981 { 3982 int i; 3983 3984 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3985 i915_gem_free_phys_object(dev, i); 3986 } 3987 3988 void 
i915_gem_detach_phys_object(struct drm_device *dev, 3989 struct drm_i915_gem_object *obj) 3990 { 3991 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3992 char *vaddr; 3993 int i; 3994 int page_count; 3995 3996 if (!obj->phys_obj) 3997 return; 3998 vaddr = obj->phys_obj->handle->vaddr; 3999 4000 page_count = obj->base.size / PAGE_SIZE; 4001 for (i = 0; i < page_count; i++) { 4002 struct page *page = shmem_read_mapping_page(mapping, i); 4003 if (!IS_ERR(page)) { 4004 char *dst = kmap_atomic(page); 4005 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4006 kunmap_atomic(dst); 4007 4008 drm_clflush_pages(&page, 1); 4009 4010 set_page_dirty(page); 4011 mark_page_accessed(page); 4012 page_cache_release(page); 4013 } 4014 } 4015 intel_gtt_chipset_flush(); 4016 4017 obj->phys_obj->cur_obj = NULL; 4018 obj->phys_obj = NULL; 4019 } 4020 4021 int 4022 i915_gem_attach_phys_object(struct drm_device *dev, 4023 struct drm_i915_gem_object *obj, 4024 int id, 4025 int align) 4026 { 4027 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4028 drm_i915_private_t *dev_priv = dev->dev_private; 4029 int ret = 0; 4030 int page_count; 4031 int i; 4032 4033 if (id > I915_MAX_PHYS_OBJECT) 4034 return -EINVAL; 4035 4036 if (obj->phys_obj) { 4037 if (obj->phys_obj->id == id) 4038 return 0; 4039 i915_gem_detach_phys_object(dev, obj); 4040 } 4041 4042 /* create a new object */ 4043 if (!dev_priv->mm.phys_objs[id - 1]) { 4044 ret = i915_gem_init_phys_object(dev, id, 4045 obj->base.size, align); 4046 if (ret) { 4047 DRM_ERROR("failed to init phys object %d size: %zu\n", 4048 id, obj->base.size); 4049 return ret; 4050 } 4051 } 4052 4053 /* bind to the object */ 4054 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4055 obj->phys_obj->cur_obj = obj; 4056 4057 page_count = obj->base.size / PAGE_SIZE; 4058 4059 for (i = 0; i < page_count; i++) { 4060 struct page *page; 4061 char *dst, *src; 4062 4063 page = shmem_read_mapping_page(mapping, i); 4064 if (IS_ERR(page)) 4065 return PTR_ERR(page); 4066 4067 src = kmap_atomic(page); 4068 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4069 memcpy(dst, src, PAGE_SIZE); 4070 kunmap_atomic(src); 4071 4072 mark_page_accessed(page); 4073 page_cache_release(page); 4074 } 4075 4076 return 0; 4077 } 4078 4079 static int 4080 i915_gem_phys_pwrite(struct drm_device *dev, 4081 struct drm_i915_gem_object *obj, 4082 struct drm_i915_gem_pwrite *args, 4083 struct drm_file *file_priv) 4084 { 4085 void *vaddr = obj->phys_obj->handle->vaddr + args->offset; 4086 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4087 4088 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4089 unsigned long unwritten; 4090 4091 /* The physical object once assigned is fixed for the lifetime 4092 * of the obj, so we can safely drop the lock and continue 4093 * to access vaddr. 4094 */ 4095 mutex_unlock(&dev->struct_mutex); 4096 unwritten = copy_from_user(vaddr, user_data, args->size); 4097 mutex_lock(&dev->struct_mutex); 4098 if (unwritten) 4099 return -EFAULT; 4100 } 4101 4102 intel_gtt_chipset_flush(); 4103 return 0; 4104 } 4105 4106 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4107 { 4108 struct drm_i915_file_private *file_priv = file->driver_priv; 4109 4110 /* Clean up our request list when the client is going away, so that 4111 * later retire_requests won't dereference our soon-to-be-gone 4112 * file_priv. 
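 * Only the per-file bookkeeping is torn down here: each request stays
 * on its ring's request_list and is retired as usual; we merely unlink
 * it from file_priv->mm.request_list and clear request->file_priv.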
4113 */ 4114 spin_lock(&file_priv->mm.lock); 4115 while (!list_empty(&file_priv->mm.request_list)) { 4116 struct drm_i915_gem_request *request; 4117 4118 request = list_first_entry(&file_priv->mm.request_list, 4119 struct drm_i915_gem_request, 4120 client_list); 4121 list_del(&request->client_list); 4122 request->file_priv = NULL; 4123 } 4124 spin_unlock(&file_priv->mm.lock); 4125 } 4126 4127 static int 4128 i915_gpu_is_active(struct drm_device *dev) 4129 { 4130 drm_i915_private_t *dev_priv = dev->dev_private; 4131 int lists_empty; 4132 4133 lists_empty = list_empty(&dev_priv->mm.flushing_list) && 4134 list_empty(&dev_priv->mm.active_list); 4135 4136 return !lists_empty; 4137 } 4138 4139 static int 4140 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4141 { 4142 struct drm_i915_private *dev_priv = 4143 container_of(shrinker, 4144 struct drm_i915_private, 4145 mm.inactive_shrinker); 4146 struct drm_device *dev = dev_priv->dev; 4147 struct drm_i915_gem_object *obj, *next; 4148 int nr_to_scan = sc->nr_to_scan; 4149 int cnt; 4150 4151 if (!mutex_trylock(&dev->struct_mutex)) 4152 return 0; 4153 4154 /* "fast-path" to count number of available objects */ 4155 if (nr_to_scan == 0) { 4156 cnt = 0; 4157 list_for_each_entry(obj, 4158 &dev_priv->mm.inactive_list, 4159 mm_list) 4160 cnt++; 4161 mutex_unlock(&dev->struct_mutex); 4162 return cnt / 100 * sysctl_vfs_cache_pressure; 4163 } 4164 4165 rescan: 4166 /* first scan for clean buffers */ 4167 i915_gem_retire_requests(dev); 4168 4169 list_for_each_entry_safe(obj, next, 4170 &dev_priv->mm.inactive_list, 4171 mm_list) { 4172 if (i915_gem_object_is_purgeable(obj)) { 4173 if (i915_gem_object_unbind(obj) == 0 && 4174 --nr_to_scan == 0) 4175 break; 4176 } 4177 } 4178 4179 /* second pass, evict/count anything still on the inactive list */ 4180 cnt = 0; 4181 list_for_each_entry_safe(obj, next, 4182 &dev_priv->mm.inactive_list, 4183 mm_list) { 4184 if (nr_to_scan && 4185 i915_gem_object_unbind(obj) == 0) 4186 nr_to_scan--; 4187 else 4188 cnt++; 4189 } 4190 4191 if (nr_to_scan && i915_gpu_is_active(dev)) { 4192 /* 4193 * We are desperate for pages, so as a last resort, wait 4194 * for the GPU to finish and discard whatever we can. 4195 * This has a dramatic impact to reduce the number of 4196 * OOM-killer events whilst running the GPU aggressively. 4197 */ 4198 if (i915_gpu_idle(dev) == 0) 4199 goto rescan; 4200 } 4201 mutex_unlock(&dev->struct_mutex); 4202 return cnt / 100 * sysctl_vfs_cache_pressure; 4203 } 4204