/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						     uint64_t offset,
						     uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev, int min_size);
static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);

static LIST_HEAD(shrink_list);
static DEFINE_SPINLOCK(shrink_list_lock);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
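/*
 * Example (illustrative values, not part of the original code): userspace
 * initializes the managed range with DRM_IOCTL_I915_GEM_INIT, passing
 * page-aligned bounds such as gtt_start = 0x00100000 and
 * gtt_end = 0x08000000, which hands drm_mm a 127MiB range to allocate
 * from; any unaligned or inverted range is rejected with -EINVAL above.
 */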
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	drm_gem_object_handle_unreference_unlocked(obj);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char __iomem *vaddr;
	int unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;

	return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}
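/*
 * Illustrative note on bit-17 swizzling: on chipsets reporting
 * I915_BIT_6_SWIZZLE_9_10_17, the hardware XORs physical address bit 17
 * into bit 6 of a tiled address.  Userspace can compensate for the bit-9
 * and bit-10 terms itself, but bit 17 depends on which physical page the
 * shmem data landed in, so the kernel swaps adjacent 64-byte cachelines
 * (the offset ^ 64 below) for pages whose physical address has bit 17 set.
 */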
static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1).  The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = offset of the page within the object
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

static int
i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);

	/* If we've insufficient memory to map in the pages, attempt
	 * to make some space by throwing out some old buffers.
	 */
	if (ret == -ENOMEM) {
		struct drm_device *dev = obj->dev;

		ret = i915_gem_evict_something(dev, obj->size);
		if (ret)
			return ret;

		ret = i915_gem_object_get_pages(obj, 0);
	}

	return ret;
}
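/*
 * The helper above is a simple allocate-or-evict retry ladder (descriptive
 * note): the first attempt uses __GFP_NORETRY | __GFP_NOWARN so allocation
 * fails quickly under memory pressure; on -ENOMEM we evict at least
 * obj->size bytes from the GTT and retry once with the default flags,
 * which may block but should now succeed.
 */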
/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages() outside of the struct_mutex, so we can copy between
 * them and the object's backing pages while holding the struct mutex
 * without taking page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					page_length);
		}

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
		if (ret != 0)
			ret = i915_gem_shmem_pread_slow(dev, obj, args,
							file_priv);
	}

	drm_gem_object_unreference_unlocked(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}
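/*
 * Summary of the three copy helpers above (descriptive only):
 * fast_user_write copies through an atomic write-combining mapping of the
 * aperture and must not fault; fast_shmem_write copies into kmap_atomic'd
 * shmem pages under the same no-fault rule; slow_kernel_write may sleep,
 * using io_mapping_map_wc plus kmap on user pages that were already
 * pinned with get_user_pages.
 */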
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = to_intel_bo(obj);
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				      page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
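/*
 * Note (descriptive): if the atomic copy above faults, the fast path
 * returns -EFAULT and i915_gem_pwrite_ioctl() retries via
 * i915_gem_gtt_pwrite_slow() below, which pins the user pages with
 * get_user_pages() first so that no fault can occur while struct_mutex
 * is held.
 */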
/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and writes through a kernel mapping of the aperture
 * (io_mapping_map_wc) for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = to_intel_bo(obj);
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}
/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = offset of the page within the object
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and kmap to map it for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}
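/*
 * Path selection in the ioctl below, in order (descriptive summary):
 *   1. objects backed by a phys object use i915_gem_phys_pwrite();
 *   2. untiled objects not in the CPU write domain try the GTT fast path,
 *      falling back to the pinned-pages GTT slow path on -EFAULT;
 *   3. objects needing bit-17 swizzling go straight to the shmem slow path;
 *   4. everything else tries the shmem fast path, again falling back to
 *      the shmem slow path on -EFAULT.
 */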
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0 &&
		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference_unlocked(obj);

	return ret;
}
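/*
 * Typical usage (illustrative): before writing through a CPU mmap,
 * userspace calls the set_domain ioctl below with read_domains =
 * write_domain = I915_GEM_DOMAIN_CPU; before writing through a GTT mmap
 * it uses I915_GEM_DOMAIN_GTT instead, so the kernel can flush caches
 * and wait for outstanding rendering as needed.
 */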
/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	mutex_lock(&dev->struct_mutex);

	intel_mark_busy(dev, obj);

#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj_priv->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = to_intel_bo(obj);

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}
/**
 * i915_gem_fault - fault a page into the GTT
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret)
			goto unlock;

		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	/* Need a new fence register? */
	if (obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj);
		if (ret)
			goto unlock;
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}
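/*
 * Illustrative flow of the fake-offset scheme handled below: the mmap_gtt
 * ioctl hands userspace an offset derived from the offset-manager key
 * (key << PAGE_SHIFT), userspace passes it to mmap(2) on the DRM fd,
 * drm_gem_mmap() looks the object up by that offset, and the first access
 * to the mapping lands in i915_gem_fault() above.
 */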
/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
		DRM_ERROR("failed to add to map hash\n");
		ret = -ENOMEM;
		goto out_free_mm;
	}

	/* By now we should be all set, any drm_mmap request on the offset
	 * below will get to our mmap & fault handler */
	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure.  Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked.  Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	if (dev->dev_mapping)
		unmap_mapping_range(dev->dev_mapping,
				    obj_priv->mmap_offset, obj->size, 1);
}

static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;

	list = &obj->map_list;
	drm_ht_remove_item(&mm->offset_hash, &list->hash);

	if (list->file_offset_node) {
		drm_mm_put_block(list->file_offset_node);
		list->file_offset_node = NULL;
	}

	if (list->map) {
		kfree(list->map);
		list->map = NULL;
	}

	obj_priv->mmap_offset = 0;
}
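/*
 * Alignment example (illustrative): a 700KiB X-tiled object on a 915-class
 * chip needs a power-of-two fence region of at least 1MiB, so the helper
 * below returns 1MiB; the same object untiled, or on 965+, only needs the
 * 4KiB GTT page alignment.
 */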
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	if (IS_I9XX(dev))
		start = 1024*1024;
	else
		start = 512*1024;

	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);

	obj_priv = to_intel_bo(obj);

	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	if (!obj_priv->mmap_offset) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	args->offset = obj_priv->mmap_offset;

	/*
	 * Pull it into the GTT so that we have a page list (makes the
	 * initial fault faster and any subsequent flushing possible).
	 */
	if (!obj_priv->agp_mem) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

void
i915_gem_object_put_pages(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int page_count = obj->size / PAGE_SIZE;
	int i;

	BUG_ON(obj_priv->pages_refcount == 0);
	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);

	if (--obj_priv->pages_refcount != 0)
		return;

	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj_priv->madv == I915_MADV_DONTNEED)
		obj_priv->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj_priv->dirty)
			set_page_dirty(obj_priv->pages[i]);

		if (obj_priv->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj_priv->pages[i]);

		page_cache_release(obj_priv->pages[i]);
	}
	obj_priv->dirty = 0;

	drm_free_large(obj_priv->pages);
	obj_priv->pages = NULL;
}
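/*
 * Lifecycle note (descriptive): an object entering the active list below
 * gains an extra GEM reference so it cannot be freed while the GPU may
 * still read or write it; the reference is dropped again when the object
 * is retired to the inactive list in i915_gem_object_move_to_inactive().
 */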
static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
			       struct intel_ring_buffer *ring)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	BUG_ON(ring == NULL);
	obj_priv->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution. */
	spin_lock(&dev_priv->mm.active_list_lock);
	list_move_tail(&obj_priv->list, &ring->active_list);
	spin_unlock(&dev_priv->mm.active_list_lock);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	BUG_ON(!obj_priv->active);
	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
	obj_priv->last_rendering_seqno = 0;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct inode *inode;

	inode = obj->filp->f_path.dentry->d_inode;
	if (inode->i_op->truncate)
		inode->i_op->truncate(inode);

	obj_priv->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
{
	return obj_priv->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj_priv->gpu_write_list));

	obj_priv->last_rendering_seqno = 0;
	obj_priv->ring = NULL;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

static void
i915_gem_process_flushing_list(struct drm_device *dev,
			       uint32_t flush_domains, uint32_t seqno,
			       struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv, *next;

	list_for_each_entry_safe(obj_priv, next,
				 &dev_priv->mm.gpu_write_list,
				 gpu_write_list) {
		struct drm_gem_object *obj = &obj_priv->base;

		if ((obj->write_domain & flush_domains) ==
		    obj->write_domain &&
		    obj_priv->ring->ring_flag == ring->ring_flag) {
			uint32_t old_write_domain = obj->write_domain;

			obj->write_domain = 0;
			list_del_init(&obj_priv->gpu_write_list);
			i915_gem_object_move_to_active(obj, seqno, ring);

			/* update the fence lru list */
			if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
				struct drm_i915_fence_reg *reg =
					&dev_priv->fence_regs[obj_priv->fence_reg];
				list_move_tail(&reg->lru_list,
					       &dev_priv->mm.fence_list);
			}

			trace_i915_gem_object_change_domain(obj,
							    obj->read_domains,
							    old_write_domain);
		}
	}
}
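/*
 * Request flow (descriptive summary): i915_add_request() below emits a new
 * seqno into the ring, queues a drm_i915_gem_request to track it, moves
 * objects on the gpu write list whose write domains are being flushed onto
 * the active list, and arms the hangcheck timer plus the retire work.  A
 * return value of 0 means the request allocation failed and no seqno was
 * tracked.
 */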
uint32_t
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
		 uint32_t flush_domains, struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *i915_file_priv = NULL;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;

	if (file_priv != NULL)
		i915_file_priv = file_priv->driver_priv;

	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL)
		return 0;

	seqno = ring->add_request(dev, ring, file_priv, flush_domains);

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (i915_file_priv) {
		list_add_tail(&request->client_list,
			      &i915_file_priv->mm.request_list);
	} else {
		INIT_LIST_HEAD(&request->client_list);
	}

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
	 */
	if (flush_domains != 0)
		i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
		if (was_empty)
			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	}
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
{
	uint32_t flush_domains = 0;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;

	ring->flush(dev, ring,
		    I915_GEM_DOMAIN_COMMAND, flush_domains);
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	trace_i915_gem_request_retire(dev, request->seqno);

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	spin_lock(&dev_priv->mm.active_list_lock);
	while (!list_empty(&request->ring->active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&request->ring->active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = &obj_priv->base;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			goto out;

#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else {
			/* Take a reference on the object so it won't be
			 * freed while the spinlock is held.  The list
			 * protection for this spinlock is safe when breaking
			 * the lock like this since the next thing we do
			 * is just get the head of the list again.
			 */
			drm_gem_object_reference(obj);
			i915_gem_object_move_to_inactive(obj);
			spin_unlock(&dev_priv->mm.active_list_lock);
			drm_gem_object_unreference(obj);
			spin_lock(&dev_priv->mm.active_list_lock);
		}
	}
out:
	spin_unlock(&dev_priv->mm.active_list_lock);
}
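/*
 * Worked example (illustrative): seqnos compare modulo 2^32, so for
 * seq1 = 0x00000002 and seq2 = 0xfffffffe, seq1 - seq2 = 4 and
 * (int32_t)4 >= 0, meaning seq1 is correctly treated as later even
 * though the counter has wrapped.
 */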
/**
 * Returns true if seq1 is later than seq2.
 */
bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

uint32_t
i915_get_gem_seqno(struct drm_device *dev,
		   struct intel_ring_buffer *ring)
{
	return ring->get_gem_seqno(dev, ring);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests(struct drm_device *dev,
			 struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!ring->status_page.page_addr ||
	    list_empty(&ring->request_list))
		return;

	seqno = i915_get_gem_seqno(dev, ring);

	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    atomic_read(&dev_priv->mm.wedged)) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			list_del(&request->client_list);
			kfree(request);
		} else
			break;
	}

	if (unlikely(dev_priv->trace_irq_seqno &&
		     i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
		ring->user_irq_put(dev, ring);
		dev_priv->trace_irq_seqno = 0;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev, &dev_priv->render_ring);

	if (HAS_BSD(dev))
		i915_gem_retire_requests(dev, &dev_priv->bsd_ring);

	if (!dev_priv->mm.suspended &&
	    (!list_empty(&dev_priv->render_ring.request_list) ||
	     (HAS_BSD(dev) &&
	      !list_empty(&dev_priv->bsd_ring.request_list))))
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}
int
i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
		     int interruptible, struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

	if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
		if (HAS_PCH_SPLIT(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		trace_i915_gem_request_wait_begin(dev, seqno);

		ring->waiting_gem_seqno = seqno;
		ring->user_irq_get(dev, ring);
		if (interruptible)
			ret = wait_event_interruptible(ring->irq_queue,
				i915_seqno_passed(
					ring->get_gem_seqno(dev, ring), seqno)
				|| atomic_read(&dev_priv->mm.wedged));
		else
			wait_event(ring->irq_queue,
				i915_seqno_passed(
					ring->get_gem_seqno(dev, ring), seqno)
				|| atomic_read(&dev_priv->mm.wedged));

		ring->user_irq_put(dev, ring);
		ring->waiting_gem_seqno = 0;

		trace_i915_gem_request_wait_end(dev, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, ring->get_gem_seqno(dev, ring));

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev, ring);

	return ret;
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno,
		  struct intel_ring_buffer *ring)
{
	return i915_do_wait_request(dev, seqno, 1, ring);
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
				    invalidate_domains,
				    flush_domains);

	if (HAS_BSD(dev))
		dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
					 invalidate_domains,
					 flush_domains);
}

static void
i915_gem_flush_ring(struct drm_device *dev,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains,
		    struct intel_ring_buffer *ring)
{
	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);
	ring->flush(dev, ring,
		    invalidate_domains,
		    flush_domains);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
i915_gem_object_wait_rendering(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj_priv->active) {
#if WATCH_BUF
		DRM_INFO("%s: object %p wait for seqno %08x\n",
			 __func__, obj, obj_priv->last_rendering_seqno);
#endif
		ret = i915_wait_request(dev,
					obj_priv->last_rendering_seqno,
					obj_priv->ring);
		if (ret != 0)
			return ret;
	}

	return 0;
}
1917 */ 1918 int 1919 i915_gem_object_unbind(struct drm_gem_object *obj) 1920 { 1921 struct drm_device *dev = obj->dev; 1922 drm_i915_private_t *dev_priv = dev->dev_private; 1923 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 1924 int ret = 0; 1925 1926 #if WATCH_BUF 1927 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1928 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 1929 #endif 1930 if (obj_priv->gtt_space == NULL) 1931 return 0; 1932 1933 if (obj_priv->pin_count != 0) { 1934 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1935 return -EINVAL; 1936 } 1937 1938 /* blow away mappings if mapped through GTT */ 1939 i915_gem_release_mmap(obj); 1940 1941 /* Move the object to the CPU domain to ensure that 1942 * any possible CPU writes while it's not in the GTT 1943 * are flushed when we go to remap it. This will 1944 * also ensure that all pending GPU writes are finished 1945 * before we unbind. 1946 */ 1947 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1948 if (ret) { 1949 if (ret != -ERESTARTSYS) 1950 DRM_ERROR("set_domain failed: %d\n", ret); 1951 return ret; 1952 } 1953 1954 BUG_ON(obj_priv->active); 1955 1956 /* release the fence reg _after_ flushing */ 1957 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 1958 i915_gem_clear_fence_reg(obj); 1959 1960 if (obj_priv->agp_mem != NULL) { 1961 drm_unbind_agp(obj_priv->agp_mem); 1962 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 1963 obj_priv->agp_mem = NULL; 1964 } 1965 1966 i915_gem_object_put_pages(obj); 1967 BUG_ON(obj_priv->pages_refcount); 1968 1969 if (obj_priv->gtt_space) { 1970 atomic_dec(&dev->gtt_count); 1971 atomic_sub(obj->size, &dev->gtt_memory); 1972 1973 drm_mm_put_block(obj_priv->gtt_space); 1974 obj_priv->gtt_space = NULL; 1975 } 1976 1977 /* Remove ourselves from the LRU list if present. */ 1978 spin_lock(&dev_priv->mm.active_list_lock); 1979 if (!list_empty(&obj_priv->list)) 1980 list_del_init(&obj_priv->list); 1981 spin_unlock(&dev_priv->mm.active_list_lock); 1982 1983 if (i915_gem_object_is_purgeable(obj_priv)) 1984 i915_gem_object_truncate(obj); 1985 1986 trace_i915_gem_object_unbind(obj); 1987 1988 return 0; 1989 } 1990 1991 static struct drm_gem_object * 1992 i915_gem_find_inactive_object(struct drm_device *dev, int min_size) 1993 { 1994 drm_i915_private_t *dev_priv = dev->dev_private; 1995 struct drm_i915_gem_object *obj_priv; 1996 struct drm_gem_object *best = NULL; 1997 struct drm_gem_object *first = NULL; 1998 1999 /* Try to find the smallest clean object */ 2000 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { 2001 struct drm_gem_object *obj = &obj_priv->base; 2002 if (obj->size >= min_size) { 2003 if ((!obj_priv->dirty || 2004 i915_gem_object_is_purgeable(obj_priv)) && 2005 (!best || obj->size < best->size)) { 2006 best = obj; 2007 if (best->size == min_size) 2008 return best; 2009 } 2010 if (!first) 2011 first = obj; 2012 } 2013 } 2014 2015 return best ? best : first; 2016 } 2017 2018 static int 2019 i915_gpu_idle(struct drm_device *dev) 2020 { 2021 drm_i915_private_t *dev_priv = dev->dev_private; 2022 bool lists_empty; 2023 uint32_t seqno1, seqno2; 2024 int ret; 2025 2026 spin_lock(&dev_priv->mm.active_list_lock); 2027 lists_empty = (list_empty(&dev_priv->mm.flushing_list) && 2028 list_empty(&dev_priv->render_ring.active_list) && 2029 (!HAS_BSD(dev) || 2030 list_empty(&dev_priv->bsd_ring.active_list))); 2031 spin_unlock(&dev_priv->mm.active_list_lock); 2032 2033 if (lists_empty) 2034 return 0; 2035 2036 /* Flush everything onto the inactive list. 
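 * The step below is the usual idle idiom, once per ring: flush both GPU
 * domains, emit a request, then block on it. A hedged per-ring sketch
 * (the helper name is hypothetical, not part of this file):
 *
 *	static int idle_one_ring(struct drm_device *dev,
 *				 struct intel_ring_buffer *ring)
 *	{
 *		uint32_t seqno = i915_add_request(dev, NULL,
 *						  I915_GEM_GPU_DOMAINS, ring);
 *		if (seqno == 0)
 *			return -ENOMEM;
 *		return i915_wait_request(dev, seqno, ring);
 *	}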
*/ 2037 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2038 seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS, 2039 &dev_priv->render_ring); 2040 if (seqno1 == 0) 2041 return -ENOMEM; 2042 ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring); if (ret) return ret; 2043 2044 if (HAS_BSD(dev)) { 2045 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS, 2046 &dev_priv->bsd_ring); 2047 if (seqno2 == 0) 2048 return -ENOMEM; 2049 2050 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring); 2051 if (ret) 2052 return ret; 2053 } 2054 2055 2056 return ret; 2057 } 2058 2059 static int 2060 i915_gem_evict_everything(struct drm_device *dev) 2061 { 2062 drm_i915_private_t *dev_priv = dev->dev_private; 2063 int ret; 2064 bool lists_empty; 2065 2066 spin_lock(&dev_priv->mm.active_list_lock); 2067 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2068 list_empty(&dev_priv->mm.flushing_list) && 2069 list_empty(&dev_priv->render_ring.active_list) && 2070 (!HAS_BSD(dev) 2071 || list_empty(&dev_priv->bsd_ring.active_list))); 2072 spin_unlock(&dev_priv->mm.active_list_lock); 2073 2074 if (lists_empty) 2075 return -ENOSPC; 2076 2077 /* Flush everything (on to the inactive lists) and evict */ 2078 ret = i915_gpu_idle(dev); 2079 if (ret) 2080 return ret; 2081 2082 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 2083 2084 ret = i915_gem_evict_from_inactive_list(dev); 2085 if (ret) 2086 return ret; 2087 2088 spin_lock(&dev_priv->mm.active_list_lock); 2089 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2090 list_empty(&dev_priv->mm.flushing_list) && 2091 list_empty(&dev_priv->render_ring.active_list) && 2092 (!HAS_BSD(dev) 2093 || list_empty(&dev_priv->bsd_ring.active_list))); 2094 spin_unlock(&dev_priv->mm.active_list_lock); 2095 BUG_ON(!lists_empty); 2096 2097 return 0; 2098 } 2099 2100 static int 2101 i915_gem_evict_something(struct drm_device *dev, int min_size) 2102 { 2103 drm_i915_private_t *dev_priv = dev->dev_private; 2104 struct drm_gem_object *obj; 2105 int ret; 2106 2107 struct intel_ring_buffer *render_ring = &dev_priv->render_ring; 2108 struct intel_ring_buffer *bsd_ring = &dev_priv->bsd_ring; 2109 for (;;) { 2110 i915_gem_retire_requests(dev, render_ring); 2111 2112 if (HAS_BSD(dev)) 2113 i915_gem_retire_requests(dev, bsd_ring); 2114 2115 /* If there's an inactive buffer available now, grab it 2116 * and be done. 2117 */ 2118 obj = i915_gem_find_inactive_object(dev, min_size); 2119 if (obj) { 2120 struct drm_i915_gem_object *obj_priv; 2121 2122 #if WATCH_LRU 2123 DRM_INFO("%s: evicting %p\n", __func__, obj); 2124 #endif 2125 obj_priv = to_intel_bo(obj); 2126 BUG_ON(obj_priv->pin_count != 0); 2127 BUG_ON(obj_priv->active); 2128 2129 /* Wait on the rendering and unbind the buffer. */ 2130 return i915_gem_object_unbind(obj); 2131 } 2132 2133 /* If we didn't get anything, but the ring is still processing 2134 * things, wait for the next to finish and hopefully leave us 2135 * a buffer to evict.
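 *
 * Overall the loop tries the cheapest option first on each pass:
 * (1) reuse a clean (or purgeable) inactive buffer, (2) wait for the
 * oldest request on either ring to retire, (3) flush the flushing
 * list and retry, and only as a last resort evict the entire aperture.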
2136 */ 2137 if (!list_empty(&render_ring->request_list)) { 2138 struct drm_i915_gem_request *request; 2139 2140 request = list_first_entry(&render_ring->request_list, 2141 struct drm_i915_gem_request, 2142 list); 2143 2144 ret = i915_wait_request(dev, 2145 request->seqno, request->ring); 2146 if (ret) 2147 return ret; 2148 2149 continue; 2150 } 2151 2152 if (HAS_BSD(dev) && !list_empty(&bsd_ring->request_list)) { 2153 struct drm_i915_gem_request *request; 2154 2155 request = list_first_entry(&bsd_ring->request_list, 2156 struct drm_i915_gem_request, 2157 list); 2158 2159 ret = i915_wait_request(dev, 2160 request->seqno, request->ring); 2161 if (ret) 2162 return ret; 2163 2164 continue; 2165 } 2166 2167 /* If we didn't have anything on the request list but there 2168 * are buffers awaiting a flush, emit one and try again. 2169 * When we wait on it, those buffers waiting for that flush 2170 * will get moved to inactive. 2171 */ 2172 if (!list_empty(&dev_priv->mm.flushing_list)) { 2173 struct drm_i915_gem_object *obj_priv; 2174 2175 /* Find an object that we can immediately reuse */ 2176 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { 2177 obj = &obj_priv->base; 2178 if (obj->size >= min_size) 2179 break; 2180 2181 obj = NULL; 2182 } 2183 2184 if (obj != NULL) { 2185 uint32_t seqno; 2186 2187 i915_gem_flush_ring(dev, 2188 obj->write_domain, 2189 obj->write_domain, 2190 obj_priv->ring); 2191 seqno = i915_add_request(dev, NULL, 2192 obj->write_domain, 2193 obj_priv->ring); 2194 if (seqno == 0) 2195 return -ENOMEM; 2196 continue; 2197 } 2198 } 2199 2200 /* If we didn't do any of the above, there's no single buffer 2201 * large enough to swap out for the new one, so just evict 2202 * everything and start again. (This should be rare.) 2203 */ 2204 if (!list_empty (&dev_priv->mm.inactive_list)) 2205 return i915_gem_evict_from_inactive_list(dev); 2206 else 2207 return i915_gem_evict_everything(dev); 2208 } 2209 } 2210 2211 int 2212 i915_gem_object_get_pages(struct drm_gem_object *obj, 2213 gfp_t gfpmask) 2214 { 2215 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2216 int page_count, i; 2217 struct address_space *mapping; 2218 struct inode *inode; 2219 struct page *page; 2220 2221 BUG_ON(obj_priv->pages_refcount 2222 == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT); 2223 2224 if (obj_priv->pages_refcount++ != 0) 2225 return 0; 2226 2227 /* Get the list of pages out of our struct file. They'll be pinned 2228 * at this point until we release them. 
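 *
 * pages_refcount lets nested users share the same page list; only the
 * first get populates it and only the last put releases it. A hedged
 * sketch of the expected pairing:
 *
 *	ret = i915_gem_object_get_pages(obj, 0);
 *	if (ret == 0) {
 *		... access obj_priv->pages[] ...
 *		i915_gem_object_put_pages(obj);
 *	}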
2229 */ 2230 page_count = obj->size / PAGE_SIZE; 2231 BUG_ON(obj_priv->pages != NULL); 2232 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2233 if (obj_priv->pages == NULL) { 2234 obj_priv->pages_refcount--; 2235 return -ENOMEM; 2236 } 2237 2238 inode = obj->filp->f_path.dentry->d_inode; 2239 mapping = inode->i_mapping; 2240 for (i = 0; i < page_count; i++) { 2241 page = read_cache_page_gfp(mapping, i, 2242 mapping_gfp_mask (mapping) | 2243 __GFP_COLD | 2244 gfpmask); 2245 if (IS_ERR(page)) 2246 goto err_pages; 2247 2248 obj_priv->pages[i] = page; 2249 } 2250 2251 if (obj_priv->tiling_mode != I915_TILING_NONE) 2252 i915_gem_object_do_bit_17_swizzle(obj); 2253 2254 return 0; 2255 2256 err_pages: 2257 while (i--) 2258 page_cache_release(obj_priv->pages[i]); 2259 2260 drm_free_large(obj_priv->pages); 2261 obj_priv->pages = NULL; 2262 obj_priv->pages_refcount--; 2263 return PTR_ERR(page); 2264 } 2265 2266 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg) 2267 { 2268 struct drm_gem_object *obj = reg->obj; 2269 struct drm_device *dev = obj->dev; 2270 drm_i915_private_t *dev_priv = dev->dev_private; 2271 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2272 int regnum = obj_priv->fence_reg; 2273 uint64_t val; 2274 2275 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2276 0xfffff000) << 32; 2277 val |= obj_priv->gtt_offset & 0xfffff000; 2278 val |= (uint64_t)((obj_priv->stride / 128) - 1) << 2279 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2280 2281 if (obj_priv->tiling_mode == I915_TILING_Y) 2282 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2283 val |= I965_FENCE_REG_VALID; 2284 2285 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); 2286 } 2287 2288 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2289 { 2290 struct drm_gem_object *obj = reg->obj; 2291 struct drm_device *dev = obj->dev; 2292 drm_i915_private_t *dev_priv = dev->dev_private; 2293 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2294 int regnum = obj_priv->fence_reg; 2295 uint64_t val; 2296 2297 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2298 0xfffff000) << 32; 2299 val |= obj_priv->gtt_offset & 0xfffff000; 2300 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2301 if (obj_priv->tiling_mode == I915_TILING_Y) 2302 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2303 val |= I965_FENCE_REG_VALID; 2304 2305 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2306 } 2307 2308 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2309 { 2310 struct drm_gem_object *obj = reg->obj; 2311 struct drm_device *dev = obj->dev; 2312 drm_i915_private_t *dev_priv = dev->dev_private; 2313 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2314 int regnum = obj_priv->fence_reg; 2315 int tile_width; 2316 uint32_t fence_reg, val; 2317 uint32_t pitch_val; 2318 2319 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2320 (obj_priv->gtt_offset & (obj->size - 1))) { 2321 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2322 __func__, obj_priv->gtt_offset, obj->size); 2323 return; 2324 } 2325 2326 if (obj_priv->tiling_mode == I915_TILING_Y && 2327 HAS_128_BYTE_Y_TILING(dev)) 2328 tile_width = 128; 2329 else 2330 tile_width = 512; 2331 2332 /* Note: pitch better be a power of two tile widths */ 2333 pitch_val = obj_priv->stride / tile_width; 2334 pitch_val = ffs(pitch_val) - 1; 2335 2336 if (obj_priv->tiling_mode == I915_TILING_Y && 2337 HAS_128_BYTE_Y_TILING(dev)) 2338 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2339 
else 2340 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL); 2341 2342 val = obj_priv->gtt_offset; 2343 if (obj_priv->tiling_mode == I915_TILING_Y) 2344 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2345 val |= I915_FENCE_SIZE_BITS(obj->size); 2346 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2347 val |= I830_FENCE_REG_VALID; 2348 2349 if (regnum < 8) 2350 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2351 else 2352 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2353 I915_WRITE(fence_reg, val); 2354 } 2355 2356 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2357 { 2358 struct drm_gem_object *obj = reg->obj; 2359 struct drm_device *dev = obj->dev; 2360 drm_i915_private_t *dev_priv = dev->dev_private; 2361 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2362 int regnum = obj_priv->fence_reg; 2363 uint32_t val; 2364 uint32_t pitch_val; 2365 uint32_t fence_size_bits; 2366 2367 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2368 (obj_priv->gtt_offset & (obj->size - 1))) { 2369 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2370 __func__, obj_priv->gtt_offset); 2371 return; 2372 } 2373 2374 pitch_val = obj_priv->stride / 128; 2375 pitch_val = ffs(pitch_val) - 1; 2376 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2377 2378 val = obj_priv->gtt_offset; 2379 if (obj_priv->tiling_mode == I915_TILING_Y) 2380 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2381 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2382 WARN_ON(fence_size_bits & ~0x00000f00); 2383 val |= fence_size_bits; 2384 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2385 val |= I830_FENCE_REG_VALID; 2386 2387 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2388 } 2389 2390 static int i915_find_fence_reg(struct drm_device *dev) 2391 { 2392 struct drm_i915_fence_reg *reg = NULL; 2393 struct drm_i915_gem_object *obj_priv = NULL; 2394 struct drm_i915_private *dev_priv = dev->dev_private; 2395 struct drm_gem_object *obj = NULL; 2396 int i, avail, ret; 2397 2398 /* First try to find a free reg */ 2399 avail = 0; 2400 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2401 reg = &dev_priv->fence_regs[i]; 2402 if (!reg->obj) 2403 return i; 2404 2405 obj_priv = to_intel_bo(reg->obj); 2406 if (!obj_priv->pin_count) 2407 avail++; 2408 } 2409 2410 if (avail == 0) 2411 return -ENOSPC; 2412 2413 /* None available, try to steal one or wait for a user to finish */ 2414 i = I915_FENCE_REG_NONE; 2415 list_for_each_entry(reg, &dev_priv->mm.fence_list, 2416 lru_list) { 2417 obj = reg->obj; 2418 obj_priv = to_intel_bo(obj); 2419 2420 if (obj_priv->pin_count) 2421 continue; 2422 2423 /* found one! */ 2424 i = obj_priv->fence_reg; 2425 break; 2426 } 2427 2428 BUG_ON(i == I915_FENCE_REG_NONE); 2429 2430 /* We only have a reference on obj from the active list. put_fence_reg 2431 * might drop that one, causing a use-after-free in it. So hold a 2432 * private reference to obj like the other callers of put_fence_reg 2433 * (set_tiling ioctl) do. */ 2434 drm_gem_object_reference(obj); 2435 ret = i915_gem_object_put_fence_reg(obj); 2436 drm_gem_object_unreference(obj); 2437 if (ret != 0) 2438 return ret; 2439 2440 return i; 2441 } 2442 2443 /** 2444 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2445 * @obj: object to map through a fence reg 2446 * 2447 * When mapping objects through the GTT, userspace wants to be able to write 2448 * to them without having to worry about swizzling if the object is tiled. 
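 *
 * As a worked example of what gets programmed (i965 layout, following
 * i965_write_fence_reg() above): a Y-tiled object at GTT offset
 * 0x00100000 with size 0x00100000 and stride 512 packs
 * end = (0x00100000 + 0x00100000 - 4096) & 0xfffff000 = 0x001ff000
 * into the upper dword, start = 0x00100000 into the lower dword,
 * pitch = 512/128 - 1 = 3 into the pitch field, and sets the Y-tiling
 * and I965_FENCE_REG_VALID bits.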
2449 * 2450 * This function walks the fence regs looking for a free one for @obj, 2451 * stealing one if it can't find any. 2452 * 2453 * It then sets up the reg based on the object's properties: address, pitch 2454 * and tiling format. 2455 */ 2456 int 2457 i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 2458 { 2459 struct drm_device *dev = obj->dev; 2460 struct drm_i915_private *dev_priv = dev->dev_private; 2461 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2462 struct drm_i915_fence_reg *reg = NULL; 2463 int ret; 2464 2465 /* Just update our place in the LRU if our fence is getting used. */ 2466 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 2467 reg = &dev_priv->fence_regs[obj_priv->fence_reg]; 2468 list_move_tail(®->lru_list, &dev_priv->mm.fence_list); 2469 return 0; 2470 } 2471 2472 switch (obj_priv->tiling_mode) { 2473 case I915_TILING_NONE: 2474 WARN(1, "allocating a fence for non-tiled object?\n"); 2475 break; 2476 case I915_TILING_X: 2477 if (!obj_priv->stride) 2478 return -EINVAL; 2479 WARN((obj_priv->stride & (512 - 1)), 2480 "object 0x%08x is X tiled but has non-512B pitch\n", 2481 obj_priv->gtt_offset); 2482 break; 2483 case I915_TILING_Y: 2484 if (!obj_priv->stride) 2485 return -EINVAL; 2486 WARN((obj_priv->stride & (128 - 1)), 2487 "object 0x%08x is Y tiled but has non-128B pitch\n", 2488 obj_priv->gtt_offset); 2489 break; 2490 } 2491 2492 ret = i915_find_fence_reg(dev); 2493 if (ret < 0) 2494 return ret; 2495 2496 obj_priv->fence_reg = ret; 2497 reg = &dev_priv->fence_regs[obj_priv->fence_reg]; 2498 list_add_tail(®->lru_list, &dev_priv->mm.fence_list); 2499 2500 reg->obj = obj; 2501 2502 if (IS_GEN6(dev)) 2503 sandybridge_write_fence_reg(reg); 2504 else if (IS_I965G(dev)) 2505 i965_write_fence_reg(reg); 2506 else if (IS_I9XX(dev)) 2507 i915_write_fence_reg(reg); 2508 else 2509 i830_write_fence_reg(reg); 2510 2511 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg, 2512 obj_priv->tiling_mode); 2513 2514 return 0; 2515 } 2516 2517 /** 2518 * i915_gem_clear_fence_reg - clear out fence register info 2519 * @obj: object to clear 2520 * 2521 * Zeroes out the fence register itself and clears out the associated 2522 * data structures in dev_priv and obj_priv. 2523 */ 2524 static void 2525 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2526 { 2527 struct drm_device *dev = obj->dev; 2528 drm_i915_private_t *dev_priv = dev->dev_private; 2529 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2530 struct drm_i915_fence_reg *reg = 2531 &dev_priv->fence_regs[obj_priv->fence_reg]; 2532 2533 if (IS_GEN6(dev)) { 2534 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + 2535 (obj_priv->fence_reg * 8), 0); 2536 } else if (IS_I965G(dev)) { 2537 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2538 } else { 2539 uint32_t fence_reg; 2540 2541 if (obj_priv->fence_reg < 8) 2542 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2543 else 2544 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2545 8) * 4; 2546 2547 I915_WRITE(fence_reg, 0); 2548 } 2549 2550 reg->obj = NULL; 2551 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2552 list_del_init(®->lru_list); 2553 } 2554 2555 /** 2556 * i915_gem_object_put_fence_reg - waits on outstanding fenced access 2557 * to the buffer to finish, and then resets the fence register. 2558 * @obj: tiled object holding a fence register. 2559 * 2560 * Zeroes out the fence register itself and clears out the associated 2561 * data structures in dev_priv and obj_priv. 
2562 */ 2563 int 2564 i915_gem_object_put_fence_reg(struct drm_gem_object *obj) 2565 { 2566 struct drm_device *dev = obj->dev; 2567 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2568 2569 if (obj_priv->fence_reg == I915_FENCE_REG_NONE) 2570 return 0; 2571 2572 /* If we've changed tiling, GTT-mappings of the object 2573 * need to re-fault to ensure that the correct fence register 2574 * setup is in place. 2575 */ 2576 i915_gem_release_mmap(obj); 2577 2578 /* On the i915, GPU access to tiled buffers is via a fence, 2579 * therefore we must wait for any outstanding access to complete 2580 * before clearing the fence. 2581 */ 2582 if (!IS_I965G(dev)) { 2583 int ret; 2584 2585 i915_gem_object_flush_gpu_write_domain(obj); 2586 ret = i915_gem_object_wait_rendering(obj); 2587 if (ret != 0) 2588 return ret; 2589 } 2590 2591 i915_gem_object_flush_gtt_write_domain(obj); 2592 i915_gem_clear_fence_reg (obj); 2593 2594 return 0; 2595 } 2596 2597 /** 2598 * Finds free space in the GTT aperture and binds the object there. 2599 */ 2600 static int 2601 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2602 { 2603 struct drm_device *dev = obj->dev; 2604 drm_i915_private_t *dev_priv = dev->dev_private; 2605 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2606 struct drm_mm_node *free_space; 2607 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2608 int ret; 2609 2610 if (obj_priv->madv != I915_MADV_WILLNEED) { 2611 DRM_ERROR("Attempting to bind a purgeable object\n"); 2612 return -EINVAL; 2613 } 2614 2615 if (alignment == 0) 2616 alignment = i915_gem_get_gtt_alignment(obj); 2617 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2618 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2619 return -EINVAL; 2620 } 2621 2622 /* If the object is bigger than the entire aperture, reject it early 2623 * before evicting everything in a vain attempt to find space. 2624 */ 2625 if (obj->size > dev->gtt_total) { 2626 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 2627 return -E2BIG; 2628 } 2629 2630 search_free: 2631 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2632 obj->size, alignment, 0); 2633 if (free_space != NULL) { 2634 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2635 alignment); 2636 if (obj_priv->gtt_space != NULL) { 2637 obj_priv->gtt_space->private = obj; 2638 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2639 } 2640 } 2641 if (obj_priv->gtt_space == NULL) { 2642 /* If the gtt is empty and we're still having trouble 2643 * fitting our object in, we're out of memory. 
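 *
 * The whole bind path, as a hedged sketch (labels match the code below):
 *
 *	search_free:
 *		node = drm_mm_search_free(&mm.gtt_space, size, alignment, 0);
 *		if (node == NULL) {
 *			if (i915_gem_evict_something(dev, size) == 0)
 *				goto search_free;
 *		}
 *		get pages (on -ENOMEM: evict, or retry with gfpmask = 0);
 *		bind the pages through AGP at node->start;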
2644 */ 2645 #if WATCH_LRU 2646 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2647 #endif 2648 ret = i915_gem_evict_something(dev, obj->size); 2649 if (ret) 2650 return ret; 2651 2652 goto search_free; 2653 } 2654 2655 #if WATCH_BUF 2656 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2657 obj->size, obj_priv->gtt_offset); 2658 #endif 2659 ret = i915_gem_object_get_pages(obj, gfpmask); 2660 if (ret) { 2661 drm_mm_put_block(obj_priv->gtt_space); 2662 obj_priv->gtt_space = NULL; 2663 2664 if (ret == -ENOMEM) { 2665 /* first try to clear up some space from the GTT */ 2666 ret = i915_gem_evict_something(dev, obj->size); 2667 if (ret) { 2668 /* now try to shrink everyone else */ 2669 if (gfpmask) { 2670 gfpmask = 0; 2671 goto search_free; 2672 } 2673 2674 return ret; 2675 } 2676 2677 goto search_free; 2678 } 2679 2680 return ret; 2681 } 2682 2683 /* Create an AGP memory structure pointing at our pages, and bind it 2684 * into the GTT. 2685 */ 2686 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2687 obj_priv->pages, 2688 obj->size >> PAGE_SHIFT, 2689 obj_priv->gtt_offset, 2690 obj_priv->agp_type); 2691 if (obj_priv->agp_mem == NULL) { 2692 i915_gem_object_put_pages(obj); 2693 drm_mm_put_block(obj_priv->gtt_space); 2694 obj_priv->gtt_space = NULL; 2695 2696 ret = i915_gem_evict_something(dev, obj->size); 2697 if (ret) 2698 return ret; 2699 2700 goto search_free; 2701 } 2702 atomic_inc(&dev->gtt_count); 2703 atomic_add(obj->size, &dev->gtt_memory); 2704 2705 /* Assert that the object is not currently in any GPU domain. As it 2706 * wasn't in the GTT, there shouldn't be any way it could have been in 2707 * a GPU cache 2708 */ 2709 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2710 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2711 2712 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); 2713 2714 return 0; 2715 } 2716 2717 void 2718 i915_gem_clflush_object(struct drm_gem_object *obj) 2719 { 2720 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2721 2722 /* If we don't have a page list set up, then we're not pinned 2723 * to GPU, and we can ignore the cache flush because it'll happen 2724 * again at bind time. 2725 */ 2726 if (obj_priv->pages == NULL) 2727 return; 2728 2729 trace_i915_gem_object_clflush(obj); 2730 2731 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2732 } 2733 2734 /** Flushes any GPU write domain for the object if it's dirty. */ 2735 static void 2736 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2737 { 2738 struct drm_device *dev = obj->dev; 2739 uint32_t old_write_domain; 2740 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2741 2742 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2743 return; 2744 2745 /* Queue the GPU write cache flushing we need. */ 2746 old_write_domain = obj->write_domain; 2747 i915_gem_flush(dev, 0, obj->write_domain); 2748 (void) i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring); 2749 BUG_ON(obj->write_domain); 2750 2751 trace_i915_gem_object_change_domain(obj, 2752 obj->read_domains, 2753 old_write_domain); 2754 } 2755 2756 /** Flushes the GTT write domain for the object if it's dirty. */ 2757 static void 2758 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2759 { 2760 uint32_t old_write_domain; 2761 2762 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2763 return; 2764 2765 /* No actual flushing is required for the GTT write domain. Writes 2766 * to it immediately go to main memory as far as we know, so there's 2767 * no chipset flush. 
It also doesn't land in render cache. 2768 */ 2769 old_write_domain = obj->write_domain; 2770 obj->write_domain = 0; 2771 2772 trace_i915_gem_object_change_domain(obj, 2773 obj->read_domains, 2774 old_write_domain); 2775 } 2776 2777 /** Flushes the CPU write domain for the object if it's dirty. */ 2778 static void 2779 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2780 { 2781 struct drm_device *dev = obj->dev; 2782 uint32_t old_write_domain; 2783 2784 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2785 return; 2786 2787 i915_gem_clflush_object(obj); 2788 drm_agp_chipset_flush(dev); 2789 old_write_domain = obj->write_domain; 2790 obj->write_domain = 0; 2791 2792 trace_i915_gem_object_change_domain(obj, 2793 obj->read_domains, 2794 old_write_domain); 2795 } 2796 2797 void 2798 i915_gem_object_flush_write_domain(struct drm_gem_object *obj) 2799 { 2800 switch (obj->write_domain) { 2801 case I915_GEM_DOMAIN_GTT: 2802 i915_gem_object_flush_gtt_write_domain(obj); 2803 break; 2804 case I915_GEM_DOMAIN_CPU: 2805 i915_gem_object_flush_cpu_write_domain(obj); 2806 break; 2807 default: 2808 i915_gem_object_flush_gpu_write_domain(obj); 2809 break; 2810 } 2811 } 2812 2813 /** 2814 * Moves a single object to the GTT read, and possibly write domain. 2815 * 2816 * This function returns when the move is complete, including waiting on 2817 * flushes to occur. 2818 */ 2819 int 2820 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2821 { 2822 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2823 uint32_t old_write_domain, old_read_domains; 2824 int ret; 2825 2826 /* Not valid to be called on unbound objects. */ 2827 if (obj_priv->gtt_space == NULL) 2828 return -EINVAL; 2829 2830 i915_gem_object_flush_gpu_write_domain(obj); 2831 /* Wait on any GPU rendering and flushing to occur. */ 2832 ret = i915_gem_object_wait_rendering(obj); 2833 if (ret != 0) 2834 return ret; 2835 2836 old_write_domain = obj->write_domain; 2837 old_read_domains = obj->read_domains; 2838 2839 /* If we're writing through the GTT domain, then CPU and GPU caches 2840 * will need to be invalidated at next use. 2841 */ 2842 if (write) 2843 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2844 2845 i915_gem_object_flush_cpu_write_domain(obj); 2846 2847 /* It should now be out of any other write domains, and we can update 2848 * the domain values for our changes. 2849 */ 2850 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2851 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2852 if (write) { 2853 obj->write_domain = I915_GEM_DOMAIN_GTT; 2854 obj_priv->dirty = 1; 2855 } 2856 2857 trace_i915_gem_object_change_domain(obj, 2858 old_read_domains, 2859 old_write_domain); 2860 2861 return 0; 2862 } 2863 2864 /* 2865 * Prepare buffer for display plane. Use uninterruptible for possible flush 2866 * wait, as in modesetting process we're not supposed to be interrupted. 2867 */ 2868 int 2869 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj) 2870 { 2871 struct drm_device *dev = obj->dev; 2872 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 2873 uint32_t old_write_domain, old_read_domains; 2874 int ret; 2875 2876 /* Not valid to be called on unbound objects. */ 2877 if (obj_priv->gtt_space == NULL) 2878 return -EINVAL; 2879 2880 i915_gem_object_flush_gpu_write_domain(obj); 2881 2882 /* Wait on any GPU rendering and flushing to occur. 
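 * Unlike i915_gem_object_wait_rendering(), this open-codes the wait so
 * it can pass interruptible = 0: a signal delivered mid-modeset must
 * not abort the flush.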
*/ 2883 if (obj_priv->active) { 2884 #if WATCH_BUF 2885 DRM_INFO("%s: object %p wait for seqno %08x\n", 2886 __func__, obj, obj_priv->last_rendering_seqno); 2887 #endif 2888 ret = i915_do_wait_request(dev, 2889 obj_priv->last_rendering_seqno, 2890 0, 2891 obj_priv->ring); 2892 if (ret != 0) 2893 return ret; 2894 } 2895 2896 i915_gem_object_flush_cpu_write_domain(obj); 2897 2898 old_write_domain = obj->write_domain; 2899 old_read_domains = obj->read_domains; 2900 2901 /* It should now be out of any other write domains, and we can update 2902 * the domain values for our changes. 2903 */ 2904 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2905 obj->read_domains = I915_GEM_DOMAIN_GTT; 2906 obj->write_domain = I915_GEM_DOMAIN_GTT; 2907 obj_priv->dirty = 1; 2908 2909 trace_i915_gem_object_change_domain(obj, 2910 old_read_domains, 2911 old_write_domain); 2912 2913 return 0; 2914 } 2915 2916 /** 2917 * Moves a single object to the CPU read, and possibly write domain. 2918 * 2919 * This function returns when the move is complete, including waiting on 2920 * flushes to occur. 2921 */ 2922 static int 2923 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2924 { 2925 uint32_t old_write_domain, old_read_domains; 2926 int ret; 2927 2928 i915_gem_object_flush_gpu_write_domain(obj); 2929 /* Wait on any GPU rendering and flushing to occur. */ 2930 ret = i915_gem_object_wait_rendering(obj); 2931 if (ret != 0) 2932 return ret; 2933 2934 i915_gem_object_flush_gtt_write_domain(obj); 2935 2936 /* If we have a partially-valid cache of the object in the CPU, 2937 * finish invalidating it and free the per-page flags. 2938 */ 2939 i915_gem_object_set_to_full_cpu_read_domain(obj); 2940 2941 old_write_domain = obj->write_domain; 2942 old_read_domains = obj->read_domains; 2943 2944 /* Flush the CPU cache if it's still invalid. */ 2945 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2946 i915_gem_clflush_object(obj); 2947 2948 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2949 } 2950 2951 /* It should now be out of any other write domains, and we can update 2952 * the domain values for our changes. 2953 */ 2954 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2955 2956 /* If we're writing through the CPU, then the GPU read domains will 2957 * need to be invalidated at next use. 2958 */ 2959 if (write) { 2960 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2961 obj->write_domain = I915_GEM_DOMAIN_CPU; 2962 } 2963 2964 trace_i915_gem_object_change_domain(obj, 2965 old_read_domains, 2966 old_write_domain); 2967 2968 return 0; 2969 } 2970 2971 /* 2972 * Set the next domain for the specified object. This 2973 * may not actually perform the necessary flushing/invalidating though, 2974 * as that may want to be batched with other set_domain operations 2975 * 2976 * This is (we hope) the only really tricky part of gem. The goal 2977 * is fairly simple -- track which caches hold bits of the object 2978 * and make sure they remain coherent. A few concrete examples may 2979 * help to explain how it works. For shorthand, we use the notation 2980 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate 2981 * a pair of read and write domain masks. 2982 * 2983 * Case 1: the batch buffer 2984 * 2985 * 1. Allocated 2986 * 2. Written by CPU 2987 * 3. Mapped to GTT 2988 * 4. Read by GPU 2989 * 5. Unmapped from GTT 2990 * 6. Freed 2991 * 2992 * Let's take these a step at a time 2993 * 2994 * 1.
Allocated 2995 * Pages allocated from the kernel may still have 2996 * cache contents, so we set them to (CPU, CPU) always. 2997 * 2. Written by CPU (using pwrite) 2998 * The pwrite function calls set_domain (CPU, CPU) and 2999 * this function does nothing (as nothing changes) 3000 * 3. Mapped to GTT 3001 * This function asserts that the object is not 3002 * currently in any GPU-based read or write domains 3003 * 4. Read by GPU 3004 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 3005 * As write_domain is zero, this function adds in the 3006 * current read domains (CPU+COMMAND, 0). 3007 * flush_domains is set to CPU. 3008 * invalidate_domains is set to COMMAND 3009 * clflush is run to get data out of the CPU caches 3010 * then i915_dev_set_domain calls i915_gem_flush to 3011 * emit an MI_FLUSH and drm_agp_chipset_flush 3012 * 5. Unmapped from GTT 3013 * i915_gem_object_unbind calls set_domain (CPU, CPU) 3014 * flush_domains and invalidate_domains end up both zero 3015 * so no flushing/invalidating happens 3016 * 6. Freed 3017 * yay, done 3018 * 3019 * Case 2: The shared render buffer 3020 * 3021 * 1. Allocated 3022 * 2. Mapped to GTT 3023 * 3. Read/written by GPU 3024 * 4. set_domain to (CPU,CPU) 3025 * 5. Read/written by CPU 3026 * 6. Read/written by GPU 3027 * 3028 * 1. Allocated 3029 * Same as last example, (CPU, CPU) 3030 * 2. Mapped to GTT 3031 * Nothing changes (assertions find that it is not in the GPU) 3032 * 3. Read/written by GPU 3033 * execbuffer calls set_domain (RENDER, RENDER) 3034 * flush_domains gets CPU 3035 * invalidate_domains gets GPU 3036 * clflush (obj) 3037 * MI_FLUSH and drm_agp_chipset_flush 3038 * 4. set_domain (CPU, CPU) 3039 * flush_domains gets GPU 3040 * invalidate_domains gets CPU 3041 * wait_rendering (obj) to make sure all drawing is complete. 3042 * This will include an MI_FLUSH to get the data from GPU 3043 * to memory 3044 * clflush (obj) to invalidate the CPU cache 3045 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 3046 * 5. Read/written by CPU 3047 * cache lines are loaded and dirtied 3048 * 6. Read/written by GPU 3049 * Same as last GPU access 3050 * 3051 * Case 3: The constant buffer 3052 * 3053 * 1. Allocated 3054 * 2. Written by CPU 3055 * 3. Read by GPU 3056 * 4. Updated (written) by CPU again 3057 * 5. Read by GPU 3058 * 3059 * 1. Allocated 3060 * (CPU, CPU) 3061 * 2. Written by CPU 3062 * (CPU, CPU) 3063 * 3. Read by GPU 3064 * (CPU+RENDER, 0) 3065 * flush_domains = CPU 3066 * invalidate_domains = RENDER 3067 * clflush (obj) 3068 * MI_FLUSH 3069 * drm_agp_chipset_flush 3070 * 4. Updated (written) by CPU again 3071 * (CPU, CPU) 3072 * flush_domains = 0 (no previous write domain) 3073 * invalidate_domains = 0 (no new read domains) 3074 * 5.
Read by GPU 3075 * (CPU+RENDER, 0) 3076 * flush_domains = CPU 3077 * invalidate_domains = RENDER 3078 * clflush (obj) 3079 * MI_FLUSH 3080 * drm_agp_chipset_flush 3081 */ 3082 static void 3083 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 3084 { 3085 struct drm_device *dev = obj->dev; 3086 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3087 uint32_t invalidate_domains = 0; 3088 uint32_t flush_domains = 0; 3089 uint32_t old_read_domains; 3090 3091 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 3092 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 3093 3094 intel_mark_busy(dev, obj); 3095 3096 #if WATCH_BUF 3097 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 3098 __func__, obj, 3099 obj->read_domains, obj->pending_read_domains, 3100 obj->write_domain, obj->pending_write_domain); 3101 #endif 3102 /* 3103 * If the object isn't moving to a new write domain, 3104 * let the object stay in multiple read domains 3105 */ 3106 if (obj->pending_write_domain == 0) 3107 obj->pending_read_domains |= obj->read_domains; 3108 else 3109 obj_priv->dirty = 1; 3110 3111 /* 3112 * Flush the current write domain if 3113 * the new read domains don't match. Invalidate 3114 * any read domains which differ from the old 3115 * write domain 3116 */ 3117 if (obj->write_domain && 3118 obj->write_domain != obj->pending_read_domains) { 3119 flush_domains |= obj->write_domain; 3120 invalidate_domains |= 3121 obj->pending_read_domains & ~obj->write_domain; 3122 } 3123 /* 3124 * Invalidate any read caches which may have 3125 * stale data. That is, any new read domains. 3126 */ 3127 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 3128 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 3129 #if WATCH_BUF 3130 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 3131 __func__, flush_domains, invalidate_domains); 3132 #endif 3133 i915_gem_clflush_object(obj); 3134 } 3135 3136 old_read_domains = obj->read_domains; 3137 3138 /* The actual obj->write_domain will be updated with 3139 * pending_write_domain after we emit the accumulated flush for all 3140 * of our domain changes in execbuffers (which clears objects' 3141 * write_domains). So if we have a current write domain that we 3142 * aren't changing, set pending_write_domain to that. 3143 */ 3144 if (flush_domains == 0 && obj->pending_write_domain == 0) 3145 obj->pending_write_domain = obj->write_domain; 3146 obj->read_domains = obj->pending_read_domains; 3147 3148 dev->invalidate_domains |= invalidate_domains; 3149 dev->flush_domains |= flush_domains; 3150 #if WATCH_BUF 3151 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 3152 __func__, 3153 obj->read_domains, obj->write_domain, 3154 dev->invalidate_domains, dev->flush_domains); 3155 #endif 3156 3157 trace_i915_gem_object_change_domain(obj, 3158 old_read_domains, 3159 obj->write_domain); 3160 } 3161 3162 /** 3163 * Moves the object from a partially CPU read to a full one. 3164 * 3165 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3166 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3167 */ 3168 static void 3169 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 3170 { 3171 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3172 3173 if (!obj_priv->page_cpu_valid) 3174 return; 3175 3176 /* If we're partially in the CPU read domain, finish moving it in. 
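 * page_cpu_valid[] keeps one byte per page; nonzero marks pages that
 * have already been clflushed into the CPU domain, so only the pages
 * still flagged 0 need flushing here.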
3177 */ 3178 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 3179 int i; 3180 3181 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 3182 if (obj_priv->page_cpu_valid[i]) 3183 continue; 3184 drm_clflush_pages(obj_priv->pages + i, 1); 3185 } 3186 } 3187 3188 /* Free the page_cpu_valid mappings which are now stale, whether 3189 * or not we've got I915_GEM_DOMAIN_CPU. 3190 */ 3191 kfree(obj_priv->page_cpu_valid); 3192 obj_priv->page_cpu_valid = NULL; 3193 } 3194 3195 /** 3196 * Set the CPU read domain on a range of the object. 3197 * 3198 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3199 * not entirely valid. The page_cpu_valid member of the object flags which 3200 * pages have been flushed, and will be respected by 3201 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3202 * of the whole object. 3203 * 3204 * This function returns when the move is complete, including waiting on 3205 * flushes to occur. 3206 */ 3207 static int 3208 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 3209 uint64_t offset, uint64_t size) 3210 { 3211 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3212 uint32_t old_read_domains; 3213 int i, ret; 3214 3215 if (offset == 0 && size == obj->size) 3216 return i915_gem_object_set_to_cpu_domain(obj, 0); 3217 3218 i915_gem_object_flush_gpu_write_domain(obj); 3219 /* Wait on any GPU rendering and flushing to occur. */ 3220 ret = i915_gem_object_wait_rendering(obj); 3221 if (ret != 0) 3222 return ret; 3223 i915_gem_object_flush_gtt_write_domain(obj); 3224 3225 /* If we're already fully in the CPU read domain, we're done. */ 3226 if (obj_priv->page_cpu_valid == NULL && 3227 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 3228 return 0; 3229 3230 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3231 * newly adding I915_GEM_DOMAIN_CPU 3232 */ 3233 if (obj_priv->page_cpu_valid == NULL) { 3234 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE, 3235 GFP_KERNEL); 3236 if (obj_priv->page_cpu_valid == NULL) 3237 return -ENOMEM; 3238 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 3239 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 3240 3241 /* Flush the cache on any pages that are still invalid from the CPU's 3242 * perspective. 3243 */ 3244 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3245 i++) { 3246 if (obj_priv->page_cpu_valid[i]) 3247 continue; 3248 3249 drm_clflush_pages(obj_priv->pages + i, 1); 3250 3251 obj_priv->page_cpu_valid[i] = 1; 3252 } 3253 3254 /* It should now be out of any other write domains, and we can update 3255 * the domain values for our changes. 3256 */ 3257 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3258 3259 old_read_domains = obj->read_domains; 3260 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3261 3262 trace_i915_gem_object_change_domain(obj, 3263 old_read_domains, 3264 obj->write_domain); 3265 3266 return 0; 3267 } 3268 3269 /** 3270 * Pin an object to the GTT and evaluate the relocations landing in it. 
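 *
 * The net effect of each relocation handled below, as a hedged sketch:
 *
 *	u32 val = target_obj_priv->gtt_offset + reloc->delta;
 *	poke val into the batch at obj_priv->gtt_offset + reloc->offset
 *	    (via an atomic write-combining GTT mapping);
 *	reloc->presumed_offset = target_obj_priv->gtt_offset;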
3271 */ 3272 static int 3273 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 3274 struct drm_file *file_priv, 3275 struct drm_i915_gem_exec_object2 *entry, 3276 struct drm_i915_gem_relocation_entry *relocs) 3277 { 3278 struct drm_device *dev = obj->dev; 3279 drm_i915_private_t *dev_priv = dev->dev_private; 3280 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3281 int i, ret; 3282 void __iomem *reloc_page; 3283 bool need_fence; 3284 3285 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && 3286 obj_priv->tiling_mode != I915_TILING_NONE; 3287 3288 /* Check fence reg constraints and rebind if necessary */ 3289 if (need_fence && 3290 !i915_gem_object_fence_offset_ok(obj, 3291 obj_priv->tiling_mode)) { 3292 ret = i915_gem_object_unbind(obj); 3293 if (ret) 3294 return ret; 3295 } 3296 3297 /* Choose the GTT offset for our buffer and put it there. */ 3298 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 3299 if (ret) 3300 return ret; 3301 3302 /* 3303 * Pre-965 chips need a fence register set up in order to 3304 * properly handle blits to/from tiled surfaces. 3305 */ 3306 if (need_fence) { 3307 ret = i915_gem_object_get_fence_reg(obj); 3308 if (ret != 0) { 3309 i915_gem_object_unpin(obj); 3310 return ret; 3311 } 3312 } 3313 3314 entry->offset = obj_priv->gtt_offset; 3315 3316 /* Apply the relocations, using the GTT aperture to avoid cache 3317 * flushing requirements. 3318 */ 3319 for (i = 0; i < entry->relocation_count; i++) { 3320 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 3321 struct drm_gem_object *target_obj; 3322 struct drm_i915_gem_object *target_obj_priv; 3323 uint32_t reloc_val, reloc_offset; 3324 uint32_t __iomem *reloc_entry; 3325 3326 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 3327 reloc->target_handle); 3328 if (target_obj == NULL) { 3329 i915_gem_object_unpin(obj); 3330 return -EBADF; 3331 } 3332 target_obj_priv = to_intel_bo(target_obj); 3333 3334 #if WATCH_RELOC 3335 DRM_INFO("%s: obj %p offset %08x target %d " 3336 "read %08x write %08x gtt %08x " 3337 "presumed %08x delta %08x\n", 3338 __func__, 3339 obj, 3340 (int) reloc->offset, 3341 (int) reloc->target_handle, 3342 (int) reloc->read_domains, 3343 (int) reloc->write_domain, 3344 (int) target_obj_priv->gtt_offset, 3345 (int) reloc->presumed_offset, 3346 reloc->delta); 3347 #endif 3348 3349 /* The target buffer should have appeared before us in the 3350 * exec_object list, so it should have a GTT space bound by now. 
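 * Userspace must order the exec list so every relocation target precedes
 * the object that references it; objects are pinned in list order, so a
 * target without GTT space here is a userspace bug and fails below.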
3351 */ 3352 if (target_obj_priv->gtt_space == NULL) { 3353 DRM_ERROR("No GTT space found for object %d\n", 3354 reloc->target_handle); 3355 drm_gem_object_unreference(target_obj); 3356 i915_gem_object_unpin(obj); 3357 return -EINVAL; 3358 } 3359 3360 /* Validate that the target is in a valid r/w GPU domain */ 3361 if (reloc->write_domain & (reloc->write_domain - 1)) { 3362 DRM_ERROR("reloc with multiple write domains: " 3363 "obj %p target %d offset %d " 3364 "read %08x write %08x", 3365 obj, reloc->target_handle, 3366 (int) reloc->offset, 3367 reloc->read_domains, 3368 reloc->write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); 3369 return -EINVAL; 3370 } 3371 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3372 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3373 DRM_ERROR("reloc with read/write CPU domains: " 3374 "obj %p target %d offset %d " 3375 "read %08x write %08x", 3376 obj, reloc->target_handle, 3377 (int) reloc->offset, 3378 reloc->read_domains, 3379 reloc->write_domain); 3380 drm_gem_object_unreference(target_obj); 3381 i915_gem_object_unpin(obj); 3382 return -EINVAL; 3383 } 3384 if (reloc->write_domain && target_obj->pending_write_domain && 3385 reloc->write_domain != target_obj->pending_write_domain) { 3386 DRM_ERROR("Write domain conflict: " 3387 "obj %p target %d offset %d " 3388 "new %08x old %08x\n", 3389 obj, reloc->target_handle, 3390 (int) reloc->offset, 3391 reloc->write_domain, 3392 target_obj->pending_write_domain); 3393 drm_gem_object_unreference(target_obj); 3394 i915_gem_object_unpin(obj); 3395 return -EINVAL; 3396 } 3397 3398 target_obj->pending_read_domains |= reloc->read_domains; 3399 target_obj->pending_write_domain |= reloc->write_domain; 3400 3401 /* If the relocation already has the right value in it, no 3402 * more work needs to be done. 3403 */ 3404 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3405 drm_gem_object_unreference(target_obj); 3406 continue; 3407 } 3408 3409 /* Check that the relocation address is valid... */ 3410 if (reloc->offset > obj->size - 4) { 3411 DRM_ERROR("Relocation beyond object bounds: " 3412 "obj %p target %d offset %d size %d.\n", 3413 obj, reloc->target_handle, 3414 (int) reloc->offset, (int) obj->size); 3415 drm_gem_object_unreference(target_obj); 3416 i915_gem_object_unpin(obj); 3417 return -EINVAL; 3418 } 3419 if (reloc->offset & 3) { 3420 DRM_ERROR("Relocation not 4-byte aligned: " 3421 "obj %p target %d offset %d.\n", 3422 obj, reloc->target_handle, 3423 (int) reloc->offset); 3424 drm_gem_object_unreference(target_obj); 3425 i915_gem_object_unpin(obj); 3426 return -EINVAL; 3427 } 3428 3429 /* and points to somewhere within the target object. */ 3430 if (reloc->delta >= target_obj->size) { 3431 DRM_ERROR("Relocation beyond target object bounds: " 3432 "obj %p target %d delta %d size %d.\n", 3433 obj, reloc->target_handle, 3434 (int) reloc->delta, (int) target_obj->size); 3435 drm_gem_object_unreference(target_obj); 3436 i915_gem_object_unpin(obj); 3437 return -EINVAL; 3438 } 3439 3440 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3441 if (ret != 0) { 3442 drm_gem_object_unreference(target_obj); 3443 i915_gem_object_unpin(obj); 3444 return ret; 3445 } 3446 3447 /* Map the page containing the relocation we're going to 3448 * perform.
3449 */ 3450 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3451 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3452 (reloc_offset & 3453 ~(PAGE_SIZE - 1))); 3454 reloc_entry = (uint32_t __iomem *)(reloc_page + 3455 (reloc_offset & (PAGE_SIZE - 1))); 3456 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3457 3458 #if WATCH_BUF 3459 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 3460 obj, (unsigned int) reloc->offset, 3461 readl(reloc_entry), reloc_val); 3462 #endif 3463 writel(reloc_val, reloc_entry); 3464 io_mapping_unmap_atomic(reloc_page); 3465 3466 /* The updated presumed offset for this entry will be 3467 * copied back out to the user. 3468 */ 3469 reloc->presumed_offset = target_obj_priv->gtt_offset; 3470 3471 drm_gem_object_unreference(target_obj); 3472 } 3473 3474 #if WATCH_BUF 3475 if (0) 3476 i915_gem_dump_object(obj, 128, __func__, ~0); 3477 #endif 3478 return 0; 3479 } 3480 3481 /* Throttle our rendering by waiting until the ring has completed our requests 3482 * emitted over 20 msec ago. 3483 * 3484 * Note that if we were to use the current jiffies each time around the loop, 3485 * we wouldn't escape the function with any frames outstanding if the time to 3486 * render a frame was over 20ms. 3487 * 3488 * This should get us reasonable parallelism between CPU and GPU but also 3489 * relatively low latency when blocking on a particular request to finish. 3490 */ 3491 static int 3492 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3493 { 3494 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3495 int ret = 0; 3496 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3497 3498 mutex_lock(&dev->struct_mutex); 3499 while (!list_empty(&i915_file_priv->mm.request_list)) { 3500 struct drm_i915_gem_request *request; 3501 3502 request = list_first_entry(&i915_file_priv->mm.request_list, 3503 struct drm_i915_gem_request, 3504 client_list); 3505 3506 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3507 break; 3508 3509 ret = i915_wait_request(dev, request->seqno, request->ring); 3510 if (ret != 0) 3511 break; 3512 } 3513 mutex_unlock(&dev->struct_mutex); 3514 3515 return ret; 3516 } 3517 3518 static int 3519 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, 3520 uint32_t buffer_count, 3521 struct drm_i915_gem_relocation_entry **relocs) 3522 { 3523 uint32_t reloc_count = 0, reloc_index = 0, i; 3524 int ret; 3525 3526 *relocs = NULL; 3527 for (i = 0; i < buffer_count; i++) { 3528 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3529 return -EINVAL; 3530 reloc_count += exec_list[i].relocation_count; 3531 } 3532 3533 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); 3534 if (*relocs == NULL) { 3535 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); 3536 return -ENOMEM; 3537 } 3538 3539 for (i = 0; i < buffer_count; i++) { 3540 struct drm_i915_gem_relocation_entry __user *user_relocs; 3541 3542 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3543 3544 ret = copy_from_user(&(*relocs)[reloc_index], 3545 user_relocs, 3546 exec_list[i].relocation_count * 3547 sizeof(**relocs)); 3548 if (ret != 0) { 3549 drm_free_large(*relocs); 3550 *relocs = NULL; 3551 return -EFAULT; 3552 } 3553 3554 reloc_index += exec_list[i].relocation_count; 3555 } 3556 3557 return 0; 3558 } 3559 3560 static int 3561 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, 3562 uint32_t buffer_count, 3563 struct 
drm_i915_gem_relocation_entry *relocs) 3564 { 3565 uint32_t reloc_count = 0, i; 3566 int ret = 0; 3567 3568 if (relocs == NULL) 3569 return 0; 3570 3571 for (i = 0; i < buffer_count; i++) { 3572 struct drm_i915_gem_relocation_entry __user *user_relocs; 3573 int unwritten; 3574 3575 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3576 3577 unwritten = copy_to_user(user_relocs, 3578 &relocs[reloc_count], 3579 exec_list[i].relocation_count * 3580 sizeof(*relocs)); 3581 3582 if (unwritten) { 3583 ret = -EFAULT; 3584 goto err; 3585 } 3586 3587 reloc_count += exec_list[i].relocation_count; 3588 } 3589 3590 err: 3591 drm_free_large(relocs); 3592 3593 return ret; 3594 } 3595 3596 static int 3597 i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, 3598 uint64_t exec_offset) 3599 { 3600 uint32_t exec_start, exec_len; 3601 3602 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3603 exec_len = (uint32_t) exec->batch_len; 3604 3605 if ((exec_start | exec_len) & 0x7) 3606 return -EINVAL; 3607 3608 if (!exec_start) 3609 return -EINVAL; 3610 3611 return 0; 3612 } 3613 3614 static int 3615 i915_gem_wait_for_pending_flip(struct drm_device *dev, 3616 struct drm_gem_object **object_list, 3617 int count) 3618 { 3619 drm_i915_private_t *dev_priv = dev->dev_private; 3620 struct drm_i915_gem_object *obj_priv; 3621 DEFINE_WAIT(wait); 3622 int i, ret = 0; 3623 3624 for (;;) { 3625 prepare_to_wait(&dev_priv->pending_flip_queue, 3626 &wait, TASK_INTERRUPTIBLE); 3627 for (i = 0; i < count; i++) { 3628 obj_priv = to_intel_bo(object_list[i]); 3629 if (atomic_read(&obj_priv->pending_flip) > 0) 3630 break; 3631 } 3632 if (i == count) 3633 break; 3634 3635 if (!signal_pending(current)) { 3636 mutex_unlock(&dev->struct_mutex); 3637 schedule(); 3638 mutex_lock(&dev->struct_mutex); 3639 continue; 3640 } 3641 ret = -ERESTARTSYS; 3642 break; 3643 } 3644 finish_wait(&dev_priv->pending_flip_queue, &wait); 3645 3646 return ret; 3647 } 3648 3649 int 3650 i915_gem_do_execbuffer(struct drm_device *dev, void *data, 3651 struct drm_file *file_priv, 3652 struct drm_i915_gem_execbuffer2 *args, 3653 struct drm_i915_gem_exec_object2 *exec_list) 3654 { 3655 drm_i915_private_t *dev_priv = dev->dev_private; 3656 struct drm_gem_object **object_list = NULL; 3657 struct drm_gem_object *batch_obj; 3658 struct drm_i915_gem_object *obj_priv; 3659 struct drm_clip_rect *cliprects = NULL; 3660 struct drm_i915_gem_relocation_entry *relocs = NULL; 3661 int ret = 0, ret2, i, pinned = 0; 3662 uint64_t exec_offset; 3663 uint32_t seqno, flush_domains, reloc_index; 3664 int pin_tries, flips; 3665 3666 struct intel_ring_buffer *ring = NULL; 3667 3668 #if WATCH_EXEC 3669 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3670 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3671 #endif 3672 if (args->flags & I915_EXEC_BSD) { 3673 if (!HAS_BSD(dev)) { 3674 DRM_ERROR("execbuf with wrong flag\n"); 3675 return -EINVAL; 3676 } 3677 ring = &dev_priv->bsd_ring; 3678 } else { 3679 ring = &dev_priv->render_ring; 3680 } 3681 3682 3683 if (args->buffer_count < 1) { 3684 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3685 return -EINVAL; 3686 } 3687 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); 3688 if (object_list == NULL) { 3689 DRM_ERROR("Failed to allocate object list for %d buffers\n", 3690 args->buffer_count); 3691 ret = -ENOMEM; 3692 goto pre_mutex_err; 3693 } 3694 3695 if (args->num_cliprects != 0) { 3696 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), 
3697 GFP_KERNEL); 3698 if (cliprects == NULL) { 3699 ret = -ENOMEM; 3700 goto pre_mutex_err; 3701 } 3702 3703 ret = copy_from_user(cliprects, 3704 (struct drm_clip_rect __user *) 3705 (uintptr_t) args->cliprects_ptr, 3706 sizeof(*cliprects) * args->num_cliprects); 3707 if (ret != 0) { 3708 DRM_ERROR("copy %d cliprects failed: %d\n", 3709 args->num_cliprects, ret); ret = -EFAULT; 3710 goto pre_mutex_err; 3711 } 3712 } 3713 3714 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3715 &relocs); 3716 if (ret != 0) 3717 goto pre_mutex_err; 3718 3719 mutex_lock(&dev->struct_mutex); 3720 3721 i915_verify_inactive(dev, __FILE__, __LINE__); 3722 3723 if (atomic_read(&dev_priv->mm.wedged)) { 3724 mutex_unlock(&dev->struct_mutex); 3725 ret = -EIO; 3726 goto pre_mutex_err; 3727 } 3728 3729 if (dev_priv->mm.suspended) { 3730 mutex_unlock(&dev->struct_mutex); 3731 ret = -EBUSY; 3732 goto pre_mutex_err; 3733 } 3734 3735 /* Look up object handles */ 3736 flips = 0; 3737 for (i = 0; i < args->buffer_count; i++) { 3738 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3739 exec_list[i].handle); 3740 if (object_list[i] == NULL) { 3741 DRM_ERROR("Invalid object handle %d at index %d\n", 3742 exec_list[i].handle, i); 3743 /* prevent error path from reading uninitialized data */ 3744 args->buffer_count = i + 1; 3745 ret = -EBADF; 3746 goto err; 3747 } 3748 3749 obj_priv = to_intel_bo(object_list[i]); 3750 if (obj_priv->in_execbuffer) { 3751 DRM_ERROR("Object %p appears more than once in object list\n", 3752 object_list[i]); 3753 /* prevent error path from reading uninitialized data */ 3754 args->buffer_count = i + 1; 3755 ret = -EBADF; 3756 goto err; 3757 } 3758 obj_priv->in_execbuffer = true; 3759 flips += atomic_read(&obj_priv->pending_flip); 3760 } 3761 3762 if (flips > 0) { 3763 ret = i915_gem_wait_for_pending_flip(dev, object_list, 3764 args->buffer_count); 3765 if (ret) 3766 goto err; 3767 } 3768 3769 /* Pin and relocate */ 3770 for (pin_tries = 0; ; pin_tries++) { 3771 ret = 0; 3772 reloc_index = 0; 3773 3774 for (i = 0; i < args->buffer_count; i++) { 3775 object_list[i]->pending_read_domains = 0; 3776 object_list[i]->pending_write_domain = 0; 3777 ret = i915_gem_object_pin_and_relocate(object_list[i], 3778 file_priv, 3779 &exec_list[i], 3780 &relocs[reloc_index]); 3781 if (ret) 3782 break; 3783 pinned = i + 1; 3784 reloc_index += exec_list[i].relocation_count; 3785 } 3786 /* success */ 3787 if (ret == 0) 3788 break; 3789 3790 /* error other than GTT full, or we've already tried again */ 3791 if (ret != -ENOSPC || pin_tries >= 1) { 3792 if (ret != -ERESTARTSYS) { 3793 unsigned long long total_size = 0; 3794 int num_fences = 0; 3795 for (i = 0; i < args->buffer_count; i++) { 3796 obj_priv = to_intel_bo(object_list[i]); 3797 3798 total_size += object_list[i]->size; 3799 num_fences += 3800 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE && 3801 obj_priv->tiling_mode != I915_TILING_NONE; 3802 } 3803 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n", 3804 pinned+1, args->buffer_count, 3805 total_size, num_fences, 3806 ret); 3807 DRM_ERROR("%d objects [%d pinned], " 3808 "%d object bytes [%d pinned], " 3809 "%d/%d gtt bytes\n", 3810 atomic_read(&dev->object_count), 3811 atomic_read(&dev->pin_count), 3812 atomic_read(&dev->object_memory), 3813 atomic_read(&dev->pin_memory), 3814 atomic_read(&dev->gtt_memory), 3815 dev->gtt_total); 3816 } 3817 goto err; 3818 } 3819 3820 /* unpin all of our buffers */ 3821 for (i = 0; i < pinned; i++) 3822 i915_gem_object_unpin(object_list[i]);
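/* Pinning is all-or-nothing per pass: on the first -ENOSPC every buffer
 * pinned so far is released, the whole aperture is evicted, and the
 * loop above retries exactly once more before giving up.
 */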
		pinned = 0;

		/* evict everyone we can from the aperture */
		ret = i915_gem_evict_everything(dev);
		if (ret && ret != -ENOSPC)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	batch_obj = object_list[args->buffer_count-1];
	if (batch_obj->pending_write_domain) {
		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* Sanity check the batch buffer, prior to moving objects */
	exec_offset = exec_list[args->buffer_count - 1].offset;
	ret = i915_gem_check_execbuffer(args, exec_offset);
	if (ret != 0) {
		DRM_ERROR("execbuf with invalid offset/length\n");
		goto err;
	}

	i915_verify_inactive(dev, __FILE__, __LINE__);

	/* Zero the global flush/invalidate flags. These
	 * will be modified as new domains are computed
	 * for each object
	 */
	dev->invalidate_domains = 0;
	dev->flush_domains = 0;

	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];

		/* Compute new gpu domains and update invalidate/flush */
		i915_gem_object_set_to_gpu_domain(obj);
	}

	i915_verify_inactive(dev, __FILE__, __LINE__);

	if (dev->invalidate_domains | dev->flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 dev->invalidate_domains,
			 dev->flush_domains);
#endif
		i915_gem_flush(dev,
			       dev->invalidate_domains,
			       dev->flush_domains);
		if (dev->flush_domains & I915_GEM_GPU_DOMAINS) {
			(void)i915_add_request(dev, file_priv,
					       dev->flush_domains,
					       &dev_priv->render_ring);

			if (HAS_BSD(dev))
				(void)i915_add_request(dev, file_priv,
						       dev->flush_domains,
						       &dev_priv->bsd_ring);
		}
	}

	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];
		struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
		uint32_t old_write_domain = obj->write_domain;

		obj->write_domain = obj->pending_write_domain;
		if (obj->write_domain)
			list_move_tail(&obj_priv->gpu_write_list,
				       &dev_priv->mm.gpu_write_list);
		else
			list_del_init(&obj_priv->gpu_write_list);

		trace_i915_gem_object_change_domain(obj,
						    obj->read_domains,
						    old_write_domain);
	}

	i915_verify_inactive(dev, __FILE__, __LINE__);

#if WATCH_COHERENCY
	for (i = 0; i < args->buffer_count; i++) {
		i915_gem_object_check_coherency(object_list[i],
						exec_list[i].handle);
	}
#endif

#if WATCH_EXEC
	i915_gem_dump_object(batch_obj,
			     args->batch_len,
			     __func__,
			     ~0);
#endif

	/* Exec the batchbuffer */
	ret = ring->dispatch_gem_execbuffer(dev, ring, args,
					    cliprects, exec_offset);
	if (ret) {
		DRM_ERROR("dispatch failed %d\n", ret);
		goto err;
	}

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires
	 */
	flush_domains = i915_retire_commands(dev, ring);

	i915_verify_inactive(dev, __FILE__, __LINE__);

	/*
	 * Get a seqno representing the execution of the current buffer,
	 * which we can wait on.  We would like to mitigate these interrupts,
	 * likely by only creating seqnos occasionally (so that we have
	 * *some* interrupts representing completion of buffers that we can
	 * wait on when trying to clear up gtt space).
	 */
	seqno = i915_add_request(dev, file_priv, flush_domains, ring);
	BUG_ON(seqno == 0);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];
		obj_priv = to_intel_bo(obj);

		i915_gem_object_move_to_active(obj, seqno, ring);
#if WATCH_LRU
		DRM_INFO("%s: move to exec list %p\n", __func__, obj);
#endif
	}
#if WATCH_LRU
	i915_dump_lru(dev, __func__);
#endif

	i915_verify_inactive(dev, __FILE__, __LINE__);

err:
	for (i = 0; i < pinned; i++)
		i915_gem_object_unpin(object_list[i]);

	for (i = 0; i < args->buffer_count; i++) {
		if (object_list[i]) {
			obj_priv = to_intel_bo(object_list[i]);
			obj_priv->in_execbuffer = false;
		}
		drm_gem_object_unreference(object_list[i]);
	}

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	/* Copy the updated relocations out regardless of current error
	 * state.  Failure to update the relocs would mean that the next
	 * time userland calls execbuf, it would do so with presumed offset
	 * state that didn't match the actual object state.
	 */
	ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
					   relocs);
	if (ret2 != 0) {
		DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);

		if (ret == 0)
			ret = ret2;
	}

	drm_free_large(object_list);
	kfree(cliprects);

	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
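 *
 * The one field the conversion has to synthesize is flags: pre-965
 * hardware needs a fence register to resolve tiling while rendering,
 * so every entry is marked EXEC_OBJECT_NEEDS_FENCE on those chips.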
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     (struct drm_i915_gem_exec_object __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (!IS_I965G(dev))
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;

	ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_gem_exec_object2 __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((struct drm_i915_gem_exec_object2 __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int ret;

	BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);

	i915_verify_inactive(dev, __FILE__, __LINE__);

	if (obj_priv->gtt_space != NULL) {
		if (alignment == 0)
			alignment = i915_gem_get_gtt_alignment(obj);
		if (obj_priv->gtt_offset & (alignment - 1)) {
			ret = i915_gem_object_unbind(obj);
			if (ret)
				return ret;
		}
	}

	if (obj_priv->gtt_space == NULL) {
		ret = i915_gem_object_bind_to_gtt(obj, alignment);
		if (ret)
			return ret;
	}

	obj_priv->pin_count++;

	/* If the object is not active and not pending a flush,
	 * remove it from the inactive list
	 */
	if (obj_priv->pin_count == 1) {
		atomic_inc(&dev->pin_count);
		atomic_add(obj->size, &dev->pin_memory);
		if (!obj_priv->active &&
		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
		    !list_empty(&obj_priv->list))
			list_del_init(&obj_priv->list);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);

	return 0;
}

void
i915_gem_object_unpin(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	i915_verify_inactive(dev, __FILE__, __LINE__);
	obj_priv->pin_count--;
	BUG_ON(obj_priv->pin_count < 0);
	BUG_ON(obj_priv->gtt_space == NULL);

	/* If the object is no longer pinned, and is
	 * neither active nor being flushed, then stick it on
	 * the inactive list
	 */
	if (obj_priv->pin_count == 0) {
		if (!obj_priv->active &&
		    (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
			list_move_tail(&obj_priv->list,
				       &dev_priv->mm.inactive_list);
		atomic_dec(&dev->pin_count);
		atomic_sub(obj->size, &dev->pin_memory);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	mutex_lock(&dev->struct_mutex);

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}
	obj_priv = to_intel_bo(obj);

	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to pin a purgeable buffer\n");
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	obj_priv->user_pin_count++;
	obj_priv->pin_filp = file_priv;
	if (obj_priv->user_pin_count == 1) {
		ret = i915_gem_object_pin(obj, args->alignment);
		if (ret != 0) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
	i915_gem_object_flush_cpu_write_domain(obj);
	args->offset = obj_priv->gtt_offset;
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	mutex_lock(&dev->struct_mutex);

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
			  args->handle);
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

	obj_priv = to_intel_bo(obj);
	if (obj_priv->pin_filp != file_priv) {
		DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
			  args->handle);
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}
	obj_priv->user_pin_count--;
	if (obj_priv->user_pin_count == 0) {
		obj_priv->pin_filp = NULL;
		i915_gem_object_unpin(obj);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return 0;
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	drm_i915_private_t *dev_priv = dev->dev_private;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
			  args->handle);
		return -EBADF;
	}

	mutex_lock(&dev->struct_mutex);
	/* Update the active list for the hardware's current position.
	 * Otherwise this only updates on a delayed timer or when irqs are
	 * actually unmasked, and our working set ends up being larger than
	 * required.
	 */
	i915_gem_retire_requests(dev, &dev_priv->render_ring);

	if (HAS_BSD(dev))
		i915_gem_retire_requests(dev, &dev_priv->bsd_ring);

	obj_priv = to_intel_bo(obj);
	/* Don't count being on the flushing list against the object being
	 * done.  Otherwise, a buffer left on the flushing list but not getting
	 * flushed (because nobody's flushing that domain) won't ever return
	 * unbusy and get reused by libdrm's bo cache.  The other expected
	 * consumer of this interface, OpenGL's occlusion queries, also specs
	 * that the objects get unbusy "eventually" without any interference.
	 */
	args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return 0;
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_gem_madvise *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
			  args->handle);
		return -EBADF;
	}

	mutex_lock(&dev->struct_mutex);
	obj_priv = to_intel_bo(obj);

	if (obj_priv->pin_count) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);

		DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
		return -EINVAL;
	}

	if (obj_priv->madv != __I915_MADV_PURGED)
		obj_priv->madv = args->madv;

	/* if the object is no longer bound, discard its backing storage */
	if (i915_gem_object_is_purgeable(obj_priv) &&
	    obj_priv->gtt_space == NULL)
		i915_gem_object_truncate(obj);

	args->retained = obj_priv->madv != __I915_MADV_PURGED;

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

struct drm_gem_object *i915_gem_alloc_object(struct drm_device *dev,
					     size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (obj == NULL)
		return NULL;

	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		kfree(obj);
		return NULL;
	}

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	obj->agp_type = AGP_USER_MEMORY;
	obj->base.driver_private = NULL;
	obj->fence_reg = I915_FENCE_REG_NONE;
	INIT_LIST_HEAD(&obj->list);
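	/* obj->list links the object into whichever of the active,
	 * flushing or inactive lists it currently lives on;
	 * gpu_write_list tracks pending GPU writes that still need
	 * flushing.
	 */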
	INIT_LIST_HEAD(&obj->gpu_write_list);
	obj->madv = I915_MADV_WILLNEED;

	trace_i915_gem_object_create(&obj->base);

	return &obj->base;
}

int i915_gem_init_object(struct drm_gem_object *obj)
{
	/* i915 GEM objects are only ever created through
	 * i915_gem_alloc_object(), so landing here is a driver bug.
	 */
	BUG();

	return 0;
}

void i915_gem_free_object(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	trace_i915_gem_object_destroy(obj);

	while (obj_priv->pin_count > 0)
		i915_gem_object_unpin(obj);

	if (obj_priv->phys_obj)
		i915_gem_detach_phys_object(dev, obj);

	i915_gem_object_unbind(obj);

	if (obj_priv->mmap_offset)
		i915_gem_free_mmap_offset(obj);

	drm_gem_object_release(obj);

	kfree(obj_priv->page_cpu_valid);
	kfree(obj_priv->bit_17);
	kfree(obj_priv);
}

/** Unbinds all inactive objects. */
static int
i915_gem_evict_from_inactive_list(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	while (!list_empty(&dev_priv->mm.inactive_list)) {
		struct drm_gem_object *obj;
		int ret;

		obj = &list_first_entry(&dev_priv->mm.inactive_list,
					struct drm_i915_gem_object,
					list)->base;

		ret = i915_gem_object_unbind(obj);
		if (ret != 0) {
			DRM_ERROR("Error unbinding object: %d\n", ret);
			return ret;
		}
	}

	return 0;
}

int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	mutex_lock(&dev->struct_mutex);

	if (dev_priv->mm.suspended ||
	    (dev_priv->render_ring.gem_object == NULL) ||
	    (HAS_BSD(dev) &&
	     dev_priv->bsd_ring.gem_object == NULL)) {
		mutex_unlock(&dev->struct_mutex);
		return 0;
	}

	ret = i915_gpu_idle(dev);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
		ret = i915_gem_evict_from_inactive_list(dev);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound mm.suspended!
	 */
	dev_priv->mm.suspended = 1;
	del_timer(&dev_priv->hangcheck_timer);

	i915_kernel_lost_context(dev);
	i915_gem_cleanup_ringbuffer(dev);

	mutex_unlock(&dev->struct_mutex);

	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);

	return 0;
}

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
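 *
 * The page allocated by i915_gem_init_pipe_control() below is the
 * GTT-addressable scratch space that PIPE_CONTROL writes its seqno
 * cookie to; it stays pinned and kmapped until
 * i915_gem_cleanup_pipe_control() tears it down.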
 */
static int
i915_gem_init_pipe_control(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}
	obj_priv = to_intel_bo(obj);
	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret)
		goto err_unref;

	dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
	dev_priv->seqno_page = kmap(obj_priv->pages[0]);
	if (dev_priv->seqno_page == NULL) {
		ret = -ENOMEM;	/* don't return the 0 left over from pinning */
		goto err_unpin;
	}

	dev_priv->seqno_obj = obj;
	memset(dev_priv->seqno_page, 0, PAGE_SIZE);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(obj);
err:
	return ret;
}

static void
i915_gem_cleanup_pipe_control(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	obj = dev_priv->seqno_obj;
	obj_priv = to_intel_bo(obj);
	kunmap(obj_priv->pages[0]);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(obj);
	dev_priv->seqno_obj = NULL;

	dev_priv->seqno_page = NULL;
}

int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	dev_priv->render_ring = render_ring;

	if (!I915_NEED_GFX_HWS(dev)) {
		dev_priv->render_ring.status_page.page_addr
			= dev_priv->status_page_dmah->vaddr;
		memset(dev_priv->render_ring.status_page.page_addr,
		       0, PAGE_SIZE);
	}

	if (HAS_PIPE_CONTROL(dev)) {
		ret = i915_gem_init_pipe_control(dev);
		if (ret)
			return ret;
	}

	ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
	if (ret)
		goto cleanup_pipe_control;

	if (HAS_BSD(dev)) {
		dev_priv->bsd_ring = bsd_ring;
		ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
		if (ret)
			goto cleanup_render_ring;
	}

	return 0;

cleanup_render_ring:
	intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
cleanup_pipe_control:
	if (HAS_PIPE_CONTROL(dev))
		i915_gem_cleanup_pipe_control(dev);
	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
	if (HAS_BSD(dev))
		intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
	if (HAS_PIPE_CONTROL(dev))
		i915_gem_cleanup_pipe_control(dev);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

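	/* i915_gem_idle() ran when we last gave up the chip, so every
	 * object and request list must be empty here; the BUG_ONs below
	 * catch any state that leaked across the VT switch.
	 */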
	spin_lock(&dev_priv->mm.active_list_lock);
	BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
	BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
	spin_unlock(&dev_priv->mm.active_list_lock);

	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
	BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
	mutex_unlock(&dev->struct_mutex);

	drm_irq_install(dev);

	return 0;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock_init(&dev_priv->mm.active_list_lock);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
	INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
	if (HAS_BSD(dev)) {
		INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
		INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
	}
	for (i = 0; i < 16; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	spin_lock(&shrink_list_lock);
	list_add(&dev_priv->mm.shrink_list, &shrink_list);
	spin_unlock(&shrink_list_lock);

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	if (IS_I965G(dev)) {
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
	} else {
		for (i = 0; i < 8; i++)
			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
			for (i = 0; i < 8; i++)
				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
	}
	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);
}

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
int i915_gem_init_phys_object(struct drm_device *dev,
			      int id, int size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, 0);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj)
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;
	int i;
	int ret;
	int page_count;

	obj_priv = to_intel_bo(obj);
	if (!obj_priv->phys_obj)
		return;

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret)
		goto out;

	page_count = obj->size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);
	}
	drm_clflush_pages(obj_priv->pages, page_count);
	drm_agp_chipset_flush(dev);

	i915_gem_object_put_pages(obj);
out:
	obj_priv->phys_obj->cur_obj = NULL;
	obj_priv->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_gem_object *obj, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	obj_priv = to_intel_bo(obj);

	if (obj_priv->phys_obj) {
		if (obj_priv->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->size);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
			goto out;
		}
	}

	/* bind to the object */
	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj_priv->phys_obj->cur_obj = obj;

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret) {
		DRM_ERROR("failed to get page list\n");
		goto out;
	}

	page_count = obj->size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER0);
	}

	i915_gem_object_put_pages(obj);

	return 0;
out:
	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	void *obj_addr;
	int ret;
	char __user *user_data;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;

	DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
	ret = copy_from_user(obj_addr, user_data, args->size);
	if (ret)
		return -EFAULT;

	drm_agp_chipset_flush(dev);
	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	mutex_lock(&dev->struct_mutex);
	while (!list_empty(&i915_file_priv->mm.request_list))
		list_del_init(i915_file_priv->mm.request_list.next);
	mutex_unlock(&dev->struct_mutex);
}

static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	spin_lock(&dev_priv->mm.active_list_lock);
	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->render_ring.active_list);
	if (HAS_BSD(dev))
		lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
	spin_unlock(&dev_priv->mm.active_list_lock);

	return !lists_empty;
}

static int
i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	drm_i915_private_t *dev_priv, *next_dev;
	struct drm_i915_gem_object *obj_priv, *next_obj;
	int cnt = 0;
	int would_deadlock = 1;

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
		spin_lock(&shrink_list_lock);
		list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
			struct drm_device *dev = dev_priv->dev;

			if (mutex_trylock(&dev->struct_mutex)) {
				list_for_each_entry(obj_priv,
						    &dev_priv->mm.inactive_list,
						    list)
					cnt++;
				mutex_unlock(&dev->struct_mutex);
			}
		}
		spin_unlock(&shrink_list_lock);

		return (cnt / 100) * sysctl_vfs_cache_pressure;
	}

	spin_lock(&shrink_list_lock);

rescan:
	/* first scan for clean buffers */
	list_for_each_entry_safe(dev_priv, next_dev,
				 &shrink_list, mm.shrink_list) {
		struct drm_device *dev = dev_priv->dev;

		if (!mutex_trylock(&dev->struct_mutex))
			continue;

		spin_unlock(&shrink_list_lock);
		i915_gem_retire_requests(dev, &dev_priv->render_ring);

		if (HAS_BSD(dev))
			i915_gem_retire_requests(dev, &dev_priv->bsd_ring);

		list_for_each_entry_safe(obj_priv, next_obj,
					 &dev_priv->mm.inactive_list,
					 list) {
			if (i915_gem_object_is_purgeable(obj_priv)) {
				i915_gem_object_unbind(&obj_priv->base);
				if (--nr_to_scan <= 0)
					break;
			}
		}

		spin_lock(&shrink_list_lock);
		mutex_unlock(&dev->struct_mutex);

		would_deadlock = 0;

		if (nr_to_scan <= 0)
			break;
	}

	/* second pass, evict/count anything still on the inactive list */
	list_for_each_entry_safe(dev_priv, next_dev,
				 &shrink_list, mm.shrink_list) {
		struct drm_device *dev = dev_priv->dev;

		if (!mutex_trylock(&dev->struct_mutex))
			continue;

		spin_unlock(&shrink_list_lock);

		list_for_each_entry_safe(obj_priv, next_obj,
					 &dev_priv->mm.inactive_list,
					 list) {
			if (nr_to_scan > 0) {
				i915_gem_object_unbind(&obj_priv->base);
				nr_to_scan--;
			} else
				cnt++;
		}

		spin_lock(&shrink_list_lock);
		mutex_unlock(&dev->struct_mutex);

		would_deadlock = 0;
	}

	if (nr_to_scan) {
		int active = 0;

		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This has a dramatic impact to reduce the number of
		 * OOM-killer events whilst running the GPU aggressively.
		 */
		list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
			struct drm_device *dev = dev_priv->dev;

			if (!mutex_trylock(&dev->struct_mutex))
				continue;

			spin_unlock(&shrink_list_lock);

			if (i915_gpu_is_active(dev)) {
				i915_gpu_idle(dev);
				active++;
			}

			spin_lock(&shrink_list_lock);
			mutex_unlock(&dev->struct_mutex);
		}

		if (active)
			goto rescan;
	}

	spin_unlock(&shrink_list_lock);

	if (would_deadlock)
		return -1;
	else if (cnt > 0)
		return (cnt / 100) * sysctl_vfs_cache_pressure;
	else
		return 0;
}

static struct shrinker shrinker = {
	.shrink = i915_gem_shrink,
	.seeks = DEFAULT_SEEKS,
};

__init void
i915_gem_shrinker_init(void)
{
	register_shrinker(&shrinker);
}

__exit void
i915_gem_shrinker_exit(void)
{
	unregister_shrinker(&shrinker);
}
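/*
 * The two shrinker entry points above are expected to be called once
 * each from the driver's module init and exit paths, so that inactive
 * GEM objects can be unbound and reclaimed under memory pressure.
 */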