/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>

#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))

static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						      uint64_t offset,
						      uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev, int min_size);
static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);

static LIST_HEAD(shrink_list);
static DEFINE_SPINLOCK(shrink_list_lock);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char __iomem *vaddr;
	int unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;

	return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline int
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap_atomic(dst_page, KM_USER0);
	if (dst_vaddr == NULL)
		return -ENOMEM;

	src_vaddr = kmap_atomic(src_page, KM_USER1);
	if (src_vaddr == NULL) {
		kunmap_atomic(dst_vaddr, KM_USER0);
		return -ENOMEM;
	}

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap_atomic(src_vaddr, KM_USER1);
	kunmap_atomic(dst_vaddr, KM_USER0);

	return 0;
}

static inline int
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
	if (gpu_vaddr == NULL)
		return -ENOMEM;

	cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
	if (cpu_vaddr == NULL) {
		kunmap_atomic(gpu_vaddr, KM_USER0);
		return -ENOMEM;
	}

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
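	/* A concrete view of the swizzle below: gpu_offset ^ 64 flips address
	 * bit 6, so each 64-byte cacheline swaps places with its neighbour
	 * (0x00-0x3f with 0x40-0x7f, 0x80-0xbf with 0xc0-0xff, and so on).
	 * Only pages whose physical address has bit 17 set take this path;
	 * the check above sends every other page through slow_shmem_copy().
	 */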
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap_atomic(cpu_vaddr, KM_USER1);
	kunmap_atomic(gpu_vaddr, KM_USER0);

	return 0;
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object into the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

static inline gfp_t
i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj)
{
	return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
}

static inline void
i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp)
{
	mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
}

static int
i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj);

	/* If we've insufficient memory to map in the pages, attempt
	 * to make some space by throwing out some old buffers.
	 */
	if (ret == -ENOMEM) {
		struct drm_device *dev = obj->dev;
		gfp_t gfp;

		ret = i915_gem_evict_something(dev, obj->size);
		if (ret)
			return ret;

		gfp = i915_gem_object_get_page_gfp_mask(obj);
		i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
		ret = i915_gem_object_get_pages(obj);
		i915_gem_object_set_page_gfp_mask (obj, gfp);
	}

	return ret;
}

/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages outside of the struct_mutex, so we can copy out of the
 * object's backing pages while holding the struct mutex and not take
 * page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    1);
		} else {
			ret = slow_shmem_copy(user_pages[data_page_index],
					      data_page_offset,
					      obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
		if (ret != 0)
			ret = i915_gem_shmem_pread_slow(dev, obj, args,
							file_priv);
	}

	drm_gem_object_unreference(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline int
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char *src_vaddr, *dst_vaddr;
	unsigned long unwritten;

	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
	src_vaddr = kmap_atomic(user_page, KM_USER1);
	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
						      src_vaddr + user_offset,
						      length);
	kunmap_atomic(src_vaddr, KM_USER1);
	io_mapping_unmap_atomic(dst_vaddr);
	if (unwritten)
		return -EFAULT;
	return 0;
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;


	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
				       page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
					gtt_page_base, gtt_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto out_unpin_object;

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    0);
		} else {
			ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
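	/* Dispatch order: objects backed by a phys object get their own copy
	 * routine; untiled objects with GTT space configured try the mapped
	 * aperture fast path and fall back to the pinned-page slow path on a
	 * fault; bit-17-swizzled objects always take the slow shmem path;
	 * everything else tries the atomic shmem fast path first.
	 */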
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference(obj);

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	mutex_lock(&dev->struct_mutex);

	intel_mark_busy(dev, obj);

#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
			list_move_tail(&obj_priv->fence_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = obj->driver_private;

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret)
			goto unlock;

		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	/* Need a new fence register? */
	if (obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj);
		if (ret)
			goto unlock;
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
		DRM_ERROR("failed to add to map hash\n");
		goto out_free_mm;
	}

	/* By now we should be all set, any drm_mmap request on the offset
	 * below will get to our mmap & fault handler */
	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	if (dev->dev_mapping)
		unmap_mapping_range(dev->dev_mapping,
				    obj_priv->mmap_offset, obj->size, 1);
}

static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;

	list = &obj->map_list;
	drm_ht_remove_item(&mm->offset_hash, &list->hash);

	if (list->file_offset_node) {
		drm_mm_put_block(list->file_offset_node);
		list->file_offset_node = NULL;
	}

	if (list->map) {
		kfree(list->map);
		list->map = NULL;
	}

	obj_priv->mmap_offset = 0;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	if (IS_I9XX(dev))
		start = 1024*1024;
	else
		start = 512*1024;

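	/* Walk up in powers of two from the minimum fence size until the
	 * object fits, e.g. a 3MB tiled object on an i915-class chip ends
	 * up with a 4MB alignment requirement (1MB -> 2MB -> 4MB).
	 */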
	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);

	obj_priv = obj->driver_private;

	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}


	if (!obj_priv->mmap_offset) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	args->offset = obj_priv->mmap_offset;

	/*
	 * Pull it into the GTT so that we have a page list (makes the
	 * initial fault faster and any subsequent flushing possible).
	 */
	if (!obj_priv->agp_mem) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

void
i915_gem_object_put_pages(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count = obj->size / PAGE_SIZE;
	int i;

	BUG_ON(obj_priv->pages_refcount == 0);
	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);

	if (--obj_priv->pages_refcount != 0)
		return;

	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj_priv->madv == I915_MADV_DONTNEED)
		obj_priv->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj_priv->pages[i] == NULL)
			break;

		if (obj_priv->dirty)
			set_page_dirty(obj_priv->pages[i]);

		if (obj_priv->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj_priv->pages[i]);

		page_cache_release(obj_priv->pages[i]);
	}
	obj_priv->dirty = 0;

	drm_free_large(obj_priv->pages);
	obj_priv->pages = NULL;
}

static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution. */
	spin_lock(&dev_priv->mm.active_list_lock);
	list_move_tail(&obj_priv->list,
		       &dev_priv->mm.active_list);
	spin_unlock(&dev_priv->mm.active_list_lock);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	BUG_ON(!obj_priv->active);
	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
	obj_priv->last_rendering_seqno = 0;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct inode *inode;

	inode = obj->filp->f_path.dentry->d_inode;
	if (inode->i_op->truncate)
		inode->i_op->truncate (inode);

	obj_priv->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
{
	return obj_priv->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	obj_priv->last_rendering_seqno = 0;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_lock held.
 *
 * Returned sequence numbers are nonzero on success.
 */
static uint32_t
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
		 uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *i915_file_priv = NULL;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	if (file_priv != NULL)
		i915_file_priv = file_priv->driver_priv;

	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL)
		return 0;

	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);

	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	DRM_DEBUG("%d\n", seqno);

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list);
	if (i915_file_priv) {
		list_add_tail(&request->client_list,
			      &i915_file_priv->mm.request_list);
	} else {
		INIT_LIST_HEAD(&request->client_list);
	}

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
	 */
	if (flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		list_for_each_entry_safe(obj_priv, next,
					 &dev_priv->mm.flushing_list, list) {
			struct drm_gem_object *obj = obj_priv->obj;

			if ((obj->write_domain & flush_domains) ==
			    obj->write_domain) {
				uint32_t old_write_domain = obj->write_domain;

				obj->write_domain = 0;
				i915_gem_object_move_to_active(obj, seqno);

				trace_i915_gem_object_change_domain(obj,
								    obj->read_domains,
								    old_write_domain);
			}
		}

	}

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
		if (was_empty)
			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	}
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	trace_i915_gem_request_retire(dev, request->seqno);

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	spin_lock(&dev_priv->mm.active_list_lock);
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&dev_priv->mm.active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			goto out;

#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else {
			/* Take a reference on the object so it won't be
			 * freed while the spinlock is held. The list
			 * protection for this spinlock is safe when breaking
			 * the lock like this since the next thing we do
			 * is just get the head of the list again.
			 */
			drm_gem_object_reference(obj);
			i915_gem_object_move_to_inactive(obj);
			spin_unlock(&dev_priv->mm.active_list_lock);
			drm_gem_object_unreference(obj);
			spin_lock(&dev_priv->mm.active_list_lock);
		}
	}
out:
	spin_unlock(&dev_priv->mm.active_list_lock);
}

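/* Seqno comparisons below use a signed difference so that they stay correct
 * across 32-bit wraparound: e.g. for seq1 == 2 and seq2 == 0xfffffffe,
 * (int32_t)(seq1 - seq2) == 4, so seq1 is still considered later.
 */
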
/**
 * Returns true if seq1 is later than seq2.
 */
bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!dev_priv->hw_status_page || list_empty(&dev_priv->mm.request_list))
		return;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&dev_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    atomic_read(&dev_priv->mm.wedged)) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			list_del(&request->client_list);
			kfree(request);
		} else
			break;
	}

	if (unlikely (dev_priv->trace_irq_seqno &&
		      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
		i915_user_irq_put(dev);
		dev_priv->trace_irq_seqno = 0;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended &&
	    !list_empty(&dev_priv->mm.request_list))
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		if (IS_IGDNG(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		trace_i915_gem_request_wait_begin(dev, seqno);

		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_get(dev);
		ret = wait_event_interruptible(dev_priv->irq_queue,
					       i915_seqno_passed(i915_get_gem_seqno(dev),
								 seqno) ||
					       atomic_read(&dev_priv->mm.wedged));
		i915_user_irq_put(dev);
		dev_priv->mm.waiting_gem_seqno = 0;

		trace_i915_gem_request_wait_end(dev, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, i915_get_gem_seqno(dev));

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

#if WATCH_EXEC
	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
		 invalidate_domains, flush_domains);
#endif
	trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
				     invalidate_domains, flush_domains);

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
		 *
		 * I915_GEM_DOMAIN_COMMAND may not exist?
		 *
		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
		 * invalidated when MI_EXE_FLUSH is set.
		 *
		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
		 * invalidated with every MI_FLUSH.
		 *
		 * TLBs:
		 *
		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
		 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
		 * are flushed at any MI_FLUSH.
		 */

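		/* e.g. flushing the render cache while invalidating the
		 * instruction cache yields MI_FLUSH | MI_EXE_FLUSH with
		 * MI_NO_WRITE_FLUSH cleared; a sampler-only invalidate on a
		 * pre-965 part yields MI_FLUSH | MI_NO_WRITE_FLUSH |
		 * MI_READ_FLUSH.
		 */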
1938 */ 1939 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 1940 cmd |= MI_READ_FLUSH; 1941 } 1942 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 1943 cmd |= MI_EXE_FLUSH; 1944 1945 #if WATCH_EXEC 1946 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); 1947 #endif 1948 BEGIN_LP_RING(2); 1949 OUT_RING(cmd); 1950 OUT_RING(0); /* noop */ 1951 ADVANCE_LP_RING(); 1952 } 1953 } 1954 1955 /** 1956 * Ensures that all rendering to the object has completed and the object is 1957 * safe to unbind from the GTT or access from the CPU. 1958 */ 1959 static int 1960 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1961 { 1962 struct drm_device *dev = obj->dev; 1963 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1964 int ret; 1965 1966 /* This function only exists to support waiting for existing rendering, 1967 * not for emitting required flushes. 1968 */ 1969 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0); 1970 1971 /* If there is rendering queued on the buffer being evicted, wait for 1972 * it. 1973 */ 1974 if (obj_priv->active) { 1975 #if WATCH_BUF 1976 DRM_INFO("%s: object %p wait for seqno %08x\n", 1977 __func__, obj, obj_priv->last_rendering_seqno); 1978 #endif 1979 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); 1980 if (ret != 0) 1981 return ret; 1982 } 1983 1984 return 0; 1985 } 1986 1987 /** 1988 * Unbinds an object from the GTT aperture. 1989 */ 1990 int 1991 i915_gem_object_unbind(struct drm_gem_object *obj) 1992 { 1993 struct drm_device *dev = obj->dev; 1994 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1995 int ret = 0; 1996 1997 #if WATCH_BUF 1998 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1999 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 2000 #endif 2001 if (obj_priv->gtt_space == NULL) 2002 return 0; 2003 2004 if (obj_priv->pin_count != 0) { 2005 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2006 return -EINVAL; 2007 } 2008 2009 /* blow away mappings if mapped through GTT */ 2010 i915_gem_release_mmap(obj); 2011 2012 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 2013 i915_gem_clear_fence_reg(obj); 2014 2015 /* Move the object to the CPU domain to ensure that 2016 * any possible CPU writes while it's not in the GTT 2017 * are flushed when we go to remap it. This will 2018 * also ensure that all pending GPU writes are finished 2019 * before we unbind. 2020 */ 2021 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2022 if (ret) { 2023 if (ret != -ERESTARTSYS) 2024 DRM_ERROR("set_domain failed: %d\n", ret); 2025 return ret; 2026 } 2027 2028 BUG_ON(obj_priv->active); 2029 2030 if (obj_priv->agp_mem != NULL) { 2031 drm_unbind_agp(obj_priv->agp_mem); 2032 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 2033 obj_priv->agp_mem = NULL; 2034 } 2035 2036 i915_gem_object_put_pages(obj); 2037 BUG_ON(obj_priv->pages_refcount); 2038 2039 if (obj_priv->gtt_space) { 2040 atomic_dec(&dev->gtt_count); 2041 atomic_sub(obj->size, &dev->gtt_memory); 2042 2043 drm_mm_put_block(obj_priv->gtt_space); 2044 obj_priv->gtt_space = NULL; 2045 } 2046 2047 /* Remove ourselves from the LRU list if present. 
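 * By this point the fence register (if any) has been cleared, the move to
 * the CPU domain has flushed any pending CPU and GPU writes, the AGP
 * binding and backing pages have been released, and the GTT node has been
 * handed back to drm_mm, so dropping off whichever list we are on leaves
 * the object completely detached from the aperture.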
*/ 2048 if (!list_empty(&obj_priv->list)) 2049 list_del_init(&obj_priv->list); 2050 2051 if (i915_gem_object_is_purgeable(obj_priv)) 2052 i915_gem_object_truncate(obj); 2053 2054 trace_i915_gem_object_unbind(obj); 2055 2056 return 0; 2057 } 2058 2059 static struct drm_gem_object * 2060 i915_gem_find_inactive_object(struct drm_device *dev, int min_size) 2061 { 2062 drm_i915_private_t *dev_priv = dev->dev_private; 2063 struct drm_i915_gem_object *obj_priv; 2064 struct drm_gem_object *best = NULL; 2065 struct drm_gem_object *first = NULL; 2066 2067 /* Try to find the smallest clean object */ 2068 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { 2069 struct drm_gem_object *obj = obj_priv->obj; 2070 if (obj->size >= min_size) { 2071 if ((!obj_priv->dirty || 2072 i915_gem_object_is_purgeable(obj_priv)) && 2073 (!best || obj->size < best->size)) { 2074 best = obj; 2075 if (best->size == min_size) 2076 return best; 2077 } 2078 if (!first) 2079 first = obj; 2080 } 2081 } 2082 2083 return best ? best : first; 2084 } 2085 2086 static int 2087 i915_gem_evict_everything(struct drm_device *dev) 2088 { 2089 drm_i915_private_t *dev_priv = dev->dev_private; 2090 uint32_t seqno; 2091 int ret; 2092 bool lists_empty; 2093 2094 spin_lock(&dev_priv->mm.active_list_lock); 2095 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2096 list_empty(&dev_priv->mm.flushing_list) && 2097 list_empty(&dev_priv->mm.active_list)); 2098 spin_unlock(&dev_priv->mm.active_list_lock); 2099 2100 if (lists_empty) 2101 return -ENOSPC; 2102 2103 /* Flush everything (on to the inactive lists) and evict */ 2104 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2105 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); 2106 if (seqno == 0) 2107 return -ENOMEM; 2108 2109 ret = i915_wait_request(dev, seqno); 2110 if (ret) 2111 return ret; 2112 2113 ret = i915_gem_evict_from_inactive_list(dev); 2114 if (ret) 2115 return ret; 2116 2117 spin_lock(&dev_priv->mm.active_list_lock); 2118 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2119 list_empty(&dev_priv->mm.flushing_list) && 2120 list_empty(&dev_priv->mm.active_list)); 2121 spin_unlock(&dev_priv->mm.active_list_lock); 2122 BUG_ON(!lists_empty); 2123 2124 return 0; 2125 } 2126 2127 static int 2128 i915_gem_evict_something(struct drm_device *dev, int min_size) 2129 { 2130 drm_i915_private_t *dev_priv = dev->dev_private; 2131 struct drm_gem_object *obj; 2132 int ret; 2133 2134 for (;;) { 2135 i915_gem_retire_requests(dev); 2136 2137 /* If there's an inactive buffer available now, grab it 2138 * and be done. 2139 */ 2140 obj = i915_gem_find_inactive_object(dev, min_size); 2141 if (obj) { 2142 struct drm_i915_gem_object *obj_priv; 2143 2144 #if WATCH_LRU 2145 DRM_INFO("%s: evicting %p\n", __func__, obj); 2146 #endif 2147 obj_priv = obj->driver_private; 2148 BUG_ON(obj_priv->pin_count != 0); 2149 BUG_ON(obj_priv->active); 2150 2151 /* Wait on the rendering and unbind the buffer. */ 2152 return i915_gem_object_unbind(obj); 2153 } 2154 2155 /* If we didn't get anything, but the ring is still processing 2156 * things, wait for the next to finish and hopefully leave us 2157 * a buffer to evict. 
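 * i915_wait_request() retires completed requests on success, so once the
 * oldest request signals, its buffers drop onto the inactive (or flushing)
 * list and the next pass of this loop can pick one up.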
2158 */ 2159 if (!list_empty(&dev_priv->mm.request_list)) { 2160 struct drm_i915_gem_request *request; 2161 2162 request = list_first_entry(&dev_priv->mm.request_list, 2163 struct drm_i915_gem_request, 2164 list); 2165 2166 ret = i915_wait_request(dev, request->seqno); 2167 if (ret) 2168 return ret; 2169 2170 continue; 2171 } 2172 2173 /* If we didn't have anything on the request list but there 2174 * are buffers awaiting a flush, emit one and try again. 2175 * When we wait on it, those buffers waiting for that flush 2176 * will get moved to inactive. 2177 */ 2178 if (!list_empty(&dev_priv->mm.flushing_list)) { 2179 struct drm_i915_gem_object *obj_priv; 2180 2181 /* Find an object that we can immediately reuse */ 2182 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { 2183 obj = obj_priv->obj; 2184 if (obj->size >= min_size) 2185 break; 2186 2187 obj = NULL; 2188 } 2189 2190 if (obj != NULL) { 2191 uint32_t seqno; 2192 2193 i915_gem_flush(dev, 2194 obj->write_domain, 2195 obj->write_domain); 2196 seqno = i915_add_request(dev, NULL, obj->write_domain); 2197 if (seqno == 0) 2198 return -ENOMEM; 2199 2200 ret = i915_wait_request(dev, seqno); 2201 if (ret) 2202 return ret; 2203 2204 continue; 2205 } 2206 } 2207 2208 /* If we didn't do any of the above, there's no single buffer 2209 * large enough to swap out for the new one, so just evict 2210 * everything and start again. (This should be rare.) 2211 */ 2212 if (!list_empty (&dev_priv->mm.inactive_list)) 2213 return i915_gem_evict_from_inactive_list(dev); 2214 else 2215 return i915_gem_evict_everything(dev); 2216 } 2217 } 2218 2219 int 2220 i915_gem_object_get_pages(struct drm_gem_object *obj) 2221 { 2222 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2223 int page_count, i; 2224 struct address_space *mapping; 2225 struct inode *inode; 2226 struct page *page; 2227 int ret; 2228 2229 if (obj_priv->pages_refcount++ != 0) 2230 return 0; 2231 2232 /* Get the list of pages out of our struct file. They'll be pinned 2233 * at this point until we release them. 
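 * The backing store is a shmem file, so each page is looked up (and thereby
 * pinned) with read_mapping_page(); e.g. a 64KiB object on 4KiB pages fills
 * sixteen slots of the obj_priv->pages array.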
2234 */ 2235 page_count = obj->size / PAGE_SIZE; 2236 BUG_ON(obj_priv->pages != NULL); 2237 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2238 if (obj_priv->pages == NULL) { 2239 obj_priv->pages_refcount--; 2240 return -ENOMEM; 2241 } 2242 2243 inode = obj->filp->f_path.dentry->d_inode; 2244 mapping = inode->i_mapping; 2245 for (i = 0; i < page_count; i++) { 2246 page = read_mapping_page(mapping, i, NULL); 2247 if (IS_ERR(page)) { 2248 ret = PTR_ERR(page); 2249 i915_gem_object_put_pages(obj); 2250 return ret; 2251 } 2252 obj_priv->pages[i] = page; 2253 } 2254 2255 if (obj_priv->tiling_mode != I915_TILING_NONE) 2256 i915_gem_object_do_bit_17_swizzle(obj); 2257 2258 return 0; 2259 } 2260 2261 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2262 { 2263 struct drm_gem_object *obj = reg->obj; 2264 struct drm_device *dev = obj->dev; 2265 drm_i915_private_t *dev_priv = dev->dev_private; 2266 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2267 int regnum = obj_priv->fence_reg; 2268 uint64_t val; 2269 2270 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2271 0xfffff000) << 32; 2272 val |= obj_priv->gtt_offset & 0xfffff000; 2273 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2274 if (obj_priv->tiling_mode == I915_TILING_Y) 2275 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2276 val |= I965_FENCE_REG_VALID; 2277 2278 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2279 } 2280 2281 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2282 { 2283 struct drm_gem_object *obj = reg->obj; 2284 struct drm_device *dev = obj->dev; 2285 drm_i915_private_t *dev_priv = dev->dev_private; 2286 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2287 int regnum = obj_priv->fence_reg; 2288 int tile_width; 2289 uint32_t fence_reg, val; 2290 uint32_t pitch_val; 2291 2292 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2293 (obj_priv->gtt_offset & (obj->size - 1))) { 2294 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2295 __func__, obj_priv->gtt_offset, obj->size); 2296 return; 2297 } 2298 2299 if (obj_priv->tiling_mode == I915_TILING_Y && 2300 HAS_128_BYTE_Y_TILING(dev)) 2301 tile_width = 128; 2302 else 2303 tile_width = 512; 2304 2305 /* Note: pitch better be a power of two tile widths */ 2306 pitch_val = obj_priv->stride / tile_width; 2307 pitch_val = ffs(pitch_val) - 1; 2308 2309 val = obj_priv->gtt_offset; 2310 if (obj_priv->tiling_mode == I915_TILING_Y) 2311 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2312 val |= I915_FENCE_SIZE_BITS(obj->size); 2313 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2314 val |= I830_FENCE_REG_VALID; 2315 2316 if (regnum < 8) 2317 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2318 else 2319 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2320 I915_WRITE(fence_reg, val); 2321 } 2322 2323 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2324 { 2325 struct drm_gem_object *obj = reg->obj; 2326 struct drm_device *dev = obj->dev; 2327 drm_i915_private_t *dev_priv = dev->dev_private; 2328 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2329 int regnum = obj_priv->fence_reg; 2330 uint32_t val; 2331 uint32_t pitch_val; 2332 uint32_t fence_size_bits; 2333 2334 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2335 (obj_priv->gtt_offset & (obj->size - 1))) { 2336 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2337 __func__, obj_priv->gtt_offset); 2338 return; 2339 } 2340 2341 pitch_val = obj_priv->stride / 128; 2342 pitch_val = 
ffs(pitch_val) - 1; 2343 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2344 2345 val = obj_priv->gtt_offset; 2346 if (obj_priv->tiling_mode == I915_TILING_Y) 2347 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2348 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2349 WARN_ON(fence_size_bits & ~0x00000f00); 2350 val |= fence_size_bits; 2351 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2352 val |= I830_FENCE_REG_VALID; 2353 2354 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2355 } 2356 2357 /** 2358 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2359 * @obj: object to map through a fence reg 2360 * 2361 * When mapping objects through the GTT, userspace wants to be able to write 2362 * to them without having to worry about swizzling if the object is tiled. 2363 * 2364 * This function walks the fence regs looking for a free one for @obj, 2365 * stealing one if it can't find any. 2366 * 2367 * It then sets up the reg based on the object's properties: address, pitch 2368 * and tiling format. 2369 */ 2370 int 2371 i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 2372 { 2373 struct drm_device *dev = obj->dev; 2374 struct drm_i915_private *dev_priv = dev->dev_private; 2375 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2376 struct drm_i915_fence_reg *reg = NULL; 2377 struct drm_i915_gem_object *old_obj_priv = NULL; 2378 int i, ret, avail; 2379 2380 /* Just update our place in the LRU if our fence is getting used. */ 2381 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 2382 list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); 2383 return 0; 2384 } 2385 2386 switch (obj_priv->tiling_mode) { 2387 case I915_TILING_NONE: 2388 WARN(1, "allocating a fence for non-tiled object?\n"); 2389 break; 2390 case I915_TILING_X: 2391 if (!obj_priv->stride) 2392 return -EINVAL; 2393 WARN((obj_priv->stride & (512 - 1)), 2394 "object 0x%08x is X tiled but has non-512B pitch\n", 2395 obj_priv->gtt_offset); 2396 break; 2397 case I915_TILING_Y: 2398 if (!obj_priv->stride) 2399 return -EINVAL; 2400 WARN((obj_priv->stride & (128 - 1)), 2401 "object 0x%08x is Y tiled but has non-128B pitch\n", 2402 obj_priv->gtt_offset); 2403 break; 2404 } 2405 2406 /* First try to find a free reg */ 2407 avail = 0; 2408 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2409 reg = &dev_priv->fence_regs[i]; 2410 if (!reg->obj) 2411 break; 2412 2413 old_obj_priv = reg->obj->driver_private; 2414 if (!old_obj_priv->pin_count) 2415 avail++; 2416 } 2417 2418 /* None available, try to steal one or wait for a user to finish */ 2419 if (i == dev_priv->num_fence_regs) { 2420 struct drm_gem_object *old_obj = NULL; 2421 2422 if (avail == 0) 2423 return -ENOSPC; 2424 2425 list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list, 2426 fence_list) { 2427 old_obj = old_obj_priv->obj; 2428 2429 if (old_obj_priv->pin_count) 2430 continue; 2431 2432 /* Take a reference, as otherwise the wait_rendering 2433 * below may cause the object to get freed out from 2434 * under us. 2435 */ 2436 drm_gem_object_reference(old_obj); 2437 2438 /* i915 uses fences for GPU access to tiled buffers */ 2439 if (IS_I965G(dev) || !old_obj_priv->active) 2440 break; 2441 2442 /* This brings the object to the head of the LRU if it 2443 * had been written to. The only way this should 2444 * result in us waiting longer than the expected 2445 * optimal amount of time is if there was a 2446 * fence-using buffer later that was read-only. 
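 * (Only pre-965 hardware gets this far: on 965 the GPU does not go through
 * the fence for tiled access, so the break above already picked an object,
 * active or not, without needing to wait.)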
2447 */ 2448 i915_gem_object_flush_gpu_write_domain(old_obj); 2449 ret = i915_gem_object_wait_rendering(old_obj); 2450 if (ret != 0) { 2451 drm_gem_object_unreference(old_obj); 2452 return ret; 2453 } 2454 2455 break; 2456 } 2457 2458 /* 2459 * Zap this virtual mapping so we can set up a fence again 2460 * for this object next time we need it. 2461 */ 2462 i915_gem_release_mmap(old_obj); 2463 2464 i = old_obj_priv->fence_reg; 2465 reg = &dev_priv->fence_regs[i]; 2466 2467 old_obj_priv->fence_reg = I915_FENCE_REG_NONE; 2468 list_del_init(&old_obj_priv->fence_list); 2469 2470 drm_gem_object_unreference(old_obj); 2471 } 2472 2473 obj_priv->fence_reg = i; 2474 list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); 2475 2476 reg->obj = obj; 2477 2478 if (IS_I965G(dev)) 2479 i965_write_fence_reg(reg); 2480 else if (IS_I9XX(dev)) 2481 i915_write_fence_reg(reg); 2482 else 2483 i830_write_fence_reg(reg); 2484 2485 trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); 2486 2487 return 0; 2488 } 2489 2490 /** 2491 * i915_gem_clear_fence_reg - clear out fence register info 2492 * @obj: object to clear 2493 * 2494 * Zeroes out the fence register itself and clears out the associated 2495 * data structures in dev_priv and obj_priv. 2496 */ 2497 static void 2498 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2499 { 2500 struct drm_device *dev = obj->dev; 2501 drm_i915_private_t *dev_priv = dev->dev_private; 2502 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2503 2504 if (IS_I965G(dev)) 2505 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2506 else { 2507 uint32_t fence_reg; 2508 2509 if (obj_priv->fence_reg < 8) 2510 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2511 else 2512 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2513 8) * 4; 2514 2515 I915_WRITE(fence_reg, 0); 2516 } 2517 2518 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; 2519 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2520 list_del_init(&obj_priv->fence_list); 2521 } 2522 2523 /** 2524 * i915_gem_object_put_fence_reg - waits on outstanding fenced access 2525 * to the buffer to finish, and then resets the fence register. 2526 * @obj: tiled object holding a fence register. 2527 * 2528 * Zeroes out the fence register itself and clears out the associated 2529 * data structures in dev_priv and obj_priv. 2530 */ 2531 int 2532 i915_gem_object_put_fence_reg(struct drm_gem_object *obj) 2533 { 2534 struct drm_device *dev = obj->dev; 2535 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2536 2537 if (obj_priv->fence_reg == I915_FENCE_REG_NONE) 2538 return 0; 2539 2540 /* On the i915, GPU access to tiled buffers is via a fence, 2541 * therefore we must wait for any outstanding access to complete 2542 * before clearing the fence. 2543 */ 2544 if (!IS_I965G(dev)) { 2545 int ret; 2546 2547 i915_gem_object_flush_gpu_write_domain(obj); 2548 i915_gem_object_flush_gtt_write_domain(obj); 2549 ret = i915_gem_object_wait_rendering(obj); 2550 if (ret != 0) 2551 return ret; 2552 } 2553 2554 i915_gem_clear_fence_reg (obj); 2555 2556 return 0; 2557 } 2558 2559 /** 2560 * Finds free space in the GTT aperture and binds the object there. 
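 *
 * If the drm_mm allocator has no hole big enough, i915_gem_evict_something()
 * is asked to free at least obj->size bytes and the search is retried.  If
 * allocating the backing pages then fails with -ENOMEM, eviction is tried
 * first; only if that also fails is the allocation retried with
 * __GFP_NORETRY cleared so the page allocator can try harder.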
2561 */ 2562 static int 2563 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2564 { 2565 struct drm_device *dev = obj->dev; 2566 drm_i915_private_t *dev_priv = dev->dev_private; 2567 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2568 struct drm_mm_node *free_space; 2569 bool retry_alloc = false; 2570 int ret; 2571 2572 if (dev_priv->mm.suspended) 2573 return -EBUSY; 2574 2575 if (obj_priv->madv != I915_MADV_WILLNEED) { 2576 DRM_ERROR("Attempting to bind a purgeable object\n"); 2577 return -EINVAL; 2578 } 2579 2580 if (alignment == 0) 2581 alignment = i915_gem_get_gtt_alignment(obj); 2582 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2583 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2584 return -EINVAL; 2585 } 2586 2587 search_free: 2588 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2589 obj->size, alignment, 0); 2590 if (free_space != NULL) { 2591 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2592 alignment); 2593 if (obj_priv->gtt_space != NULL) { 2594 obj_priv->gtt_space->private = obj; 2595 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2596 } 2597 } 2598 if (obj_priv->gtt_space == NULL) { 2599 /* If the gtt is empty and we're still having trouble 2600 * fitting our object in, we're out of memory. 2601 */ 2602 #if WATCH_LRU 2603 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2604 #endif 2605 ret = i915_gem_evict_something(dev, obj->size); 2606 if (ret) 2607 return ret; 2608 2609 goto search_free; 2610 } 2611 2612 #if WATCH_BUF 2613 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2614 obj->size, obj_priv->gtt_offset); 2615 #endif 2616 if (retry_alloc) { 2617 i915_gem_object_set_page_gfp_mask (obj, 2618 i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY); 2619 } 2620 ret = i915_gem_object_get_pages(obj); 2621 if (retry_alloc) { 2622 i915_gem_object_set_page_gfp_mask (obj, 2623 i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY); 2624 } 2625 if (ret) { 2626 drm_mm_put_block(obj_priv->gtt_space); 2627 obj_priv->gtt_space = NULL; 2628 2629 if (ret == -ENOMEM) { 2630 /* first try to clear up some space from the GTT */ 2631 ret = i915_gem_evict_something(dev, obj->size); 2632 if (ret) { 2633 /* now try to shrink everyone else */ 2634 if (! retry_alloc) { 2635 retry_alloc = true; 2636 goto search_free; 2637 } 2638 2639 return ret; 2640 } 2641 2642 goto search_free; 2643 } 2644 2645 return ret; 2646 } 2647 2648 /* Create an AGP memory structure pointing at our pages, and bind it 2649 * into the GTT. 2650 */ 2651 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2652 obj_priv->pages, 2653 obj->size >> PAGE_SHIFT, 2654 obj_priv->gtt_offset, 2655 obj_priv->agp_type); 2656 if (obj_priv->agp_mem == NULL) { 2657 i915_gem_object_put_pages(obj); 2658 drm_mm_put_block(obj_priv->gtt_space); 2659 obj_priv->gtt_space = NULL; 2660 2661 ret = i915_gem_evict_something(dev, obj->size); 2662 if (ret) 2663 return ret; 2664 2665 goto search_free; 2666 } 2667 atomic_inc(&dev->gtt_count); 2668 atomic_add(obj->size, &dev->gtt_memory); 2669 2670 /* Assert that the object is not currently in any GPU domain. 
As it 2671 * wasn't in the GTT, there shouldn't be any way it could have been in 2672 * a GPU cache 2673 */ 2674 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2675 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2676 2677 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); 2678 2679 return 0; 2680 } 2681 2682 void 2683 i915_gem_clflush_object(struct drm_gem_object *obj) 2684 { 2685 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2686 2687 /* If we don't have a page list set up, then we're not pinned 2688 * to GPU, and we can ignore the cache flush because it'll happen 2689 * again at bind time. 2690 */ 2691 if (obj_priv->pages == NULL) 2692 return; 2693 2694 trace_i915_gem_object_clflush(obj); 2695 2696 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2697 } 2698 2699 /** Flushes any GPU write domain for the object if it's dirty. */ 2700 static void 2701 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2702 { 2703 struct drm_device *dev = obj->dev; 2704 uint32_t seqno; 2705 uint32_t old_write_domain; 2706 2707 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2708 return; 2709 2710 /* Queue the GPU write cache flushing we need. */ 2711 old_write_domain = obj->write_domain; 2712 i915_gem_flush(dev, 0, obj->write_domain); 2713 seqno = i915_add_request(dev, NULL, obj->write_domain); 2714 obj->write_domain = 0; 2715 i915_gem_object_move_to_active(obj, seqno); 2716 2717 trace_i915_gem_object_change_domain(obj, 2718 obj->read_domains, 2719 old_write_domain); 2720 } 2721 2722 /** Flushes the GTT write domain for the object if it's dirty. */ 2723 static void 2724 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2725 { 2726 uint32_t old_write_domain; 2727 2728 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2729 return; 2730 2731 /* No actual flushing is required for the GTT write domain. Writes 2732 * to it immediately go to main memory as far as we know, so there's 2733 * no chipset flush. It also doesn't land in render cache. 2734 */ 2735 old_write_domain = obj->write_domain; 2736 obj->write_domain = 0; 2737 2738 trace_i915_gem_object_change_domain(obj, 2739 obj->read_domains, 2740 old_write_domain); 2741 } 2742 2743 /** Flushes the CPU write domain for the object if it's dirty. */ 2744 static void 2745 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2746 { 2747 struct drm_device *dev = obj->dev; 2748 uint32_t old_write_domain; 2749 2750 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2751 return; 2752 2753 i915_gem_clflush_object(obj); 2754 drm_agp_chipset_flush(dev); 2755 old_write_domain = obj->write_domain; 2756 obj->write_domain = 0; 2757 2758 trace_i915_gem_object_change_domain(obj, 2759 obj->read_domains, 2760 old_write_domain); 2761 } 2762 2763 /** 2764 * Moves a single object to the GTT read, and possibly write domain. 2765 * 2766 * This function returns when the move is complete, including waiting on 2767 * flushes to occur. 2768 */ 2769 int 2770 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2771 { 2772 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2773 uint32_t old_write_domain, old_read_domains; 2774 int ret; 2775 2776 /* Not valid to be called on unbound objects. */ 2777 if (obj_priv->gtt_space == NULL) 2778 return -EINVAL; 2779 2780 i915_gem_object_flush_gpu_write_domain(obj); 2781 /* Wait on any GPU rendering and flushing to occur. 
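 * i915_gem_object_flush_gpu_write_domain() above queued any needed MI_FLUSH
 * along with a new request and moved the object to the active list, so
 * waiting on last_rendering_seqno here covers both the outstanding rendering
 * and that flush.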
*/ 2782 ret = i915_gem_object_wait_rendering(obj); 2783 if (ret != 0) 2784 return ret; 2785 2786 old_write_domain = obj->write_domain; 2787 old_read_domains = obj->read_domains; 2788 2789 /* If we're writing through the GTT domain, then CPU and GPU caches 2790 * will need to be invalidated at next use. 2791 */ 2792 if (write) 2793 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2794 2795 i915_gem_object_flush_cpu_write_domain(obj); 2796 2797 /* It should now be out of any other write domains, and we can update 2798 * the domain values for our changes. 2799 */ 2800 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2801 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2802 if (write) { 2803 obj->write_domain = I915_GEM_DOMAIN_GTT; 2804 obj_priv->dirty = 1; 2805 } 2806 2807 trace_i915_gem_object_change_domain(obj, 2808 old_read_domains, 2809 old_write_domain); 2810 2811 return 0; 2812 } 2813 2814 /** 2815 * Moves a single object to the CPU read, and possibly write domain. 2816 * 2817 * This function returns when the move is complete, including waiting on 2818 * flushes to occur. 2819 */ 2820 static int 2821 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2822 { 2823 uint32_t old_write_domain, old_read_domains; 2824 int ret; 2825 2826 i915_gem_object_flush_gpu_write_domain(obj); 2827 /* Wait on any GPU rendering and flushing to occur. */ 2828 ret = i915_gem_object_wait_rendering(obj); 2829 if (ret != 0) 2830 return ret; 2831 2832 i915_gem_object_flush_gtt_write_domain(obj); 2833 2834 /* If we have a partially-valid cache of the object in the CPU, 2835 * finish invalidating it and free the per-page flags. 2836 */ 2837 i915_gem_object_set_to_full_cpu_read_domain(obj); 2838 2839 old_write_domain = obj->write_domain; 2840 old_read_domains = obj->read_domains; 2841 2842 /* Flush the CPU cache if it's still invalid. */ 2843 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2844 i915_gem_clflush_object(obj); 2845 2846 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2847 } 2848 2849 /* It should now be out of any other write domains, and we can update 2850 * the domain values for our changes. 2851 */ 2852 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2853 2854 /* If we're writing through the CPU, then the GPU read domains will 2855 * need to be invalidated at next use. 2856 */ 2857 if (write) { 2858 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2859 obj->write_domain = I915_GEM_DOMAIN_CPU; 2860 } 2861 2862 trace_i915_gem_object_change_domain(obj, 2863 old_read_domains, 2864 old_write_domain); 2865 2866 return 0; 2867 } 2868 2869 /* 2870 * Set the next domain for the specified object. This 2871 * may not actually perform the necessary flushing/invaliding though, 2872 * as that may want to be batched with other set_domain operations 2873 * 2874 * This is (we hope) the only really tricky part of gem. The goal 2875 * is fairly simple -- track which caches hold bits of the object 2876 * and make sure they remain coherent. A few concrete examples may 2877 * help to explain how it works. For shorthand, we use the notation 2878 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 2879 * a pair of read and write domain masks. 2880 * 2881 * Case 1: the batch buffer 2882 * 2883 * 1. Allocated 2884 * 2. Written by CPU 2885 * 3. Mapped to GTT 2886 * 4. Read by GPU 2887 * 5. Unmapped from GTT 2888 * 6. Freed 2889 * 2890 * Let's take these a step at a time 2891 * 2892 * 1. 
Allocated 2893 * Pages allocated from the kernel may still have 2894 * cache contents, so we set them to (CPU, CPU) always. 2895 * 2. Written by CPU (using pwrite) 2896 * The pwrite function calls set_domain (CPU, CPU) and 2897 * this function does nothing (as nothing changes) 2898 * 3. Mapped by GTT 2899 * This function asserts that the object is not 2900 * currently in any GPU-based read or write domains 2901 * 4. Read by GPU 2902 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 2903 * As write_domain is zero, this function adds in the 2904 * current read domains (CPU+COMMAND, 0). 2905 * flush_domains is set to CPU. 2906 * invalidate_domains is set to COMMAND 2907 * clflush is run to get data out of the CPU caches 2908 * then i915_dev_set_domain calls i915_gem_flush to 2909 * emit an MI_FLUSH and drm_agp_chipset_flush 2910 * 5. Unmapped from GTT 2911 * i915_gem_object_unbind calls set_domain (CPU, CPU) 2912 * flush_domains and invalidate_domains end up both zero 2913 * so no flushing/invalidating happens 2914 * 6. Freed 2915 * yay, done 2916 * 2917 * Case 2: The shared render buffer 2918 * 2919 * 1. Allocated 2920 * 2. Mapped to GTT 2921 * 3. Read/written by GPU 2922 * 4. set_domain to (CPU,CPU) 2923 * 5. Read/written by CPU 2924 * 6. Read/written by GPU 2925 * 2926 * 1. Allocated 2927 * Same as last example, (CPU, CPU) 2928 * 2. Mapped to GTT 2929 * Nothing changes (assertions find that it is not in the GPU) 2930 * 3. Read/written by GPU 2931 * execbuffer calls set_domain (RENDER, RENDER) 2932 * flush_domains gets CPU 2933 * invalidate_domains gets GPU 2934 * clflush (obj) 2935 * MI_FLUSH and drm_agp_chipset_flush 2936 * 4. set_domain (CPU, CPU) 2937 * flush_domains gets GPU 2938 * invalidate_domains gets CPU 2939 * wait_rendering (obj) to make sure all drawing is complete. 2940 * This will include an MI_FLUSH to get the data from GPU 2941 * to memory 2942 * clflush (obj) to invalidate the CPU cache 2943 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 2944 * 5. Read/written by CPU 2945 * cache lines are loaded and dirtied 2946 * 6. Read written by GPU 2947 * Same as last GPU access 2948 * 2949 * Case 3: The constant buffer 2950 * 2951 * 1. Allocated 2952 * 2. Written by CPU 2953 * 3. Read by GPU 2954 * 4. Updated (written) by CPU again 2955 * 5. Read by GPU 2956 * 2957 * 1. Allocated 2958 * (CPU, CPU) 2959 * 2. Written by CPU 2960 * (CPU, CPU) 2961 * 3. Read by GPU 2962 * (CPU+RENDER, 0) 2963 * flush_domains = CPU 2964 * invalidate_domains = RENDER 2965 * clflush (obj) 2966 * MI_FLUSH 2967 * drm_agp_chipset_flush 2968 * 4. Updated (written) by CPU again 2969 * (CPU, CPU) 2970 * flush_domains = 0 (no previous write domain) 2971 * invalidate_domains = 0 (no new read domains) 2972 * 5. 
Read by GPU 2973 * (CPU+RENDER, 0) 2974 * flush_domains = CPU 2975 * invalidate_domains = RENDER 2976 * clflush (obj) 2977 * MI_FLUSH 2978 * drm_agp_chipset_flush 2979 */ 2980 static void 2981 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 2982 { 2983 struct drm_device *dev = obj->dev; 2984 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2985 uint32_t invalidate_domains = 0; 2986 uint32_t flush_domains = 0; 2987 uint32_t old_read_domains; 2988 2989 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2990 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2991 2992 intel_mark_busy(dev, obj); 2993 2994 #if WATCH_BUF 2995 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 2996 __func__, obj, 2997 obj->read_domains, obj->pending_read_domains, 2998 obj->write_domain, obj->pending_write_domain); 2999 #endif 3000 /* 3001 * If the object isn't moving to a new write domain, 3002 * let the object stay in multiple read domains 3003 */ 3004 if (obj->pending_write_domain == 0) 3005 obj->pending_read_domains |= obj->read_domains; 3006 else 3007 obj_priv->dirty = 1; 3008 3009 /* 3010 * Flush the current write domain if 3011 * the new read domains don't match. Invalidate 3012 * any read domains which differ from the old 3013 * write domain 3014 */ 3015 if (obj->write_domain && 3016 obj->write_domain != obj->pending_read_domains) { 3017 flush_domains |= obj->write_domain; 3018 invalidate_domains |= 3019 obj->pending_read_domains & ~obj->write_domain; 3020 } 3021 /* 3022 * Invalidate any read caches which may have 3023 * stale data. That is, any new read domains. 3024 */ 3025 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 3026 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 3027 #if WATCH_BUF 3028 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 3029 __func__, flush_domains, invalidate_domains); 3030 #endif 3031 i915_gem_clflush_object(obj); 3032 } 3033 3034 old_read_domains = obj->read_domains; 3035 3036 /* The actual obj->write_domain will be updated with 3037 * pending_write_domain after we emit the accumulated flush for all 3038 * of our domain changes in execbuffers (which clears objects' 3039 * write_domains). So if we have a current write domain that we 3040 * aren't changing, set pending_write_domain to that. 3041 */ 3042 if (flush_domains == 0 && obj->pending_write_domain == 0) 3043 obj->pending_write_domain = obj->write_domain; 3044 obj->read_domains = obj->pending_read_domains; 3045 3046 dev->invalidate_domains |= invalidate_domains; 3047 dev->flush_domains |= flush_domains; 3048 #if WATCH_BUF 3049 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 3050 __func__, 3051 obj->read_domains, obj->write_domain, 3052 dev->invalidate_domains, dev->flush_domains); 3053 #endif 3054 3055 trace_i915_gem_object_change_domain(obj, 3056 old_read_domains, 3057 obj->write_domain); 3058 } 3059 3060 /** 3061 * Moves the object from a partially CPU read to a full one. 3062 * 3063 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3064 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3065 */ 3066 static void 3067 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 3068 { 3069 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3070 3071 if (!obj_priv->page_cpu_valid) 3072 return; 3073 3074 /* If we're partially in the CPU read domain, finish moving it in. 
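 * page_cpu_valid[] holds one byte per page recording which pages
 * i915_gem_object_set_cpu_read_domain_range() has already clflushed; flush
 * the ones it skipped, then free the map, since from here on CPU validity is
 * tracked for the object as a whole.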
3075 */ 3076 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 3077 int i; 3078 3079 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 3080 if (obj_priv->page_cpu_valid[i]) 3081 continue; 3082 drm_clflush_pages(obj_priv->pages + i, 1); 3083 } 3084 } 3085 3086 /* Free the page_cpu_valid mappings which are now stale, whether 3087 * or not we've got I915_GEM_DOMAIN_CPU. 3088 */ 3089 kfree(obj_priv->page_cpu_valid); 3090 obj_priv->page_cpu_valid = NULL; 3091 } 3092 3093 /** 3094 * Set the CPU read domain on a range of the object. 3095 * 3096 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3097 * not entirely valid. The page_cpu_valid member of the object flags which 3098 * pages have been flushed, and will be respected by 3099 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3100 * of the whole object. 3101 * 3102 * This function returns when the move is complete, including waiting on 3103 * flushes to occur. 3104 */ 3105 static int 3106 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 3107 uint64_t offset, uint64_t size) 3108 { 3109 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3110 uint32_t old_read_domains; 3111 int i, ret; 3112 3113 if (offset == 0 && size == obj->size) 3114 return i915_gem_object_set_to_cpu_domain(obj, 0); 3115 3116 i915_gem_object_flush_gpu_write_domain(obj); 3117 /* Wait on any GPU rendering and flushing to occur. */ 3118 ret = i915_gem_object_wait_rendering(obj); 3119 if (ret != 0) 3120 return ret; 3121 i915_gem_object_flush_gtt_write_domain(obj); 3122 3123 /* If we're already fully in the CPU read domain, we're done. */ 3124 if (obj_priv->page_cpu_valid == NULL && 3125 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 3126 return 0; 3127 3128 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3129 * newly adding I915_GEM_DOMAIN_CPU 3130 */ 3131 if (obj_priv->page_cpu_valid == NULL) { 3132 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE, 3133 GFP_KERNEL); 3134 if (obj_priv->page_cpu_valid == NULL) 3135 return -ENOMEM; 3136 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 3137 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 3138 3139 /* Flush the cache on any pages that are still invalid from the CPU's 3140 * perspective. 3141 */ 3142 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3143 i++) { 3144 if (obj_priv->page_cpu_valid[i]) 3145 continue; 3146 3147 drm_clflush_pages(obj_priv->pages + i, 1); 3148 3149 obj_priv->page_cpu_valid[i] = 1; 3150 } 3151 3152 /* It should now be out of any other write domains, and we can update 3153 * the domain values for our changes. 3154 */ 3155 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3156 3157 old_read_domains = obj->read_domains; 3158 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3159 3160 trace_i915_gem_object_change_domain(obj, 3161 old_read_domains, 3162 obj->write_domain); 3163 3164 return 0; 3165 } 3166 3167 /** 3168 * Pin an object to the GTT and evaluate the relocations landing in it. 
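 *
 * Each relocation names a target buffer that must already be bound.  The
 * dword at reloc->offset inside this object is rewritten, through an atomic
 * mapping of the GTT aperture, to the target's GTT offset plus reloc->delta:
 * e.g. a target at 0x00100000 with a delta of 0x10 turns the dword into
 * 0x00100010.  presumed_offset is then updated so userspace can skip
 * relocations whose targets have not moved since the last submission.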
3169 */ 3170 static int 3171 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 3172 struct drm_file *file_priv, 3173 struct drm_i915_gem_exec_object *entry, 3174 struct drm_i915_gem_relocation_entry *relocs) 3175 { 3176 struct drm_device *dev = obj->dev; 3177 drm_i915_private_t *dev_priv = dev->dev_private; 3178 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3179 int i, ret; 3180 void __iomem *reloc_page; 3181 3182 /* Choose the GTT offset for our buffer and put it there. */ 3183 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 3184 if (ret) 3185 return ret; 3186 3187 entry->offset = obj_priv->gtt_offset; 3188 3189 /* Apply the relocations, using the GTT aperture to avoid cache 3190 * flushing requirements. 3191 */ 3192 for (i = 0; i < entry->relocation_count; i++) { 3193 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 3194 struct drm_gem_object *target_obj; 3195 struct drm_i915_gem_object *target_obj_priv; 3196 uint32_t reloc_val, reloc_offset; 3197 uint32_t __iomem *reloc_entry; 3198 3199 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 3200 reloc->target_handle); 3201 if (target_obj == NULL) { 3202 i915_gem_object_unpin(obj); 3203 return -EBADF; 3204 } 3205 target_obj_priv = target_obj->driver_private; 3206 3207 #if WATCH_RELOC 3208 DRM_INFO("%s: obj %p offset %08x target %d " 3209 "read %08x write %08x gtt %08x " 3210 "presumed %08x delta %08x\n", 3211 __func__, 3212 obj, 3213 (int) reloc->offset, 3214 (int) reloc->target_handle, 3215 (int) reloc->read_domains, 3216 (int) reloc->write_domain, 3217 (int) target_obj_priv->gtt_offset, 3218 (int) reloc->presumed_offset, 3219 reloc->delta); 3220 #endif 3221 3222 /* The target buffer should have appeared before us in the 3223 * exec_object list, so it should have a GTT space bound by now. 3224 */ 3225 if (target_obj_priv->gtt_space == NULL) { 3226 DRM_ERROR("No GTT space found for object %d\n", 3227 reloc->target_handle); 3228 drm_gem_object_unreference(target_obj); 3229 i915_gem_object_unpin(obj); 3230 return -EINVAL; 3231 } 3232 3233 /* Validate that the target is in a valid r/w GPU domain */ 3234 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3235 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3236 DRM_ERROR("reloc with read/write CPU domains: " 3237 "obj %p target %d offset %d " 3238 "read %08x write %08x", 3239 obj, reloc->target_handle, 3240 (int) reloc->offset, 3241 reloc->read_domains, 3242 reloc->write_domain); 3243 drm_gem_object_unreference(target_obj); 3244 i915_gem_object_unpin(obj); 3245 return -EINVAL; 3246 } 3247 if (reloc->write_domain && target_obj->pending_write_domain && 3248 reloc->write_domain != target_obj->pending_write_domain) { 3249 DRM_ERROR("Write domain conflict: " 3250 "obj %p target %d offset %d " 3251 "new %08x old %08x\n", 3252 obj, reloc->target_handle, 3253 (int) reloc->offset, 3254 reloc->write_domain, 3255 target_obj->pending_write_domain); 3256 drm_gem_object_unreference(target_obj); 3257 i915_gem_object_unpin(obj); 3258 return -EINVAL; 3259 } 3260 3261 target_obj->pending_read_domains |= reloc->read_domains; 3262 target_obj->pending_write_domain |= reloc->write_domain; 3263 3264 /* If the relocation already has the right value in it, no 3265 * more work needs to be done. 3266 */ 3267 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3268 drm_gem_object_unreference(target_obj); 3269 continue; 3270 } 3271 3272 /* Check that the relocation address is valid... 
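 * (it must leave room for the full dword being patched, hence the
 * obj->size - 4 bound, and has to be dword aligned)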
*/ 3273 if (reloc->offset > obj->size - 4) { 3274 DRM_ERROR("Relocation beyond object bounds: " 3275 "obj %p target %d offset %d size %d.\n", 3276 obj, reloc->target_handle, 3277 (int) reloc->offset, (int) obj->size); 3278 drm_gem_object_unreference(target_obj); 3279 i915_gem_object_unpin(obj); 3280 return -EINVAL; 3281 } 3282 if (reloc->offset & 3) { 3283 DRM_ERROR("Relocation not 4-byte aligned: " 3284 "obj %p target %d offset %d.\n", 3285 obj, reloc->target_handle, 3286 (int) reloc->offset); 3287 drm_gem_object_unreference(target_obj); 3288 i915_gem_object_unpin(obj); 3289 return -EINVAL; 3290 } 3291 3292 /* and points to somewhere within the target object. */ 3293 if (reloc->delta >= target_obj->size) { 3294 DRM_ERROR("Relocation beyond target object bounds: " 3295 "obj %p target %d delta %d size %d.\n", 3296 obj, reloc->target_handle, 3297 (int) reloc->delta, (int) target_obj->size); 3298 drm_gem_object_unreference(target_obj); 3299 i915_gem_object_unpin(obj); 3300 return -EINVAL; 3301 } 3302 3303 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3304 if (ret != 0) { 3305 drm_gem_object_unreference(target_obj); 3306 i915_gem_object_unpin(obj); 3307 return -EINVAL; 3308 } 3309 3310 /* Map the page containing the relocation we're going to 3311 * perform. 3312 */ 3313 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3314 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3315 (reloc_offset & 3316 ~(PAGE_SIZE - 1))); 3317 reloc_entry = (uint32_t __iomem *)(reloc_page + 3318 (reloc_offset & (PAGE_SIZE - 1))); 3319 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3320 3321 #if WATCH_BUF 3322 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 3323 obj, (unsigned int) reloc->offset, 3324 readl(reloc_entry), reloc_val); 3325 #endif 3326 writel(reloc_val, reloc_entry); 3327 io_mapping_unmap_atomic(reloc_page); 3328 3329 /* The updated presumed offset for this entry will be 3330 * copied back out to the user. 3331 */ 3332 reloc->presumed_offset = target_obj_priv->gtt_offset; 3333 3334 drm_gem_object_unreference(target_obj); 3335 } 3336 3337 #if WATCH_BUF 3338 if (0) 3339 i915_gem_dump_object(obj, 128, __func__, ~0); 3340 #endif 3341 return 0; 3342 } 3343 3344 /** Dispatch a batchbuffer to the ring 3345 */ 3346 static int 3347 i915_dispatch_gem_execbuffer(struct drm_device *dev, 3348 struct drm_i915_gem_execbuffer *exec, 3349 struct drm_clip_rect *cliprects, 3350 uint64_t exec_offset) 3351 { 3352 drm_i915_private_t *dev_priv = dev->dev_private; 3353 int nbox = exec->num_cliprects; 3354 int i = 0, count; 3355 uint32_t exec_start, exec_len; 3356 RING_LOCALS; 3357 3358 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3359 exec_len = (uint32_t) exec->batch_len; 3360 3361 trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno + 1); 3362 3363 count = nbox ? 
nbox : 1; 3364 3365 for (i = 0; i < count; i++) { 3366 if (i < nbox) { 3367 int ret = i915_emit_box(dev, cliprects, i, 3368 exec->DR1, exec->DR4); 3369 if (ret) 3370 return ret; 3371 } 3372 3373 if (IS_I830(dev) || IS_845G(dev)) { 3374 BEGIN_LP_RING(4); 3375 OUT_RING(MI_BATCH_BUFFER); 3376 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3377 OUT_RING(exec_start + exec_len - 4); 3378 OUT_RING(0); 3379 ADVANCE_LP_RING(); 3380 } else { 3381 BEGIN_LP_RING(2); 3382 if (IS_I965G(dev)) { 3383 OUT_RING(MI_BATCH_BUFFER_START | 3384 (2 << 6) | 3385 MI_BATCH_NON_SECURE_I965); 3386 OUT_RING(exec_start); 3387 } else { 3388 OUT_RING(MI_BATCH_BUFFER_START | 3389 (2 << 6)); 3390 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3391 } 3392 ADVANCE_LP_RING(); 3393 } 3394 } 3395 3396 /* XXX breadcrumb */ 3397 return 0; 3398 } 3399 3400 /* Throttle our rendering by waiting until the ring has completed our requests 3401 * emitted over 20 msec ago. 3402 * 3403 * Note that if we were to use the current jiffies each time around the loop, 3404 * we wouldn't escape the function with any frames outstanding if the time to 3405 * render a frame was over 20ms. 3406 * 3407 * This should get us reasonable parallelism between CPU and GPU but also 3408 * relatively low latency when blocking on a particular request to finish. 3409 */ 3410 static int 3411 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3412 { 3413 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3414 int ret = 0; 3415 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3416 3417 mutex_lock(&dev->struct_mutex); 3418 while (!list_empty(&i915_file_priv->mm.request_list)) { 3419 struct drm_i915_gem_request *request; 3420 3421 request = list_first_entry(&i915_file_priv->mm.request_list, 3422 struct drm_i915_gem_request, 3423 client_list); 3424 3425 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3426 break; 3427 3428 ret = i915_wait_request(dev, request->seqno); 3429 if (ret != 0) 3430 break; 3431 } 3432 mutex_unlock(&dev->struct_mutex); 3433 3434 return ret; 3435 } 3436 3437 static int 3438 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, 3439 uint32_t buffer_count, 3440 struct drm_i915_gem_relocation_entry **relocs) 3441 { 3442 uint32_t reloc_count = 0, reloc_index = 0, i; 3443 int ret; 3444 3445 *relocs = NULL; 3446 for (i = 0; i < buffer_count; i++) { 3447 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3448 return -EINVAL; 3449 reloc_count += exec_list[i].relocation_count; 3450 } 3451 3452 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); 3453 if (*relocs == NULL) 3454 return -ENOMEM; 3455 3456 for (i = 0; i < buffer_count; i++) { 3457 struct drm_i915_gem_relocation_entry __user *user_relocs; 3458 3459 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3460 3461 ret = copy_from_user(&(*relocs)[reloc_index], 3462 user_relocs, 3463 exec_list[i].relocation_count * 3464 sizeof(**relocs)); 3465 if (ret != 0) { 3466 drm_free_large(*relocs); 3467 *relocs = NULL; 3468 return -EFAULT; 3469 } 3470 3471 reloc_index += exec_list[i].relocation_count; 3472 } 3473 3474 return 0; 3475 } 3476 3477 static int 3478 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, 3479 uint32_t buffer_count, 3480 struct drm_i915_gem_relocation_entry *relocs) 3481 { 3482 uint32_t reloc_count = 0, i; 3483 int ret = 0; 3484 3485 for (i = 0; i < buffer_count; i++) { 3486 struct drm_i915_gem_relocation_entry __user *user_relocs; 3487 int unwritten; 3488 
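		/* copy_to_user() returns the number of bytes it could not
		 * copy, so any non-zero 'unwritten' below means the user's
		 * relocation array was not fully writable and the whole call
		 * fails with -EFAULT.
		 */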
3489 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3490 3491 unwritten = copy_to_user(user_relocs, 3492 &relocs[reloc_count], 3493 exec_list[i].relocation_count * 3494 sizeof(*relocs)); 3495 3496 if (unwritten) { 3497 ret = -EFAULT; 3498 goto err; 3499 } 3500 3501 reloc_count += exec_list[i].relocation_count; 3502 } 3503 3504 err: 3505 drm_free_large(relocs); 3506 3507 return ret; 3508 } 3509 3510 static int 3511 i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, 3512 uint64_t exec_offset) 3513 { 3514 uint32_t exec_start, exec_len; 3515 3516 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3517 exec_len = (uint32_t) exec->batch_len; 3518 3519 if ((exec_start | exec_len) & 0x7) 3520 return -EINVAL; 3521 3522 if (!exec_start) 3523 return -EINVAL; 3524 3525 return 0; 3526 } 3527 3528 int 3529 i915_gem_execbuffer(struct drm_device *dev, void *data, 3530 struct drm_file *file_priv) 3531 { 3532 drm_i915_private_t *dev_priv = dev->dev_private; 3533 struct drm_i915_gem_execbuffer *args = data; 3534 struct drm_i915_gem_exec_object *exec_list = NULL; 3535 struct drm_gem_object **object_list = NULL; 3536 struct drm_gem_object *batch_obj; 3537 struct drm_i915_gem_object *obj_priv; 3538 struct drm_clip_rect *cliprects = NULL; 3539 struct drm_i915_gem_relocation_entry *relocs; 3540 int ret, ret2, i, pinned = 0; 3541 uint64_t exec_offset; 3542 uint32_t seqno, flush_domains, reloc_index; 3543 int pin_tries; 3544 3545 #if WATCH_EXEC 3546 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3547 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3548 #endif 3549 3550 if (args->buffer_count < 1) { 3551 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3552 return -EINVAL; 3553 } 3554 /* Copy in the exec list from userland */ 3555 exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count); 3556 object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count); 3557 if (exec_list == NULL || object_list == NULL) { 3558 DRM_ERROR("Failed to allocate exec or object list " 3559 "for %d buffers\n", 3560 args->buffer_count); 3561 ret = -ENOMEM; 3562 goto pre_mutex_err; 3563 } 3564 ret = copy_from_user(exec_list, 3565 (struct drm_i915_relocation_entry __user *) 3566 (uintptr_t) args->buffers_ptr, 3567 sizeof(*exec_list) * args->buffer_count); 3568 if (ret != 0) { 3569 DRM_ERROR("copy %d exec entries failed %d\n", 3570 args->buffer_count, ret); 3571 goto pre_mutex_err; 3572 } 3573 3574 if (args->num_cliprects != 0) { 3575 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), 3576 GFP_KERNEL); 3577 if (cliprects == NULL) 3578 goto pre_mutex_err; 3579 3580 ret = copy_from_user(cliprects, 3581 (struct drm_clip_rect __user *) 3582 (uintptr_t) args->cliprects_ptr, 3583 sizeof(*cliprects) * args->num_cliprects); 3584 if (ret != 0) { 3585 DRM_ERROR("copy %d cliprects failed: %d\n", 3586 args->num_cliprects, ret); 3587 goto pre_mutex_err; 3588 } 3589 } 3590 3591 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3592 &relocs); 3593 if (ret != 0) 3594 goto pre_mutex_err; 3595 3596 mutex_lock(&dev->struct_mutex); 3597 3598 i915_verify_inactive(dev, __FILE__, __LINE__); 3599 3600 if (atomic_read(&dev_priv->mm.wedged)) { 3601 DRM_ERROR("Execbuf while wedged\n"); 3602 mutex_unlock(&dev->struct_mutex); 3603 ret = -EIO; 3604 goto pre_mutex_err; 3605 } 3606 3607 if (dev_priv->mm.suspended) { 3608 DRM_ERROR("Execbuf while VT-switched.\n"); 3609 mutex_unlock(&dev->struct_mutex); 3610 ret = -EBUSY; 3611 goto pre_mutex_err; 3612 
} 3613 3614 /* Look up object handles */ 3615 for (i = 0; i < args->buffer_count; i++) { 3616 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3617 exec_list[i].handle); 3618 if (object_list[i] == NULL) { 3619 DRM_ERROR("Invalid object handle %d at index %d\n", 3620 exec_list[i].handle, i); 3621 ret = -EBADF; 3622 goto err; 3623 } 3624 3625 obj_priv = object_list[i]->driver_private; 3626 if (obj_priv->in_execbuffer) { 3627 DRM_ERROR("Object %p appears more than once in object list\n", 3628 object_list[i]); 3629 ret = -EBADF; 3630 goto err; 3631 } 3632 obj_priv->in_execbuffer = true; 3633 } 3634 3635 /* Pin and relocate */ 3636 for (pin_tries = 0; ; pin_tries++) { 3637 ret = 0; 3638 reloc_index = 0; 3639 3640 for (i = 0; i < args->buffer_count; i++) { 3641 object_list[i]->pending_read_domains = 0; 3642 object_list[i]->pending_write_domain = 0; 3643 ret = i915_gem_object_pin_and_relocate(object_list[i], 3644 file_priv, 3645 &exec_list[i], 3646 &relocs[reloc_index]); 3647 if (ret) 3648 break; 3649 pinned = i + 1; 3650 reloc_index += exec_list[i].relocation_count; 3651 } 3652 /* success */ 3653 if (ret == 0) 3654 break; 3655 3656 /* error other than GTT full, or we've already tried again */ 3657 if (ret != -ENOSPC || pin_tries >= 1) { 3658 if (ret != -ERESTARTSYS) { 3659 unsigned long long total_size = 0; 3660 for (i = 0; i < args->buffer_count; i++) 3661 total_size += object_list[i]->size; 3662 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n", 3663 pinned+1, args->buffer_count, 3664 total_size, ret); 3665 DRM_ERROR("%d objects [%d pinned], " 3666 "%d object bytes [%d pinned], " 3667 "%d/%d gtt bytes\n", 3668 atomic_read(&dev->object_count), 3669 atomic_read(&dev->pin_count), 3670 atomic_read(&dev->object_memory), 3671 atomic_read(&dev->pin_memory), 3672 atomic_read(&dev->gtt_memory), 3673 dev->gtt_total); 3674 } 3675 goto err; 3676 } 3677 3678 /* unpin all of our buffers */ 3679 for (i = 0; i < pinned; i++) 3680 i915_gem_object_unpin(object_list[i]); 3681 pinned = 0; 3682 3683 /* evict everyone we can from the aperture */ 3684 ret = i915_gem_evict_everything(dev); 3685 if (ret && ret != -ENOSPC) 3686 goto err; 3687 } 3688 3689 /* Set the pending read domains for the batch buffer to COMMAND */ 3690 batch_obj = object_list[args->buffer_count-1]; 3691 if (batch_obj->pending_write_domain) { 3692 DRM_ERROR("Attempting to use self-modifying batch buffer\n"); 3693 ret = -EINVAL; 3694 goto err; 3695 } 3696 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 3697 3698 /* Sanity check the batch buffer, prior to moving objects */ 3699 exec_offset = exec_list[args->buffer_count - 1].offset; 3700 ret = i915_gem_check_execbuffer (args, exec_offset); 3701 if (ret != 0) { 3702 DRM_ERROR("execbuf with invalid offset/length\n"); 3703 goto err; 3704 } 3705 3706 i915_verify_inactive(dev, __FILE__, __LINE__); 3707 3708 /* Zero the global flush/invalidate flags. 
These 3709 * will be modified as new domains are computed 3710 * for each object 3711 */ 3712 dev->invalidate_domains = 0; 3713 dev->flush_domains = 0; 3714 3715 for (i = 0; i < args->buffer_count; i++) { 3716 struct drm_gem_object *obj = object_list[i]; 3717 3718 /* Compute new gpu domains and update invalidate/flush */ 3719 i915_gem_object_set_to_gpu_domain(obj); 3720 } 3721 3722 i915_verify_inactive(dev, __FILE__, __LINE__); 3723 3724 if (dev->invalidate_domains | dev->flush_domains) { 3725 #if WATCH_EXEC 3726 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3727 __func__, 3728 dev->invalidate_domains, 3729 dev->flush_domains); 3730 #endif 3731 i915_gem_flush(dev, 3732 dev->invalidate_domains, 3733 dev->flush_domains); 3734 if (dev->flush_domains) 3735 (void)i915_add_request(dev, file_priv, 3736 dev->flush_domains); 3737 } 3738 3739 for (i = 0; i < args->buffer_count; i++) { 3740 struct drm_gem_object *obj = object_list[i]; 3741 uint32_t old_write_domain = obj->write_domain; 3742 3743 obj->write_domain = obj->pending_write_domain; 3744 trace_i915_gem_object_change_domain(obj, 3745 obj->read_domains, 3746 old_write_domain); 3747 } 3748 3749 i915_verify_inactive(dev, __FILE__, __LINE__); 3750 3751 #if WATCH_COHERENCY 3752 for (i = 0; i < args->buffer_count; i++) { 3753 i915_gem_object_check_coherency(object_list[i], 3754 exec_list[i].handle); 3755 } 3756 #endif 3757 3758 #if WATCH_EXEC 3759 i915_gem_dump_object(batch_obj, 3760 args->batch_len, 3761 __func__, 3762 ~0); 3763 #endif 3764 3765 /* Exec the batchbuffer */ 3766 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); 3767 if (ret) { 3768 DRM_ERROR("dispatch failed %d\n", ret); 3769 goto err; 3770 } 3771 3772 /* 3773 * Ensure that the commands in the batch buffer are 3774 * finished before the interrupt fires 3775 */ 3776 flush_domains = i915_retire_commands(dev); 3777 3778 i915_verify_inactive(dev, __FILE__, __LINE__); 3779 3780 /* 3781 * Get a seqno representing the execution of the current buffer, 3782 * which we can wait on. We would like to mitigate these interrupts, 3783 * likely by only creating seqnos occasionally (so that we have 3784 * *some* interrupts representing completion of buffers that we can 3785 * wait on when trying to clear up gtt space). 3786 */ 3787 seqno = i915_add_request(dev, file_priv, flush_domains); 3788 BUG_ON(seqno == 0); 3789 for (i = 0; i < args->buffer_count; i++) { 3790 struct drm_gem_object *obj = object_list[i]; 3791 3792 i915_gem_object_move_to_active(obj, seqno); 3793 #if WATCH_LRU 3794 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3795 #endif 3796 } 3797 #if WATCH_LRU 3798 i915_dump_lru(dev, __func__); 3799 #endif 3800 3801 i915_verify_inactive(dev, __FILE__, __LINE__); 3802 3803 err: 3804 for (i = 0; i < pinned; i++) 3805 i915_gem_object_unpin(object_list[i]); 3806 3807 for (i = 0; i < args->buffer_count; i++) { 3808 if (object_list[i]) { 3809 obj_priv = object_list[i]->driver_private; 3810 obj_priv->in_execbuffer = false; 3811 } 3812 drm_gem_object_unreference(object_list[i]); 3813 } 3814 3815 mutex_unlock(&dev->struct_mutex); 3816 3817 if (!ret) { 3818 /* Copy the new buffer offsets back to the user's exec list. 
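 * exec_list[i].offset was filled in with each object's final GTT offset
 * while pinning, so userspace can treat these as the presumed offsets for
 * its next execbuffer and skip rewriting relocations whose targets stayed
 * put.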
*/ 3819 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 3820 (uintptr_t) args->buffers_ptr, 3821 exec_list, 3822 sizeof(*exec_list) * args->buffer_count); 3823 if (ret) { 3824 ret = -EFAULT; 3825 DRM_ERROR("failed to copy %d exec entries " 3826 "back to user (%d)\n", 3827 args->buffer_count, ret); 3828 } 3829 } 3830 3831 /* Copy the updated relocations out regardless of current error 3832 * state. Failure to update the relocs would mean that the next 3833 * time userland calls execbuf, it would do so with presumed offset 3834 * state that didn't match the actual object state. 3835 */ 3836 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3837 relocs); 3838 if (ret2 != 0) { 3839 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3840 3841 if (ret == 0) 3842 ret = ret2; 3843 } 3844 3845 pre_mutex_err: 3846 drm_free_large(object_list); 3847 drm_free_large(exec_list); 3848 kfree(cliprects); 3849 3850 return ret; 3851 } 3852 3853 int 3854 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 3855 { 3856 struct drm_device *dev = obj->dev; 3857 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3858 int ret; 3859 3860 i915_verify_inactive(dev, __FILE__, __LINE__); 3861 if (obj_priv->gtt_space == NULL) { 3862 ret = i915_gem_object_bind_to_gtt(obj, alignment); 3863 if (ret) 3864 return ret; 3865 } 3866 /* 3867 * Pre-965 chips need a fence register set up in order to 3868 * properly handle tiled surfaces. 3869 */ 3870 if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { 3871 ret = i915_gem_object_get_fence_reg(obj); 3872 if (ret != 0) { 3873 if (ret != -EBUSY && ret != -ERESTARTSYS) 3874 DRM_ERROR("Failure to install fence: %d\n", 3875 ret); 3876 return ret; 3877 } 3878 } 3879 obj_priv->pin_count++; 3880 3881 /* If the object is not active and not pending a flush, 3882 * remove it from the inactive list 3883 */ 3884 if (obj_priv->pin_count == 1) { 3885 atomic_inc(&dev->pin_count); 3886 atomic_add(obj->size, &dev->pin_memory); 3887 if (!obj_priv->active && 3888 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 && 3889 !list_empty(&obj_priv->list)) 3890 list_del_init(&obj_priv->list); 3891 } 3892 i915_verify_inactive(dev, __FILE__, __LINE__); 3893 3894 return 0; 3895 } 3896 3897 void 3898 i915_gem_object_unpin(struct drm_gem_object *obj) 3899 { 3900 struct drm_device *dev = obj->dev; 3901 drm_i915_private_t *dev_priv = dev->dev_private; 3902 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3903 3904 i915_verify_inactive(dev, __FILE__, __LINE__); 3905 obj_priv->pin_count--; 3906 BUG_ON(obj_priv->pin_count < 0); 3907 BUG_ON(obj_priv->gtt_space == NULL); 3908 3909 /* If the object is no longer pinned, and is 3910 * neither active nor being flushed, then stick it on 3911 * the inactive list 3912 */ 3913 if (obj_priv->pin_count == 0) { 3914 if (!obj_priv->active && 3915 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 3916 list_move_tail(&obj_priv->list, 3917 &dev_priv->mm.inactive_list); 3918 atomic_dec(&dev->pin_count); 3919 atomic_sub(obj->size, &dev->pin_memory); 3920 } 3921 i915_verify_inactive(dev, __FILE__, __LINE__); 3922 } 3923 3924 int 3925 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3926 struct drm_file *file_priv) 3927 { 3928 struct drm_i915_gem_pin *args = data; 3929 struct drm_gem_object *obj; 3930 struct drm_i915_gem_object *obj_priv; 3931 int ret; 3932 3933 mutex_lock(&dev->struct_mutex); 3934 3935 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3936 if (obj == NULL) { 3937 
DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 3938 args->handle); 3939 mutex_unlock(&dev->struct_mutex); 3940 return -EBADF; 3941 } 3942 obj_priv = obj->driver_private; 3943 3944 if (obj_priv->madv != I915_MADV_WILLNEED) { 3945 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3946 drm_gem_object_unreference(obj); 3947 mutex_unlock(&dev->struct_mutex); 3948 return -EINVAL; 3949 } 3950 3951 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 3952 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3953 args->handle); 3954 drm_gem_object_unreference(obj); 3955 mutex_unlock(&dev->struct_mutex); 3956 return -EINVAL; 3957 } 3958 3959 obj_priv->user_pin_count++; 3960 obj_priv->pin_filp = file_priv; 3961 if (obj_priv->user_pin_count == 1) { 3962 ret = i915_gem_object_pin(obj, args->alignment); 3963 if (ret != 0) { 3964 drm_gem_object_unreference(obj); 3965 mutex_unlock(&dev->struct_mutex); 3966 return ret; 3967 } 3968 } 3969 3970 /* XXX - flush the CPU caches for pinned objects 3971 * as the X server doesn't manage domains yet 3972 */ 3973 i915_gem_object_flush_cpu_write_domain(obj); 3974 args->offset = obj_priv->gtt_offset; 3975 drm_gem_object_unreference(obj); 3976 mutex_unlock(&dev->struct_mutex); 3977 3978 return 0; 3979 } 3980 3981 int 3982 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3983 struct drm_file *file_priv) 3984 { 3985 struct drm_i915_gem_pin *args = data; 3986 struct drm_gem_object *obj; 3987 struct drm_i915_gem_object *obj_priv; 3988 3989 mutex_lock(&dev->struct_mutex); 3990 3991 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3992 if (obj == NULL) { 3993 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 3994 args->handle); 3995 mutex_unlock(&dev->struct_mutex); 3996 return -EBADF; 3997 } 3998 3999 obj_priv = obj->driver_private; 4000 if (obj_priv->pin_filp != file_priv) { 4001 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4002 args->handle); 4003 drm_gem_object_unreference(obj); 4004 mutex_unlock(&dev->struct_mutex); 4005 return -EINVAL; 4006 } 4007 obj_priv->user_pin_count--; 4008 if (obj_priv->user_pin_count == 0) { 4009 obj_priv->pin_filp = NULL; 4010 i915_gem_object_unpin(obj); 4011 } 4012 4013 drm_gem_object_unreference(obj); 4014 mutex_unlock(&dev->struct_mutex); 4015 return 0; 4016 } 4017 4018 int 4019 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4020 struct drm_file *file_priv) 4021 { 4022 struct drm_i915_gem_busy *args = data; 4023 struct drm_gem_object *obj; 4024 struct drm_i915_gem_object *obj_priv; 4025 4026 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4027 if (obj == NULL) { 4028 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 4029 args->handle); 4030 return -EBADF; 4031 } 4032 4033 mutex_lock(&dev->struct_mutex); 4034 /* Update the active list for the hardware's current position. 4035 * Otherwise this only updates on a delayed timer or when irqs are 4036 * actually unmasked, and our working set ends up being larger than 4037 * required. 4038 */ 4039 i915_gem_retire_requests(dev); 4040 4041 obj_priv = obj->driver_private; 4042 /* Don't count being on the flushing list against the object being 4043 * done. Otherwise, a buffer left on the flushing list but not getting 4044 * flushed (because nobody's flushing that domain) won't ever return 4045 * unbusy and get reused by libdrm's bo cache. The other expected 4046 * consumer of this interface, OpenGL's occlusion queries, also specs 4047 * that the objects get unbusy "eventually" without any interference.
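 *
 * For illustration only, a userspace consumer such as libdrm's bo cache
 * might poll this roughly as follows (drmIoctl() is libdrm's ioctl
 * wrapper; reuse_or_free() is a hypothetical helper):
 *
 *	struct drm_i915_gem_busy busy = { .handle = bo_handle };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0 && !busy.busy)
 *		reuse_or_free(bo_handle);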
4048 */ 4049 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 4050 4051 drm_gem_object_unreference(obj); 4052 mutex_unlock(&dev->struct_mutex); 4053 return 0; 4054 } 4055 4056 int 4057 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4058 struct drm_file *file_priv) 4059 { 4060 return i915_gem_ring_throttle(dev, file_priv); 4061 } 4062 4063 int 4064 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4065 struct drm_file *file_priv) 4066 { 4067 struct drm_i915_gem_madvise *args = data; 4068 struct drm_gem_object *obj; 4069 struct drm_i915_gem_object *obj_priv; 4070 4071 switch (args->madv) { 4072 case I915_MADV_DONTNEED: 4073 case I915_MADV_WILLNEED: 4074 break; 4075 default: 4076 return -EINVAL; 4077 } 4078 4079 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4080 if (obj == NULL) { 4081 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", 4082 args->handle); 4083 return -EBADF; 4084 } 4085 4086 mutex_lock(&dev->struct_mutex); 4087 obj_priv = obj->driver_private; 4088 4089 if (obj_priv->pin_count) { 4090 drm_gem_object_unreference(obj); 4091 mutex_unlock(&dev->struct_mutex); 4092 4093 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); 4094 return -EINVAL; 4095 } 4096 4097 if (obj_priv->madv != __I915_MADV_PURGED) 4098 obj_priv->madv = args->madv; 4099 4100 /* if the object is no longer bound, discard its backing storage */ 4101 if (i915_gem_object_is_purgeable(obj_priv) && 4102 obj_priv->gtt_space == NULL) 4103 i915_gem_object_truncate(obj); 4104 4105 args->retained = obj_priv->madv != __I915_MADV_PURGED; 4106 4107 drm_gem_object_unreference(obj); 4108 mutex_unlock(&dev->struct_mutex); 4109 4110 return 0; 4111 } 4112 4113 int i915_gem_init_object(struct drm_gem_object *obj) 4114 { 4115 struct drm_i915_gem_object *obj_priv; 4116 4117 obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL); 4118 if (obj_priv == NULL) 4119 return -ENOMEM; 4120 4121 /* 4122 * We've just allocated pages from the kernel, 4123 * so they've just been written by the CPU with 4124 * zeros. They'll need to be clflushed before we 4125 * use them with the GPU. 4126 */ 4127 obj->write_domain = I915_GEM_DOMAIN_CPU; 4128 obj->read_domains = I915_GEM_DOMAIN_CPU; 4129 4130 obj_priv->agp_type = AGP_USER_MEMORY; 4131 4132 obj->driver_private = obj_priv; 4133 obj_priv->obj = obj; 4134 obj_priv->fence_reg = I915_FENCE_REG_NONE; 4135 INIT_LIST_HEAD(&obj_priv->list); 4136 INIT_LIST_HEAD(&obj_priv->fence_list); 4137 obj_priv->madv = I915_MADV_WILLNEED; 4138 4139 trace_i915_gem_object_create(obj); 4140 4141 return 0; 4142 } 4143 4144 void i915_gem_free_object(struct drm_gem_object *obj) 4145 { 4146 struct drm_device *dev = obj->dev; 4147 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4148 4149 trace_i915_gem_object_destroy(obj); 4150 4151 while (obj_priv->pin_count > 0) 4152 i915_gem_object_unpin(obj); 4153 4154 if (obj_priv->phys_obj) 4155 i915_gem_detach_phys_object(dev, obj); 4156 4157 i915_gem_object_unbind(obj); 4158 4159 if (obj_priv->mmap_offset) 4160 i915_gem_free_mmap_offset(obj); 4161 4162 kfree(obj_priv->page_cpu_valid); 4163 kfree(obj_priv->bit_17); 4164 kfree(obj->driver_private); 4165 } 4166 4167 /** Unbinds all inactive objects. 
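 * Returns 0 on success, or the first error from i915_gem_object_unbind(),
 * in which case the remaining objects are left bound.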
*/ 4168 static int 4169 i915_gem_evict_from_inactive_list(struct drm_device *dev) 4170 { 4171 drm_i915_private_t *dev_priv = dev->dev_private; 4172 4173 while (!list_empty(&dev_priv->mm.inactive_list)) { 4174 struct drm_gem_object *obj; 4175 int ret; 4176 4177 obj = list_first_entry(&dev_priv->mm.inactive_list, 4178 struct drm_i915_gem_object, 4179 list)->obj; 4180 4181 ret = i915_gem_object_unbind(obj); 4182 if (ret != 0) { 4183 DRM_ERROR("Error unbinding object: %d\n", ret); 4184 return ret; 4185 } 4186 } 4187 4188 return 0; 4189 } 4190 4191 int 4192 i915_gem_idle(struct drm_device *dev) 4193 { 4194 drm_i915_private_t *dev_priv = dev->dev_private; 4195 uint32_t seqno, cur_seqno, last_seqno; 4196 int stuck, ret; 4197 4198 mutex_lock(&dev->struct_mutex); 4199 4200 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 4201 mutex_unlock(&dev->struct_mutex); 4202 return 0; 4203 } 4204 4205 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4206 * We need to replace this with a semaphore, or something. 4207 */ 4208 dev_priv->mm.suspended = 1; 4209 del_timer(&dev_priv->hangcheck_timer); 4210 4211 /* Cancel the retire work handler, wait for it to finish if running 4212 */ 4213 mutex_unlock(&dev->struct_mutex); 4214 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4215 mutex_lock(&dev->struct_mutex); 4216 4217 i915_kernel_lost_context(dev); 4218 4219 /* Flush the GPU along with all non-CPU write domains 4220 */ 4221 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 4222 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); 4223 4224 if (seqno == 0) { 4225 mutex_unlock(&dev->struct_mutex); 4226 return -ENOMEM; 4227 } 4228 4229 dev_priv->mm.waiting_gem_seqno = seqno; 4230 last_seqno = 0; 4231 stuck = 0; 4232 for (;;) { 4233 cur_seqno = i915_get_gem_seqno(dev); 4234 if (i915_seqno_passed(cur_seqno, seqno)) 4235 break; 4236 if (last_seqno == cur_seqno) { 4237 if (stuck++ > 100) { 4238 DRM_ERROR("hardware wedged\n"); 4239 atomic_set(&dev_priv->mm.wedged, 1); 4240 DRM_WAKEUP(&dev_priv->irq_queue); 4241 break; 4242 } 4243 } 4244 msleep(10); 4245 last_seqno = cur_seqno; 4246 } 4247 dev_priv->mm.waiting_gem_seqno = 0; 4248 4249 i915_gem_retire_requests(dev); 4250 4251 spin_lock(&dev_priv->mm.active_list_lock); 4252 if (!atomic_read(&dev_priv->mm.wedged)) { 4253 /* Active and flushing should now be empty as we've 4254 * waited for a sequence higher than any pending execbuffer 4255 */ 4256 WARN_ON(!list_empty(&dev_priv->mm.active_list)); 4257 WARN_ON(!list_empty(&dev_priv->mm.flushing_list)); 4258 /* Request should now be empty as we've also waited 4259 * for the last request in the list 4260 */ 4261 WARN_ON(!list_empty(&dev_priv->mm.request_list)); 4262 } 4263 4264 /* Empty the active and flushing lists to inactive. If there's 4265 * anything left at this point, it means that we're wedged and 4266 * nothing good's going to happen by leaving them there. So strip 4267 * the GPU domains and just stuff them onto inactive. 
4268 */ 4269 while (!list_empty(&dev_priv->mm.active_list)) { 4270 struct drm_gem_object *obj; 4271 uint32_t old_write_domain; 4272 4273 obj = list_first_entry(&dev_priv->mm.active_list, 4274 struct drm_i915_gem_object, 4275 list)->obj; 4276 old_write_domain = obj->write_domain; 4277 obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 4278 i915_gem_object_move_to_inactive(obj); 4279 4280 trace_i915_gem_object_change_domain(obj, 4281 obj->read_domains, 4282 old_write_domain); 4283 } 4284 spin_unlock(&dev_priv->mm.active_list_lock); 4285 4286 while (!list_empty(&dev_priv->mm.flushing_list)) { 4287 struct drm_gem_object *obj; 4288 uint32_t old_write_domain; 4289 4290 obj = list_first_entry(&dev_priv->mm.flushing_list, 4291 struct drm_i915_gem_object, 4292 list)->obj; 4293 old_write_domain = obj->write_domain; 4294 obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 4295 i915_gem_object_move_to_inactive(obj); 4296 4297 trace_i915_gem_object_change_domain(obj, 4298 obj->read_domains, 4299 old_write_domain); 4300 } 4301 4302 4303 /* Move all inactive buffers out of the GTT. */ 4304 ret = i915_gem_evict_from_inactive_list(dev); 4305 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 4306 if (ret) { 4307 mutex_unlock(&dev->struct_mutex); 4308 return ret; 4309 } 4310 4311 i915_gem_cleanup_ringbuffer(dev); 4312 mutex_unlock(&dev->struct_mutex); 4313 4314 return 0; 4315 } 4316 4317 static int 4318 i915_gem_init_hws(struct drm_device *dev) 4319 { 4320 drm_i915_private_t *dev_priv = dev->dev_private; 4321 struct drm_gem_object *obj; 4322 struct drm_i915_gem_object *obj_priv; 4323 int ret; 4324 4325 /* If we need a physical address for the status page, it's already 4326 * initialized at driver load time. 4327 */ 4328 if (!I915_NEED_GFX_HWS(dev)) 4329 return 0; 4330 4331 obj = drm_gem_object_alloc(dev, 4096); 4332 if (obj == NULL) { 4333 DRM_ERROR("Failed to allocate status page\n"); 4334 return -ENOMEM; 4335 } 4336 obj_priv = obj->driver_private; 4337 obj_priv->agp_type = AGP_USER_CACHED_MEMORY; 4338 4339 ret = i915_gem_object_pin(obj, 4096); 4340 if (ret != 0) { 4341 drm_gem_object_unreference(obj); 4342 return ret; 4343 } 4344 4345 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 4346 4347 dev_priv->hw_status_page = kmap(obj_priv->pages[0]); 4348 if (dev_priv->hw_status_page == NULL) { 4349 DRM_ERROR("Failed to map status page.\n"); 4350 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4351 i915_gem_object_unpin(obj); 4352 drm_gem_object_unreference(obj); 4353 return -EINVAL; 4354 } 4355 dev_priv->hws_obj = obj; 4356 memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 4357 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 4358 I915_READ(HWS_PGA); /* posting read */ 4359 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 4360 4361 return 0; 4362 } 4363 4364 static void 4365 i915_gem_cleanup_hws(struct drm_device *dev) 4366 { 4367 drm_i915_private_t *dev_priv = dev->dev_private; 4368 struct drm_gem_object *obj; 4369 struct drm_i915_gem_object *obj_priv; 4370 4371 if (dev_priv->hws_obj == NULL) 4372 return; 4373 4374 obj = dev_priv->hws_obj; 4375 obj_priv = obj->driver_private; 4376 4377 kunmap(obj_priv->pages[0]); 4378 i915_gem_object_unpin(obj); 4379 drm_gem_object_unreference(obj); 4380 dev_priv->hws_obj = NULL; 4381 4382 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4383 dev_priv->hw_status_page = NULL; 4384 4385 /* Write high address into HWS_PGA when disabling. 
*/ 4386 I915_WRITE(HWS_PGA, 0x1ffff000); 4387 } 4388 4389 int 4390 i915_gem_init_ringbuffer(struct drm_device *dev) 4391 { 4392 drm_i915_private_t *dev_priv = dev->dev_private; 4393 struct drm_gem_object *obj; 4394 struct drm_i915_gem_object *obj_priv; 4395 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 4396 int ret; 4397 u32 head; 4398 4399 ret = i915_gem_init_hws(dev); 4400 if (ret != 0) 4401 return ret; 4402 4403 obj = drm_gem_object_alloc(dev, 128 * 1024); 4404 if (obj == NULL) { 4405 DRM_ERROR("Failed to allocate ringbuffer\n"); 4406 i915_gem_cleanup_hws(dev); 4407 return -ENOMEM; 4408 } 4409 obj_priv = obj->driver_private; 4410 4411 ret = i915_gem_object_pin(obj, 4096); 4412 if (ret != 0) { 4413 drm_gem_object_unreference(obj); 4414 i915_gem_cleanup_hws(dev); 4415 return ret; 4416 } 4417 4418 /* Set up the kernel mapping for the ring. */ 4419 ring->Size = obj->size; 4420 4421 ring->map.offset = dev->agp->base + obj_priv->gtt_offset; 4422 ring->map.size = obj->size; 4423 ring->map.type = 0; 4424 ring->map.flags = 0; 4425 ring->map.mtrr = 0; 4426 4427 drm_core_ioremap_wc(&ring->map, dev); 4428 if (ring->map.handle == NULL) { 4429 DRM_ERROR("Failed to map ringbuffer.\n"); 4430 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 4431 i915_gem_object_unpin(obj); 4432 drm_gem_object_unreference(obj); 4433 i915_gem_cleanup_hws(dev); 4434 return -EINVAL; 4435 } 4436 ring->ring_obj = obj; 4437 ring->virtual_start = ring->map.handle; 4438 4439 /* Stop the ring if it's running. */ 4440 I915_WRITE(PRB0_CTL, 0); 4441 I915_WRITE(PRB0_TAIL, 0); 4442 I915_WRITE(PRB0_HEAD, 0); 4443 4444 /* Initialize the ring. */ 4445 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 4446 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4447 4448 /* G45 ring initialization fails to reset head to zero */ 4449 if (head != 0) { 4450 DRM_ERROR("Ring head not reset to zero " 4451 "ctl %08x head %08x tail %08x start %08x\n", 4452 I915_READ(PRB0_CTL), 4453 I915_READ(PRB0_HEAD), 4454 I915_READ(PRB0_TAIL), 4455 I915_READ(PRB0_START)); 4456 I915_WRITE(PRB0_HEAD, 0); 4457 4458 DRM_ERROR("Ring head forced to zero " 4459 "ctl %08x head %08x tail %08x start %08x\n", 4460 I915_READ(PRB0_CTL), 4461 I915_READ(PRB0_HEAD), 4462 I915_READ(PRB0_TAIL), 4463 I915_READ(PRB0_START)); 4464 } 4465 4466 I915_WRITE(PRB0_CTL, 4467 ((obj->size - 4096) & RING_NR_PAGES) | 4468 RING_NO_REPORT | 4469 RING_VALID); 4470 4471 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4472 4473 /* If the head is still not zero, the ring is dead */ 4474 if (head != 0) { 4475 DRM_ERROR("Ring initialization failed " 4476 "ctl %08x head %08x tail %08x start %08x\n", 4477 I915_READ(PRB0_CTL), 4478 I915_READ(PRB0_HEAD), 4479 I915_READ(PRB0_TAIL), 4480 I915_READ(PRB0_START)); 4481 return -EIO; 4482 } 4483 4484 /* Update our cache of the ring state */ 4485 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4486 i915_kernel_lost_context(dev); 4487 else { 4488 ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4489 ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; 4490 ring->space = ring->head - (ring->tail + 8); 4491 if (ring->space < 0) 4492 ring->space += ring->Size; 4493 } 4494 4495 return 0; 4496 } 4497 4498 void 4499 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4500 { 4501 drm_i915_private_t *dev_priv = dev->dev_private; 4502 4503 if (dev_priv->ring.ring_obj == NULL) 4504 return; 4505 4506 drm_core_ioremapfree(&dev_priv->ring.map, dev); 4507 4508 i915_gem_object_unpin(dev_priv->ring.ring_obj); 4509 drm_gem_object_unreference(dev_priv->ring.ring_obj); 4510 dev_priv->ring.ring_obj = NULL; 
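/* Clearing the ring structure below also resets head, tail and space, so a
 * later i915_gem_init_ringbuffer() starts from a clean state; the hardware
 * status page is released afterwards by i915_gem_cleanup_hws().
 */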
4511 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 4512 4513 i915_gem_cleanup_hws(dev); 4514 } 4515 4516 int 4517 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4518 struct drm_file *file_priv) 4519 { 4520 drm_i915_private_t *dev_priv = dev->dev_private; 4521 int ret; 4522 4523 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4524 return 0; 4525 4526 if (atomic_read(&dev_priv->mm.wedged)) { 4527 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4528 atomic_set(&dev_priv->mm.wedged, 0); 4529 } 4530 4531 mutex_lock(&dev->struct_mutex); 4532 dev_priv->mm.suspended = 0; 4533 4534 ret = i915_gem_init_ringbuffer(dev); 4535 if (ret != 0) { 4536 mutex_unlock(&dev->struct_mutex); 4537 return ret; 4538 } 4539 4540 spin_lock(&dev_priv->mm.active_list_lock); 4541 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 4542 spin_unlock(&dev_priv->mm.active_list_lock); 4543 4544 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 4545 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 4546 BUG_ON(!list_empty(&dev_priv->mm.request_list)); 4547 mutex_unlock(&dev->struct_mutex); 4548 4549 drm_irq_install(dev); 4550 4551 return 0; 4552 } 4553 4554 int 4555 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4556 struct drm_file *file_priv) 4557 { 4558 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4559 return 0; 4560 4561 drm_irq_uninstall(dev); 4562 return i915_gem_idle(dev); 4563 } 4564 4565 void 4566 i915_gem_lastclose(struct drm_device *dev) 4567 { 4568 int ret; 4569 4570 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4571 return; 4572 4573 ret = i915_gem_idle(dev); 4574 if (ret) 4575 DRM_ERROR("failed to idle hardware: %d\n", ret); 4576 } 4577 4578 void 4579 i915_gem_load(struct drm_device *dev) 4580 { 4581 int i; 4582 drm_i915_private_t *dev_priv = dev->dev_private; 4583 4584 spin_lock_init(&dev_priv->mm.active_list_lock); 4585 INIT_LIST_HEAD(&dev_priv->mm.active_list); 4586 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 4587 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4588 INIT_LIST_HEAD(&dev_priv->mm.request_list); 4589 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4590 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4591 i915_gem_retire_work_handler); 4592 dev_priv->mm.next_gem_seqno = 1; 4593 4594 spin_lock(&shrink_list_lock); 4595 list_add(&dev_priv->mm.shrink_list, &shrink_list); 4596 spin_unlock(&shrink_list_lock); 4597 4598 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4599 dev_priv->fence_reg_start = 3; 4600 4601 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4602 dev_priv->num_fence_regs = 16; 4603 else 4604 dev_priv->num_fence_regs = 8; 4605 4606 /* Initialize fence registers to zero */ 4607 if (IS_I965G(dev)) { 4608 for (i = 0; i < 16; i++) 4609 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0); 4610 } else { 4611 for (i = 0; i < 8; i++) 4612 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0); 4613 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4614 for (i = 0; i < 8; i++) 4615 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); 4616 } 4617 4618 i915_gem_detect_bit_6_swizzle(dev); 4619 } 4620 4621 /* 4622 * Create a physically contiguous memory object for this object 4623 * e.g. 
for cursor + overlay regs 4624 */ 4625 int i915_gem_init_phys_object(struct drm_device *dev, 4626 int id, int size) 4627 { 4628 drm_i915_private_t *dev_priv = dev->dev_private; 4629 struct drm_i915_gem_phys_object *phys_obj; 4630 int ret; 4631 4632 if (dev_priv->mm.phys_objs[id - 1] || !size) 4633 return 0; 4634 4635 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4636 if (!phys_obj) 4637 return -ENOMEM; 4638 4639 phys_obj->id = id; 4640 4641 phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff); 4642 if (!phys_obj->handle) { 4643 ret = -ENOMEM; 4644 goto kfree_obj; 4645 } 4646 #ifdef CONFIG_X86 4647 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4648 #endif 4649 4650 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4651 4652 return 0; 4653 kfree_obj: 4654 kfree(phys_obj); 4655 return ret; 4656 } 4657 4658 void i915_gem_free_phys_object(struct drm_device *dev, int id) 4659 { 4660 drm_i915_private_t *dev_priv = dev->dev_private; 4661 struct drm_i915_gem_phys_object *phys_obj; 4662 4663 if (!dev_priv->mm.phys_objs[id - 1]) 4664 return; 4665 4666 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4667 if (phys_obj->cur_obj) { 4668 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4669 } 4670 4671 #ifdef CONFIG_X86 4672 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4673 #endif 4674 drm_pci_free(dev, phys_obj->handle); 4675 kfree(phys_obj); 4676 dev_priv->mm.phys_objs[id - 1] = NULL; 4677 } 4678 4679 void i915_gem_free_all_phys_object(struct drm_device *dev) 4680 { 4681 int i; 4682 4683 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4684 i915_gem_free_phys_object(dev, i); 4685 } 4686 4687 void i915_gem_detach_phys_object(struct drm_device *dev, 4688 struct drm_gem_object *obj) 4689 { 4690 struct drm_i915_gem_object *obj_priv; 4691 int i; 4692 int ret; 4693 int page_count; 4694 4695 obj_priv = obj->driver_private; 4696 if (!obj_priv->phys_obj) 4697 return; 4698 4699 ret = i915_gem_object_get_pages(obj); 4700 if (ret) 4701 goto out; 4702 4703 page_count = obj->size / PAGE_SIZE; 4704 4705 for (i = 0; i < page_count; i++) { 4706 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0); 4707 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4708 4709 memcpy(dst, src, PAGE_SIZE); 4710 kunmap_atomic(dst, KM_USER0); 4711 } 4712 drm_clflush_pages(obj_priv->pages, page_count); 4713 drm_agp_chipset_flush(dev); 4714 4715 i915_gem_object_put_pages(obj); 4716 out: 4717 obj_priv->phys_obj->cur_obj = NULL; 4718 obj_priv->phys_obj = NULL; 4719 } 4720 4721 int 4722 i915_gem_attach_phys_object(struct drm_device *dev, 4723 struct drm_gem_object *obj, int id) 4724 { 4725 drm_i915_private_t *dev_priv = dev->dev_private; 4726 struct drm_i915_gem_object *obj_priv; 4727 int ret = 0; 4728 int page_count; 4729 int i; 4730 4731 if (id > I915_MAX_PHYS_OBJECT) 4732 return -EINVAL; 4733 4734 obj_priv = obj->driver_private; 4735 4736 if (obj_priv->phys_obj) { 4737 if (obj_priv->phys_obj->id == id) 4738 return 0; 4739 i915_gem_detach_phys_object(dev, obj); 4740 } 4741 4742 4743 /* create a new object */ 4744 if (!dev_priv->mm.phys_objs[id - 1]) { 4745 ret = i915_gem_init_phys_object(dev, id, 4746 obj->size); 4747 if (ret) { 4748 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size); 4749 goto out; 4750 } 4751 } 4752 4753 /* bind to the object */ 4754 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4755 obj_priv->phys_obj->cur_obj = obj; 4756 4757 ret = 
i915_gem_object_get_pages(obj); 4758 if (ret) { 4759 DRM_ERROR("failed to get page list\n"); 4760 goto out; 4761 } 4762 4763 page_count = obj->size / PAGE_SIZE; 4764 4765 for (i = 0; i < page_count; i++) { 4766 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0); 4767 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4768 4769 memcpy(dst, src, PAGE_SIZE); 4770 kunmap_atomic(src, KM_USER0); 4771 } 4772 4773 i915_gem_object_put_pages(obj); 4774 4775 return 0; 4776 out: 4777 return ret; 4778 } 4779 4780 static int 4781 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 4782 struct drm_i915_gem_pwrite *args, 4783 struct drm_file *file_priv) 4784 { 4785 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4786 void *obj_addr; 4787 int ret; 4788 char __user *user_data; 4789 4790 user_data = (char __user *) (uintptr_t) args->data_ptr; 4791 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset; 4792 4793 DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size); 4794 ret = copy_from_user(obj_addr, user_data, args->size); 4795 if (ret) 4796 return -EFAULT; 4797 4798 drm_agp_chipset_flush(dev); 4799 return 0; 4800 } 4801 4802 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) 4803 { 4804 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 4805 4806 /* Clean up our request list when the client is going away, so that 4807 * later retire_requests won't dereference our soon-to-be-gone 4808 * file_priv. 4809 */ 4810 mutex_lock(&dev->struct_mutex); 4811 while (!list_empty(&i915_file_priv->mm.request_list)) 4812 list_del_init(i915_file_priv->mm.request_list.next); 4813 mutex_unlock(&dev->struct_mutex); 4814 } 4815 4816 static int 4817 i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) 4818 { 4819 drm_i915_private_t *dev_priv, *next_dev; 4820 struct drm_i915_gem_object *obj_priv, *next_obj; 4821 int cnt = 0; 4822 int would_deadlock = 1; 4823 4824 /* "fast-path" to count number of available objects */ 4825 if (nr_to_scan == 0) { 4826 spin_lock(&shrink_list_lock); 4827 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { 4828 struct drm_device *dev = dev_priv->dev; 4829 4830 if (mutex_trylock(&dev->struct_mutex)) { 4831 list_for_each_entry(obj_priv, 4832 &dev_priv->mm.inactive_list, 4833 list) 4834 cnt++; 4835 mutex_unlock(&dev->struct_mutex); 4836 } 4837 } 4838 spin_unlock(&shrink_list_lock); 4839 4840 return (cnt / 100) * sysctl_vfs_cache_pressure; 4841 } 4842 4843 spin_lock(&shrink_list_lock); 4844 4845 /* first scan for clean buffers */ 4846 list_for_each_entry_safe(dev_priv, next_dev, 4847 &shrink_list, mm.shrink_list) { 4848 struct drm_device *dev = dev_priv->dev; 4849 4850 if (! mutex_trylock(&dev->struct_mutex)) 4851 continue; 4852 4853 spin_unlock(&shrink_list_lock); 4854 4855 i915_gem_retire_requests(dev); 4856 4857 list_for_each_entry_safe(obj_priv, next_obj, 4858 &dev_priv->mm.inactive_list, 4859 list) { 4860 if (i915_gem_object_is_purgeable(obj_priv)) { 4861 i915_gem_object_unbind(obj_priv->obj); 4862 if (--nr_to_scan <= 0) 4863 break; 4864 } 4865 } 4866 4867 spin_lock(&shrink_list_lock); 4868 mutex_unlock(&dev->struct_mutex); 4869 4870 would_deadlock = 0; 4871 4872 if (nr_to_scan <= 0) 4873 break; 4874 } 4875 4876 /* second pass, evict/count anything still on the inactive list */ 4877 list_for_each_entry_safe(dev_priv, next_dev, 4878 &shrink_list, mm.shrink_list) { 4879 struct drm_device *dev = dev_priv->dev; 4880 4881 if (! 
mutex_trylock(&dev->struct_mutex)) 4882 continue; 4883 4884 spin_unlock(&shrink_list_lock); 4885 4886 list_for_each_entry_safe(obj_priv, next_obj, 4887 &dev_priv->mm.inactive_list, 4888 list) { 4889 if (nr_to_scan > 0) { 4890 i915_gem_object_unbind(obj_priv->obj); 4891 nr_to_scan--; 4892 } else 4893 cnt++; 4894 } 4895 4896 spin_lock(&shrink_list_lock); 4897 mutex_unlock(&dev->struct_mutex); 4898 4899 would_deadlock = 0; 4900 } 4901 4902 spin_unlock(&shrink_list_lock); 4903 4904 if (would_deadlock) 4905 return -1; 4906 else if (cnt > 0) 4907 return (cnt / 100) * sysctl_vfs_cache_pressure; 4908 else 4909 return 0; 4910 } 4911 4912 static struct shrinker shrinker = { 4913 .shrink = i915_gem_shrink, 4914 .seeks = DEFAULT_SEEKS, 4915 }; 4916 4917 __init void 4918 i915_gem_shrinker_init(void) 4919 { 4920 register_shrinker(&shrinker); 4921 } 4922 4923 __exit void 4924 i915_gem_shrinker_exit(void) 4925 { 4926 unregister_shrinker(&shrinker); 4927 } 4928
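/*
 * Worked example of the (cnt / 100) * sysctl_vfs_cache_pressure scaling in
 * i915_gem_shrink(), for illustration only: with 250 inactive objects
 * counted and the default vfs_cache_pressure of 100, the shrinker reports
 * (250 / 100) * 100 = 200 units; the integer division quantises the count
 * to steps of 100 objects, and lowering vfs_cache_pressure to 50 halves
 * the result to 100.
 */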