/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>

#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))

static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						     uint64_t offset,
						     uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int handle, ret;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char *vaddr;
	int unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;

	return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline int
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap_atomic(dst_page, KM_USER0);
	if (dst_vaddr == NULL)
		return -ENOMEM;

	src_vaddr = kmap_atomic(src_page, KM_USER1);
	if (src_vaddr == NULL) {
		kunmap_atomic(dst_vaddr, KM_USER0);
		return -ENOMEM;
	}

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap_atomic(src_vaddr, KM_USER1);
	kunmap_atomic(dst_vaddr, KM_USER0);

	return 0;
}

static inline int
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
	if (gpu_vaddr == NULL)
		return -ENOMEM;

	cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
	if (cpu_vaddr == NULL) {
		kunmap_atomic(gpu_vaddr, KM_USER0);
		return -ENOMEM;
	}

	/* Copy the data, XORing A6 with A17 (1). The user already knows
	 * he's XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
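	/* For illustration (not part of the original comment): with bit 17
	 * of the page's physical address set, the 64-byte cacheline at
	 * object offset 0x000 is fetched from 0x040 and vice versa, since
	 * gpu_offset ^ 64 flips A6.  The loop below therefore walks the
	 * range one cacheline at a time, so each memcpy stays within a
	 * single swizzled cacheline.
	 */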
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap_atomic(cpu_vaddr, KM_USER1);
	kunmap_atomic(gpu_vaddr, KM_USER0);

	return 0;
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within the object
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
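
/* Why the slow paths pin user pages up front (explanatory note): faulting
 * on a user address while struct_mutex is held could recurse into
 * i915_gem_fault(), which also takes struct_mutex, and deadlock.  Pinning
 * with get_user_pages() first means every later copy hits already-resident
 * pages and can run under the mutex with atomic kmaps.
 */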

/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages() and copies through atomic kernel mappings, so we can
 * copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    1);
		} else {
			ret = slow_shmem_copy(user_pages[data_page_index],
					      data_page_offset,
					      obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}
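
	/* A note on the XXX above (reasoning added for clarity): the first
	 * two comparisons bound args->offset and args->size by obj->size,
	 * so their u64 sum is at most 2 * obj->size and cannot wrap; the
	 * third comparison is then a true range check.
	 */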

	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
		if (ret != 0)
			ret = i915_gem_shmem_pread_slow(dev, obj, args,
							file_priv);
	}

	drm_gem_object_unreference(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path for already-pinned source pages: the caller has
 * faulted them in via get_user_pages(), so the copy itself cannot fault.
 */

static inline int
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char *src_vaddr, *dst_vaddr;
	unsigned long unwritten;

	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
	src_vaddr = kmap_atomic(user_page, KM_USER1);
	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
						      src_vaddr + user_offset,
						      length);
	kunmap_atomic(src_vaddr, KM_USER1);
	io_mapping_unmap_atomic(dst_vaddr);
	if (unwritten)
		return -EFAULT;
	return 0;
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}
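
/* Common pattern in the helpers above (summary added for clarity): each
 * uses an atomic kernel mapping and a *_inatomic user copy, so it may not
 * sleep.  A short copy is reported as -EFAULT, which the callers treat as
 * "fall back to the slow path that pins the user pages first".
 */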

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				      page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
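
/* Pinning direction note (added for clarity): the pwrite fallbacks below
 * pass write = 0 to get_user_pages() because they only read from the user
 * pages, while the pread fallback above passes write = 1 (and marks the
 * pages dirty) because it stores into them.
 */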

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
					gtt_page_base, gtt_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);

		/* The source page is pinned, so a short copy here is
		 * unexpected; if it happens anyway, just return the error.
		 */
		if (ret)
			goto out_unpin_object;

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within the object
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    0);
		} else {
			ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}
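
/* Path selection in i915_gem_pwrite_ioctl below, summarized for clarity:
 * phys objects take i915_gem_phys_pwrite(); untiled objects with a mapped
 * GTT try the fast GTT path and fall back to the pinning slow path on
 * -EFAULT; bit-17-swizzled objects must take the shmem slow path; all
 * others try the fast shmem path with the same -EFAULT fallback.
 */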

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference(obj);

	return ret;
}
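
/* Example of the domain invariant enforced below (values illustrative):
 * a caller asking for write_domain = I915_GEM_DOMAIN_GTT must pass
 * read_domains = I915_GEM_DOMAIN_GTT as well; write_domain = GTT with
 * read_domains = CPU | GTT is rejected with -EINVAL.
 */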

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);
#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = obj->driver_private;

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}
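
/* Userspace usage sketch for the two mapping flavors (illustrative only;
 * field names per i915_drm.h of this era):
 *
 *	struct drm_i915_gem_mmap mmap_arg = { .handle = handle,
 *					      .size = size };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
 *	ptr = (void *)(uintptr_t)mmap_arg.addr_ptr;	// CPU (shmem) map
 *
 *	struct drm_i915_gem_mmap_gtt gtt_arg = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt_arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, gtt_arg.offset);			// GTT map via fault
 */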

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return VM_FAULT_SIGBUS;
		}

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return VM_FAULT_SIGBUS;
		}

		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	/* Need a new fence register? */
	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
	    obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return VM_FAULT_SIGBUS;
		}
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);

	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	case -EFAULT:
	case -EINVAL:
		return VM_FAULT_SIGBUS;
	default:
		return VM_FAULT_NOPAGE;
	}
}
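
/* Return-code mapping above, spelled out for reference (my reading of the
 * mm API of this era): vm_insert_pfn() reports -ENOMEM/-EAGAIN when a PTE
 * cannot be allocated (VM_FAULT_OOM), -EFAULT/-EINVAL for a bad address
 * (VM_FAULT_SIGBUS), and 0 or -EBUSY when the PFN is (already) mapped,
 * which the default case folds into VM_FAULT_NOPAGE.
 */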

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
		DRM_ERROR("failed to add to map hash\n");
		ret = -ENOMEM;
		goto out_free_mm;
	}

	/* By now we should be all set, any drm_mmap request on the offset
	 * below will get to our mmap & fault handler */
	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	if (dev->dev_mapping)
		unmap_mapping_range(dev->dev_mapping,
				    obj_priv->mmap_offset, obj->size, 1);
}

static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;

	list = &obj->map_list;
	drm_ht_remove_item(&mm->offset_hash, &list->hash);

	if (list->file_offset_node) {
		drm_mm_put_block(list->file_offset_node);
		list->file_offset_node = NULL;
	}

	if (list->map) {
		kfree(list->map);
		list->map = NULL;
	}

	obj_priv->mmap_offset = 0;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	if (IS_I9XX(dev))
		start = 1024*1024;
	else
		start = 512*1024;

	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}
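
/* Worked example for the loop above (sizes illustrative): a 640KB tiled
 * object on a 9xx chip starts at start = 1MB, which already covers the
 * object, so the alignment is 1MB; a 640KB object on an 8xx chip starts
 * at 512KB and doubles once, also giving 1MB, while a 256KB object on
 * 8xx stays at 512KB.
 */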

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);

	obj_priv = obj->driver_private;

	if (!obj_priv->mmap_offset) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	args->offset = obj_priv->mmap_offset;

	obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);

	/* Make sure the alignment is correct for fence regs etc */
	if (obj_priv->agp_mem &&
	    (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	/*
	 * Pull it into the GTT so that we have a page list (makes the
	 * initial fault faster and any subsequent flushing possible).
	 */
	if (!obj_priv->agp_mem) {
		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

void
i915_gem_object_put_pages(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count = obj->size / PAGE_SIZE;
	int i;

	BUG_ON(obj_priv->pages_refcount == 0);

	if (--obj_priv->pages_refcount != 0)
		return;

	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	for (i = 0; i < page_count; i++)
		if (obj_priv->pages[i] != NULL) {
			if (obj_priv->dirty)
				set_page_dirty(obj_priv->pages[i]);
			mark_page_accessed(obj_priv->pages[i]);
			page_cache_release(obj_priv->pages[i]);
		}
	obj_priv->dirty = 0;

	drm_free_large(obj_priv->pages);
	obj_priv->pages = NULL;
}

static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution. */
	spin_lock(&dev_priv->mm.active_list_lock);
	list_move_tail(&obj_priv->list,
		       &dev_priv->mm.active_list);
	spin_unlock(&dev_priv->mm.active_list_lock);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	BUG_ON(!obj_priv->active);
	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
	obj_priv->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	obj_priv->last_rendering_seqno = 0;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}
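
/* Object list lifecycle, summarized for reference: a buffer referenced by
 * new rendering moves to the active list (move_to_active, taking an extra
 * object ref); if its request retires while a GPU write is still pending
 * it parks on the flushing list (move_to_flushing); once flushed and
 * retired it returns to the inactive list (move_to_inactive) and becomes
 * eligible for eviction.
 */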

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_mutex held.
 *
 * Returned sequence numbers are nonzero on success.
 */
static uint32_t
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
		 uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *i915_file_priv = NULL;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	if (file_priv != NULL)
		i915_file_priv = file_priv->driver_priv;

	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL)
		return 0;

	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);

	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	DRM_DEBUG("%d\n", seqno);

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list);
	if (i915_file_priv) {
		list_add_tail(&request->client_list,
			      &i915_file_priv->mm.request_list);
	} else {
		INIT_LIST_HEAD(&request->client_list);
	}

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
	 */
	if (flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		list_for_each_entry_safe(obj_priv, next,
					 &dev_priv->mm.flushing_list, list) {
			struct drm_gem_object *obj = obj_priv->obj;

			if ((obj->write_domain & flush_domains) ==
			    obj->write_domain) {
				obj->write_domain = 0;
				i915_gem_object_move_to_active(obj, seqno);
			}
		}
	}

	if (was_empty && !dev_priv->mm.suspended)
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	spin_lock(&dev_priv->mm.active_list_lock);
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&dev_priv->mm.active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			goto out;

#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else {
			/* Take a reference on the object so it won't be
			 * freed while the spinlock is held. The list
			 * protection for this spinlock is safe when breaking
			 * the lock like this since the next thing we do
			 * is just get the head of the list again.
			 */
			drm_gem_object_reference(obj);
			i915_gem_object_move_to_inactive(obj);
			spin_unlock(&dev_priv->mm.active_list_lock);
			drm_gem_object_unreference(obj);
			spin_lock(&dev_priv->mm.active_list_lock);
		}
	}
out:
	spin_unlock(&dev_priv->mm.active_list_lock);
}

/**
 * Returns true if seq1 is later than seq2.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}
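
/* Wraparound example for i915_seqno_passed() (values illustrative): with
 * seq1 = 2 and seq2 = 0xfffffffe, seq1 - seq2 = 4, so 2 is correctly
 * treated as later even though the counter has wrapped; the comparison
 * only misorders seqnos more than 2^31 apart.
 */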

uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!dev_priv->hw_status_page)
		return;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&dev_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    dev_priv->mm.wedged) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			list_del(&request->client_list);
			kfree(request);
		} else
			break;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended &&
	    !list_empty(&dev_priv->mm.request_list))
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		if (IS_IGDNG(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_get(dev);
		ret = wait_event_interruptible(dev_priv->irq_queue,
					       i915_seqno_passed(i915_get_gem_seqno(dev),
								 seqno) ||
					       dev_priv->mm.wedged);
		i915_user_irq_put(dev);
		dev_priv->mm.waiting_gem_seqno = 0;
	}
	if (dev_priv->mm.wedged)
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, i915_get_gem_seqno(dev));

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

#if WATCH_EXEC
	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
		 invalidate_domains, flush_domains);
#endif

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
		 *
		 * I915_GEM_DOMAIN_COMMAND may not exist?
		 *
		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
		 * invalidated when MI_EXE_FLUSH is set.
		 *
		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
		 * invalidated with every MI_FLUSH.
		 *
		 * TLBs:
		 *
		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
		 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
		 * are flushed at any MI_FLUSH.
		 */

		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
		if ((invalidate_domains|flush_domains) &
		    I915_GEM_DOMAIN_RENDER)
			cmd &= ~MI_NO_WRITE_FLUSH;
		if (!IS_I965G(dev)) {
			/*
			 * On the 965, the sampler cache always gets flushed
			 * and this bit is reserved.
			 */
			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
				cmd |= MI_READ_FLUSH;
		}
		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
			cmd |= MI_EXE_FLUSH;

#if WATCH_EXEC
		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
#endif
		BEGIN_LP_RING(2);
		OUT_RING(cmd);
		OUT_RING(0); /* noop */
		ADVANCE_LP_RING();
	}
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
i915_gem_object_wait_rendering(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj_priv->active) {
#if WATCH_BUF
		DRM_INFO("%s: object %p wait for seqno %08x\n",
			 __func__, obj, obj_priv->last_rendering_seqno);
#endif
		ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
		if (ret != 0)
			return ret;
	}

	return 0;
}

/**
 * Unbinds an object from the GTT aperture.
 */
int
i915_gem_object_unbind(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret = 0;

#if WATCH_BUF
	DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
	DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
#endif
	if (obj_priv->gtt_space == NULL)
		return 0;

	if (obj_priv->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 * before we unbind.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret) {
		if (ret != -ERESTARTSYS)
			DRM_ERROR("set_domain failed: %d\n", ret);
		return ret;
	}

	if (obj_priv->agp_mem != NULL) {
		drm_unbind_agp(obj_priv->agp_mem);
		drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
		obj_priv->agp_mem = NULL;
	}

	BUG_ON(obj_priv->active);

	/* blow away mappings if mapped through GTT */
	i915_gem_release_mmap(obj);

	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
		i915_gem_clear_fence_reg(obj);

	i915_gem_object_put_pages(obj);

	if (obj_priv->gtt_space) {
		atomic_dec(&dev->gtt_count);
		atomic_sub(obj->size, &dev->gtt_memory);

		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
	}

	/* Remove ourselves from the LRU list if present. */
	if (!list_empty(&obj_priv->list))
		list_del_init(&obj_priv->list);

	return 0;
}
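
/* Eviction strategy below, summarized for reference: first reuse an
 * already-inactive buffer; failing that, wait on the oldest request so
 * retiring may move something to the inactive list; failing that, emit a
 * flush for buffers stuck on the flushing list and loop so the flush's
 * request can be waited on.  Only when all three lists are empty is
 * -ENOSPC returned.
 */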
1958 */ 1959 if (!list_empty(&dev_priv->mm.inactive_list)) { 1960 obj_priv = list_first_entry(&dev_priv->mm.inactive_list, 1961 struct drm_i915_gem_object, 1962 list); 1963 obj = obj_priv->obj; 1964 BUG_ON(obj_priv->pin_count != 0); 1965 #if WATCH_LRU 1966 DRM_INFO("%s: evicting %p\n", __func__, obj); 1967 #endif 1968 BUG_ON(obj_priv->active); 1969 1970 /* Wait on the rendering and unbind the buffer. */ 1971 ret = i915_gem_object_unbind(obj); 1972 break; 1973 } 1974 1975 /* If we didn't get anything, but the ring is still processing 1976 * things, wait for one of those things to finish and hopefully 1977 * leave us a buffer to evict. 1978 */ 1979 if (!list_empty(&dev_priv->mm.request_list)) { 1980 struct drm_i915_gem_request *request; 1981 1982 request = list_first_entry(&dev_priv->mm.request_list, 1983 struct drm_i915_gem_request, 1984 list); 1985 1986 ret = i915_wait_request(dev, request->seqno); 1987 if (ret) 1988 break; 1989 1990 /* if waiting caused an object to become inactive, 1991 * then loop around and wait for it. Otherwise, we 1992 * assume that waiting freed and unbound something, 1993 * so there should now be some space in the GTT 1994 */ 1995 if (!list_empty(&dev_priv->mm.inactive_list)) 1996 continue; 1997 break; 1998 } 1999 2000 /* If we didn't have anything on the request list but there 2001 * are buffers awaiting a flush, emit one and try again. 2002 * When we wait on it, those buffers waiting for that flush 2003 * will get moved to inactive. 2004 */ 2005 if (!list_empty(&dev_priv->mm.flushing_list)) { 2006 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 2007 struct drm_i915_gem_object, 2008 list); 2009 obj = obj_priv->obj; 2010 2011 i915_gem_flush(dev, 2012 obj->write_domain, 2013 obj->write_domain); 2014 i915_add_request(dev, NULL, obj->write_domain); 2015 2016 obj = NULL; 2017 continue; 2018 } 2019 2020 DRM_ERROR("inactive empty %d request empty %d " 2021 "flushing empty %d\n", 2022 list_empty(&dev_priv->mm.inactive_list), 2023 list_empty(&dev_priv->mm.request_list), 2024 list_empty(&dev_priv->mm.flushing_list)); 2025 /* If we didn't do any of the above, there's nothing to be done 2026 * and we just can't fit it in. 2027 */ 2028 return -ENOSPC; 2029 } 2030 return ret; 2031 } 2032 2033 static int 2034 i915_gem_evict_everything(struct drm_device *dev) 2035 { 2036 int ret; 2037 2038 for (;;) { 2039 ret = i915_gem_evict_something(dev); 2040 if (ret != 0) 2041 break; 2042 } 2043 if (ret == -ENOSPC) 2044 return 0; 2045 return ret; 2046 } 2047 2048 int 2049 i915_gem_object_get_pages(struct drm_gem_object *obj) 2050 { 2051 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2052 int page_count, i; 2053 struct address_space *mapping; 2054 struct inode *inode; 2055 struct page *page; 2056 int ret; 2057 2058 if (obj_priv->pages_refcount++ != 0) 2059 return 0; 2060 2061 /* Get the list of pages out of our struct file. They'll be pinned 2062 * at this point until we release them. 
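* (read_mapping_page() below returns each page with a reference held; i915_gem_object_put_pages(), which is also the error path here, is assumed to be what drops those references again.)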
2063 */ 2064 page_count = obj->size / PAGE_SIZE; 2065 BUG_ON(obj_priv->pages != NULL); 2066 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2067 if (obj_priv->pages == NULL) { 2068 DRM_ERROR("Failed to allocate page list\n"); 2069 obj_priv->pages_refcount--; 2070 return -ENOMEM; 2071 } 2072 2073 inode = obj->filp->f_path.dentry->d_inode; 2074 mapping = inode->i_mapping; 2075 for (i = 0; i < page_count; i++) { 2076 page = read_mapping_page(mapping, i, NULL); 2077 if (IS_ERR(page)) { 2078 ret = PTR_ERR(page); 2079 DRM_ERROR("read_mapping_page failed: %d\n", ret); 2080 i915_gem_object_put_pages(obj); 2081 return ret; 2082 } 2083 obj_priv->pages[i] = page; 2084 } 2085 2086 if (obj_priv->tiling_mode != I915_TILING_NONE) 2087 i915_gem_object_do_bit_17_swizzle(obj); 2088 2089 return 0; 2090 } 2091 2092 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2093 { 2094 struct drm_gem_object *obj = reg->obj; 2095 struct drm_device *dev = obj->dev; 2096 drm_i915_private_t *dev_priv = dev->dev_private; 2097 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2098 int regnum = obj_priv->fence_reg; 2099 uint64_t val; 2100 2101 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2102 0xfffff000) << 32; 2103 val |= obj_priv->gtt_offset & 0xfffff000; 2104 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2105 if (obj_priv->tiling_mode == I915_TILING_Y) 2106 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2107 val |= I965_FENCE_REG_VALID; 2108 2109 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2110 } 2111 2112 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2113 { 2114 struct drm_gem_object *obj = reg->obj; 2115 struct drm_device *dev = obj->dev; 2116 drm_i915_private_t *dev_priv = dev->dev_private; 2117 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2118 int regnum = obj_priv->fence_reg; 2119 int tile_width; 2120 uint32_t fence_reg, val; 2121 uint32_t pitch_val; 2122 2123 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2124 (obj_priv->gtt_offset & (obj->size - 1))) { 2125 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2126 __func__, obj_priv->gtt_offset, obj->size); 2127 return; 2128 } 2129 2130 if (obj_priv->tiling_mode == I915_TILING_Y && 2131 HAS_128_BYTE_Y_TILING(dev)) 2132 tile_width = 128; 2133 else 2134 tile_width = 512; 2135 2136 /* Note: pitch better be a power of two tile widths */ 2137 pitch_val = obj_priv->stride / tile_width; 2138 pitch_val = ffs(pitch_val) - 1; 2139 2140 val = obj_priv->gtt_offset; 2141 if (obj_priv->tiling_mode == I915_TILING_Y) 2142 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2143 val |= I915_FENCE_SIZE_BITS(obj->size); 2144 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2145 val |= I830_FENCE_REG_VALID; 2146 2147 if (regnum < 8) 2148 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2149 else 2150 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2151 I915_WRITE(fence_reg, val); 2152 } 2153 2154 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2155 { 2156 struct drm_gem_object *obj = reg->obj; 2157 struct drm_device *dev = obj->dev; 2158 drm_i915_private_t *dev_priv = dev->dev_private; 2159 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2160 int regnum = obj_priv->fence_reg; 2161 uint32_t val; 2162 uint32_t pitch_val; 2163 uint32_t fence_size_bits; 2164 2165 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2166 (obj_priv->gtt_offset & (obj->size - 1))) { 2167 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2168 __func__,
obj_priv->gtt_offset); 2169 return; 2170 } 2171 2172 pitch_val = obj_priv->stride / 128; 2173 pitch_val = ffs(pitch_val) - 1; 2174 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2175 2176 val = obj_priv->gtt_offset; 2177 if (obj_priv->tiling_mode == I915_TILING_Y) 2178 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2179 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2180 WARN_ON(fence_size_bits & ~0x00000f00); 2181 val |= fence_size_bits; 2182 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2183 val |= I830_FENCE_REG_VALID; 2184 2185 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2186 } 2187 2188 /** 2189 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2190 * @obj: object to map through a fence reg 2191 * 2192 * When mapping objects through the GTT, userspace wants to be able to write 2193 * to them without having to worry about swizzling if the object is tiled. 2194 * 2195 * This function walks the fence regs looking for a free one for @obj, 2196 * stealing one if it can't find any. 2197 * 2198 * It then sets up the reg based on the object's properties: address, pitch 2199 * and tiling format. 2200 */ 2201 int 2202 i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 2203 { 2204 struct drm_device *dev = obj->dev; 2205 struct drm_i915_private *dev_priv = dev->dev_private; 2206 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2207 struct drm_i915_fence_reg *reg = NULL; 2208 struct drm_i915_gem_object *old_obj_priv = NULL; 2209 int i, ret, avail; 2210 2211 switch (obj_priv->tiling_mode) { 2212 case I915_TILING_NONE: 2213 WARN(1, "allocating a fence for non-tiled object?\n"); 2214 break; 2215 case I915_TILING_X: 2216 if (!obj_priv->stride) 2217 return -EINVAL; 2218 WARN((obj_priv->stride & (512 - 1)), 2219 "object 0x%08x is X tiled but has non-512B pitch\n", 2220 obj_priv->gtt_offset); 2221 break; 2222 case I915_TILING_Y: 2223 if (!obj_priv->stride) 2224 return -EINVAL; 2225 WARN((obj_priv->stride & (128 - 1)), 2226 "object 0x%08x is Y tiled but has non-128B pitch\n", 2227 obj_priv->gtt_offset); 2228 break; 2229 } 2230 2231 /* First try to find a free reg */ 2232 try_again: 2233 avail = 0; 2234 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2235 reg = &dev_priv->fence_regs[i]; 2236 if (!reg->obj) 2237 break; 2238 2239 old_obj_priv = reg->obj->driver_private; 2240 if (!old_obj_priv->pin_count) 2241 avail++; 2242 } 2243 2244 /* None available, try to steal one or wait for a user to finish */ 2245 if (i == dev_priv->num_fence_regs) { 2246 uint32_t seqno = dev_priv->mm.next_gem_seqno; 2247 2248 if (avail == 0) 2249 return -ENOSPC; 2250 2251 for (i = dev_priv->fence_reg_start; 2252 i < dev_priv->num_fence_regs; i++) { 2253 uint32_t this_seqno; 2254 2255 reg = &dev_priv->fence_regs[i]; 2256 old_obj_priv = reg->obj->driver_private; 2257 2258 if (old_obj_priv->pin_count) 2259 continue; 2260 2261 /* i915 uses fences for GPU access to tiled buffers */ 2262 if (IS_I965G(dev) || !old_obj_priv->active) 2263 break; 2264 2265 /* find the seqno of the first available fence */ 2266 this_seqno = old_obj_priv->last_rendering_seqno; 2267 if (this_seqno != 0 && 2268 reg->obj->write_domain == 0 && 2269 i915_seqno_passed(seqno, this_seqno)) 2270 seqno = this_seqno; 2271 } 2272 2273 /* 2274 * Now things get ugly... we have to wait for one of the 2275 * objects to finish before trying again. 
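* If none of the candidate fences has even produced a seqno we could wait on, a flush and request are emitted first below purely to generate one.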
2276 */ 2277 if (i == dev_priv->num_fence_regs) { 2278 if (seqno == dev_priv->mm.next_gem_seqno) { 2279 i915_gem_flush(dev, 2280 I915_GEM_GPU_DOMAINS, 2281 I915_GEM_GPU_DOMAINS); 2282 seqno = i915_add_request(dev, NULL, 2283 I915_GEM_GPU_DOMAINS); 2284 if (seqno == 0) 2285 return -ENOMEM; 2286 } 2287 2288 ret = i915_wait_request(dev, seqno); 2289 if (ret) 2290 return ret; 2291 goto try_again; 2292 } 2293 2294 /* 2295 * Zap this virtual mapping so we can set up a fence again 2296 * for this object next time we need it. 2297 */ 2298 i915_gem_release_mmap(reg->obj); 2299 old_obj_priv->fence_reg = I915_FENCE_REG_NONE; 2300 } 2301 2302 obj_priv->fence_reg = i; 2303 reg->obj = obj; 2304 2305 if (IS_I965G(dev)) 2306 i965_write_fence_reg(reg); 2307 else if (IS_I9XX(dev)) 2308 i915_write_fence_reg(reg); 2309 else 2310 i830_write_fence_reg(reg); 2311 2312 return 0; 2313 } 2314 2315 /** 2316 * i915_gem_clear_fence_reg - clear out fence register info 2317 * @obj: object to clear 2318 * 2319 * Zeroes out the fence register itself and clears out the associated 2320 * data structures in dev_priv and obj_priv. 2321 */ 2322 static void 2323 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2324 { 2325 struct drm_device *dev = obj->dev; 2326 drm_i915_private_t *dev_priv = dev->dev_private; 2327 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2328 2329 if (IS_I965G(dev)) 2330 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2331 else { 2332 uint32_t fence_reg; 2333 2334 if (obj_priv->fence_reg < 8) 2335 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2336 else 2337 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2338 8) * 4; 2339 2340 I915_WRITE(fence_reg, 0); 2341 } 2342 2343 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; 2344 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2345 } 2346 2347 /** 2348 * i915_gem_object_put_fence_reg - waits on outstanding fenced access 2349 * to the buffer to finish, and then resets the fence register. 2350 * @obj: tiled object holding a fence register. 2351 * 2352 * Zeroes out the fence register itself and clears out the associated 2353 * data structures in dev_priv and obj_priv. 2354 */ 2355 int 2356 i915_gem_object_put_fence_reg(struct drm_gem_object *obj) 2357 { 2358 struct drm_device *dev = obj->dev; 2359 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2360 2361 if (obj_priv->fence_reg == I915_FENCE_REG_NONE) 2362 return 0; 2363 2364 /* On the i915, GPU access to tiled buffers is via a fence, 2365 * therefore we must wait for any outstanding access to complete 2366 * before clearing the fence. 2367 */ 2368 if (!IS_I965G(dev)) { 2369 int ret; 2370 2371 i915_gem_object_flush_gpu_write_domain(obj); 2372 i915_gem_object_flush_gtt_write_domain(obj); 2373 ret = i915_gem_object_wait_rendering(obj); 2374 if (ret != 0) 2375 return ret; 2376 } 2377 2378 i915_gem_clear_fence_reg(obj); 2379 2380 return 0; 2381 } 2382 2383 /** 2384 * Finds free space in the GTT aperture and binds the object there.
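* If the aperture is full this evicts buffers, waiting on the GPU as needed, and retries the search; -ENOSPC comes back only once nothing further can be evicted.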
2385 */ 2386 static int 2387 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2388 { 2389 struct drm_device *dev = obj->dev; 2390 drm_i915_private_t *dev_priv = dev->dev_private; 2391 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2392 struct drm_mm_node *free_space; 2393 int page_count, ret; 2394 2395 if (dev_priv->mm.suspended) 2396 return -EBUSY; 2397 if (alignment == 0) 2398 alignment = i915_gem_get_gtt_alignment(obj); 2399 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2400 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2401 return -EINVAL; 2402 } 2403 2404 search_free: 2405 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2406 obj->size, alignment, 0); 2407 if (free_space != NULL) { 2408 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2409 alignment); 2410 if (obj_priv->gtt_space != NULL) { 2411 obj_priv->gtt_space->private = obj; 2412 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2413 } 2414 } 2415 if (obj_priv->gtt_space == NULL) { 2416 bool lists_empty; 2417 2418 /* If the gtt is empty and we're still having trouble 2419 * fitting our object in, we're out of memory. 2420 */ 2421 #if WATCH_LRU 2422 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2423 #endif 2424 spin_lock(&dev_priv->mm.active_list_lock); 2425 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2426 list_empty(&dev_priv->mm.flushing_list) && 2427 list_empty(&dev_priv->mm.active_list)); 2428 spin_unlock(&dev_priv->mm.active_list_lock); 2429 if (lists_empty) { 2430 DRM_ERROR("GTT full, but LRU list empty\n"); 2431 return -ENOSPC; 2432 } 2433 2434 ret = i915_gem_evict_something(dev); 2435 if (ret != 0) { 2436 if (ret != -ERESTARTSYS) 2437 DRM_ERROR("Failed to evict a buffer %d\n", ret); 2438 return ret; 2439 } 2440 goto search_free; 2441 } 2442 2443 #if WATCH_BUF 2444 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2445 obj->size, obj_priv->gtt_offset); 2446 #endif 2447 ret = i915_gem_object_get_pages(obj); 2448 if (ret) { 2449 drm_mm_put_block(obj_priv->gtt_space); 2450 obj_priv->gtt_space = NULL; 2451 return ret; 2452 } 2453 2454 page_count = obj->size / PAGE_SIZE; 2455 /* Create an AGP memory structure pointing at our pages, and bind it 2456 * into the GTT. 2457 */ 2458 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2459 obj_priv->pages, 2460 page_count, 2461 obj_priv->gtt_offset, 2462 obj_priv->agp_type); 2463 if (obj_priv->agp_mem == NULL) { 2464 i915_gem_object_put_pages(obj); 2465 drm_mm_put_block(obj_priv->gtt_space); 2466 obj_priv->gtt_space = NULL; 2467 return -ENOMEM; 2468 } 2469 atomic_inc(&dev->gtt_count); 2470 atomic_add(obj->size, &dev->gtt_memory); 2471 2472 /* Assert that the object is not currently in any GPU domain. As it 2473 * wasn't in the GTT, there shouldn't be any way it could have been in 2474 * a GPU cache 2475 */ 2476 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2477 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2478 2479 return 0; 2480 } 2481 2482 void 2483 i915_gem_clflush_object(struct drm_gem_object *obj) 2484 { 2485 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2486 2487 /* If we don't have a page list set up, then we're not pinned 2488 * to GPU, and we can ignore the cache flush because it'll happen 2489 * again at bind time. 2490 */ 2491 if (obj_priv->pages == NULL) 2492 return; 2493 2494 /* XXX: The 865 in particular appears to be weird in how it handles 2495 * cache flushing. 
We haven't figured it out, but the 2496 * clflush+agp_chipset_flush doesn't appear to successfully get the 2497 * data visible to the GPU, while wbinvd + agp_chipset_flush does. 2498 */ 2499 if (IS_I865G(obj->dev)) { 2500 wbinvd(); 2501 return; 2502 } 2503 2504 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2505 } 2506 2507 /** Flushes any GPU write domain for the object if it's dirty. */ 2508 static void 2509 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2510 { 2511 struct drm_device *dev = obj->dev; 2512 uint32_t seqno; 2513 2514 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2515 return; 2516 2517 /* Queue the GPU write cache flushing we need. */ 2518 i915_gem_flush(dev, 0, obj->write_domain); 2519 seqno = i915_add_request(dev, NULL, obj->write_domain); 2520 obj->write_domain = 0; 2521 i915_gem_object_move_to_active(obj, seqno); 2522 } 2523 2524 /** Flushes the GTT write domain for the object if it's dirty. */ 2525 static void 2526 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2527 { 2528 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2529 return; 2530 2531 /* No actual flushing is required for the GTT write domain. Writes 2532 * to it immediately go to main memory as far as we know, so there's 2533 * no chipset flush. It also doesn't land in render cache. 2534 */ 2535 obj->write_domain = 0; 2536 } 2537 2538 /** Flushes the CPU write domain for the object if it's dirty. */ 2539 static void 2540 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2541 { 2542 struct drm_device *dev = obj->dev; 2543 2544 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2545 return; 2546 2547 i915_gem_clflush_object(obj); 2548 drm_agp_chipset_flush(dev); 2549 obj->write_domain = 0; 2550 } 2551 2552 /** 2553 * Moves a single object to the GTT read, and possibly write domain. 2554 * 2555 * This function returns when the move is complete, including waiting on 2556 * flushes to occur. 2557 */ 2558 int 2559 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2560 { 2561 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2562 int ret; 2563 2564 /* Not valid to be called on unbound objects. */ 2565 if (obj_priv->gtt_space == NULL) 2566 return -EINVAL; 2567 2568 i915_gem_object_flush_gpu_write_domain(obj); 2569 /* Wait on any GPU rendering and flushing to occur. */ 2570 ret = i915_gem_object_wait_rendering(obj); 2571 if (ret != 0) 2572 return ret; 2573 2574 /* If we're writing through the GTT domain, then CPU and GPU caches 2575 * will need to be invalidated at next use. 2576 */ 2577 if (write) 2578 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2579 2580 i915_gem_object_flush_cpu_write_domain(obj); 2581 2582 /* It should now be out of any other write domains, and we can update 2583 * the domain values for our changes. 2584 */ 2585 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2586 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2587 if (write) { 2588 obj->write_domain = I915_GEM_DOMAIN_GTT; 2589 obj_priv->dirty = 1; 2590 } 2591 2592 return 0; 2593 } 2594 2595 /** 2596 * Moves a single object to the CPU read, and possibly write domain. 2597 * 2598 * This function returns when the move is complete, including waiting on 2599 * flushes to occur. 2600 */ 2601 static int 2602 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2603 { 2604 int ret; 2605 2606 i915_gem_object_flush_gpu_write_domain(obj); 2607 /* Wait on any GPU rendering and flushing to occur.
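* (flush_gpu_write_domain() above only queues the flush; this wait is what guarantees it has actually executed.)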
*/ 2608 ret = i915_gem_object_wait_rendering(obj); 2609 if (ret != 0) 2610 return ret; 2611 2612 i915_gem_object_flush_gtt_write_domain(obj); 2613 2614 /* If we have a partially-valid cache of the object in the CPU, 2615 * finish invalidating it and free the per-page flags. 2616 */ 2617 i915_gem_object_set_to_full_cpu_read_domain(obj); 2618 2619 /* Flush the CPU cache if it's still invalid. */ 2620 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2621 i915_gem_clflush_object(obj); 2622 2623 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2624 } 2625 2626 /* It should now be out of any other write domains, and we can update 2627 * the domain values for our changes. 2628 */ 2629 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2630 2631 /* If we're writing through the CPU, then the GPU read domains will 2632 * need to be invalidated at next use. 2633 */ 2634 if (write) { 2635 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2636 obj->write_domain = I915_GEM_DOMAIN_CPU; 2637 } 2638 2639 return 0; 2640 } 2641 2642 /* 2643 * Set the next domain for the specified object. This 2644 * may not actually perform the necessary flushing/invalidating though, 2645 * as that may want to be batched with other set_domain operations 2646 * 2647 * This is (we hope) the only really tricky part of gem. The goal 2648 * is fairly simple -- track which caches hold bits of the object 2649 * and make sure they remain coherent. A few concrete examples may 2650 * help to explain how it works. For shorthand, we use the notation 2651 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate 2652 * a pair of read and write domain masks. 2653 * 2654 * Case 1: the batch buffer 2655 * 2656 * 1. Allocated 2657 * 2. Written by CPU 2658 * 3. Mapped to GTT 2659 * 4. Read by GPU 2660 * 5. Unmapped from GTT 2661 * 6. Freed 2662 * 2663 * Let's take these a step at a time 2664 * 2665 * 1. Allocated 2666 * Pages allocated from the kernel may still have 2667 * cache contents, so we set them to (CPU, CPU) always. 2668 * 2. Written by CPU (using pwrite) 2669 * The pwrite function calls set_domain (CPU, CPU) and 2670 * this function does nothing (as nothing changes) 2671 * 3. Mapped to GTT 2672 * This function asserts that the object is not 2673 * currently in any GPU-based read or write domains 2674 * 4. Read by GPU 2675 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 2676 * As write_domain is zero, this function adds in the 2677 * current read domains (CPU+COMMAND, 0). 2678 * flush_domains is set to CPU. 2679 * invalidate_domains is set to COMMAND 2680 * clflush is run to get data out of the CPU caches 2681 * then i915_dev_set_domain calls i915_gem_flush to 2682 * emit an MI_FLUSH and drm_agp_chipset_flush 2683 * 5. Unmapped from GTT 2684 * i915_gem_object_unbind calls set_domain (CPU, CPU) 2685 * flush_domains and invalidate_domains end up both zero 2686 * so no flushing/invalidating happens 2687 * 6. Freed 2688 * yay, done 2689 * 2690 * Case 2: The shared render buffer 2691 * 2692 * 1. Allocated 2693 * 2. Mapped to GTT 2694 * 3. Read/written by GPU 2695 * 4. set_domain to (CPU,CPU) 2696 * 5. Read/written by CPU 2697 * 6. Read/written by GPU 2698 * 2699 * 1. Allocated 2700 * Same as last example, (CPU, CPU) 2701 * 2. Mapped to GTT 2702 * Nothing changes (assertions find that it is not in the GPU) 2703 * 3. Read/written by GPU 2704 * execbuffer calls set_domain (RENDER, RENDER) 2705 * flush_domains gets CPU 2706 * invalidate_domains gets GPU 2707 * clflush (obj) 2708 * MI_FLUSH and drm_agp_chipset_flush 2709 * 4.
set_domain (CPU, CPU) 2710 * flush_domains gets GPU 2711 * invalidate_domains gets CPU 2712 * wait_rendering (obj) to make sure all drawing is complete. 2713 * This will include an MI_FLUSH to get the data from GPU 2714 * to memory 2715 * clflush (obj) to invalidate the CPU cache 2716 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 2717 * 5. Read/written by CPU 2718 * cache lines are loaded and dirtied 2719 * 6. Read/written by GPU 2720 * Same as last GPU access 2721 * 2722 * Case 3: The constant buffer 2723 * 2724 * 1. Allocated 2725 * 2. Written by CPU 2726 * 3. Read by GPU 2727 * 4. Updated (written) by CPU again 2728 * 5. Read by GPU 2729 * 2730 * 1. Allocated 2731 * (CPU, CPU) 2732 * 2. Written by CPU 2733 * (CPU, CPU) 2734 * 3. Read by GPU 2735 * (CPU+RENDER, 0) 2736 * flush_domains = CPU 2737 * invalidate_domains = RENDER 2738 * clflush (obj) 2739 * MI_FLUSH 2740 * drm_agp_chipset_flush 2741 * 4. Updated (written) by CPU again 2742 * (CPU, CPU) 2743 * flush_domains = 0 (no previous write domain) 2744 * invalidate_domains = 0 (no new read domains) 2745 * 5. Read by GPU 2746 * (CPU+RENDER, 0) 2747 * flush_domains = CPU 2748 * invalidate_domains = RENDER 2749 * clflush (obj) 2750 * MI_FLUSH 2751 * drm_agp_chipset_flush 2752 */ 2753 static void 2754 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 2755 { 2756 struct drm_device *dev = obj->dev; 2757 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2758 uint32_t invalidate_domains = 0; 2759 uint32_t flush_domains = 0; 2760 2761 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2762 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2763 2764 #if WATCH_BUF 2765 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 2766 __func__, obj, 2767 obj->read_domains, obj->pending_read_domains, 2768 obj->write_domain, obj->pending_write_domain); 2769 #endif 2770 /* 2771 * If the object isn't moving to a new write domain, 2772 * let the object stay in multiple read domains 2773 */ 2774 if (obj->pending_write_domain == 0) 2775 obj->pending_read_domains |= obj->read_domains; 2776 else 2777 obj_priv->dirty = 1; 2778 2779 /* 2780 * Flush the current write domain if 2781 * the new read domains don't match. Invalidate 2782 * any read domains which differ from the old 2783 * write domain 2784 */ 2785 if (obj->write_domain && 2786 obj->write_domain != obj->pending_read_domains) { 2787 flush_domains |= obj->write_domain; 2788 invalidate_domains |= 2789 obj->pending_read_domains & ~obj->write_domain; 2790 } 2791 /* 2792 * Invalidate any read caches which may have 2793 * stale data. That is, any new read domains. 2794 */ 2795 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 2796 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 2797 #if WATCH_BUF 2798 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 2799 __func__, flush_domains, invalidate_domains); 2800 #endif 2801 i915_gem_clflush_object(obj); 2802 } 2803 2804 /* The actual obj->write_domain will be updated with 2805 * pending_write_domain after we emit the accumulated flush for all 2806 * of our domain changes in execbuffers (which clears objects' 2807 * write_domains). So if we have a current write domain that we 2808 * aren't changing, set pending_write_domain to that.
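* That way an unchanged write domain survives the execbuffer instead of being cleared without ever having been flushed.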
2809 */ 2810 if (flush_domains == 0 && obj->pending_write_domain == 0) 2811 obj->pending_write_domain = obj->write_domain; 2812 obj->read_domains = obj->pending_read_domains; 2813 2814 dev->invalidate_domains |= invalidate_domains; 2815 dev->flush_domains |= flush_domains; 2816 #if WATCH_BUF 2817 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 2818 __func__, 2819 obj->read_domains, obj->write_domain, 2820 dev->invalidate_domains, dev->flush_domains); 2821 #endif 2822 } 2823 2824 /** 2825 * Moves the object from a partially CPU read to a full one. 2826 * 2827 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 2828 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 2829 */ 2830 static void 2831 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 2832 { 2833 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2834 2835 if (!obj_priv->page_cpu_valid) 2836 return; 2837 2838 /* If we're partially in the CPU read domain, finish moving it in. 2839 */ 2840 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 2841 int i; 2842 2843 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 2844 if (obj_priv->page_cpu_valid[i]) 2845 continue; 2846 drm_clflush_pages(obj_priv->pages + i, 1); 2847 } 2848 } 2849 2850 /* Free the page_cpu_valid mappings which are now stale, whether 2851 * or not we've got I915_GEM_DOMAIN_CPU. 2852 */ 2853 kfree(obj_priv->page_cpu_valid); 2854 obj_priv->page_cpu_valid = NULL; 2855 } 2856 2857 /** 2858 * Set the CPU read domain on a range of the object. 2859 * 2860 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 2861 * not entirely valid. The page_cpu_valid member of the object flags which 2862 * pages have been flushed, and will be respected by 2863 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 2864 * of the whole object. 2865 * 2866 * This function returns when the move is complete, including waiting on 2867 * flushes to occur. 2868 */ 2869 static int 2870 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 2871 uint64_t offset, uint64_t size) 2872 { 2873 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2874 int i, ret; 2875 2876 if (offset == 0 && size == obj->size) 2877 return i915_gem_object_set_to_cpu_domain(obj, 0); 2878 2879 i915_gem_object_flush_gpu_write_domain(obj); 2880 /* Wait on any GPU rendering and flushing to occur. */ 2881 ret = i915_gem_object_wait_rendering(obj); 2882 if (ret != 0) 2883 return ret; 2884 i915_gem_object_flush_gtt_write_domain(obj); 2885 2886 /* If we're already fully in the CPU read domain, we're done. */ 2887 if (obj_priv->page_cpu_valid == NULL && 2888 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 2889 return 0; 2890 2891 /* Otherwise, create/clear the per-page CPU read domain flag if we're 2892 * newly adding I915_GEM_DOMAIN_CPU 2893 */ 2894 if (obj_priv->page_cpu_valid == NULL) { 2895 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE, 2896 GFP_KERNEL); 2897 if (obj_priv->page_cpu_valid == NULL) 2898 return -ENOMEM; 2899 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 2900 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 2901 2902 /* Flush the cache on any pages that are still invalid from the CPU's 2903 * perspective. 
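* For example, on a 16KB object with only page 0 already valid, a whole-object range clflushes pages 1-3 below and marks them valid.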
2904 */ 2905 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 2906 i++) { 2907 if (obj_priv->page_cpu_valid[i]) 2908 continue; 2909 2910 drm_clflush_pages(obj_priv->pages + i, 1); 2911 2912 obj_priv->page_cpu_valid[i] = 1; 2913 } 2914 2915 /* It should now be out of any other write domains, and we can update 2916 * the domain values for our changes. 2917 */ 2918 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2919 2920 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2921 2922 return 0; 2923 } 2924 2925 /** 2926 * Pin an object to the GTT and evaluate the relocations landing in it. 2927 */ 2928 static int 2929 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 2930 struct drm_file *file_priv, 2931 struct drm_i915_gem_exec_object *entry, 2932 struct drm_i915_gem_relocation_entry *relocs) 2933 { 2934 struct drm_device *dev = obj->dev; 2935 drm_i915_private_t *dev_priv = dev->dev_private; 2936 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2937 int i, ret; 2938 void __iomem *reloc_page; 2939 2940 /* Choose the GTT offset for our buffer and put it there. */ 2941 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 2942 if (ret) 2943 return ret; 2944 2945 entry->offset = obj_priv->gtt_offset; 2946 2947 /* Apply the relocations, using the GTT aperture to avoid cache 2948 * flushing requirements. 2949 */ 2950 for (i = 0; i < entry->relocation_count; i++) { 2951 struct drm_i915_gem_relocation_entry *reloc = &relocs[i]; 2952 struct drm_gem_object *target_obj; 2953 struct drm_i915_gem_object *target_obj_priv; 2954 uint32_t reloc_val, reloc_offset; 2955 uint32_t __iomem *reloc_entry; 2956 2957 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 2958 reloc->target_handle); 2959 if (target_obj == NULL) { 2960 i915_gem_object_unpin(obj); 2961 return -EBADF; 2962 } 2963 target_obj_priv = target_obj->driver_private; 2964 2965 /* The target buffer should have appeared before us in the 2966 * exec_object list, so it should have a GTT space bound by now.
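* (Userspace is responsible for that ordering; if it gets it wrong, the check below fails the relocation with -EINVAL rather than binding the target here.)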
2967 */ 2968 if (target_obj_priv->gtt_space == NULL) { 2969 DRM_ERROR("No GTT space found for object %d\n", 2970 reloc->target_handle); 2971 drm_gem_object_unreference(target_obj); 2972 i915_gem_object_unpin(obj); 2973 return -EINVAL; 2974 } 2975 2976 if (reloc->offset > obj->size - 4) { 2977 DRM_ERROR("Relocation beyond object bounds: " 2978 "obj %p target %d offset %d size %d.\n", 2979 obj, reloc->target_handle, 2980 (int) reloc->offset, (int) obj->size); 2981 drm_gem_object_unreference(target_obj); 2982 i915_gem_object_unpin(obj); 2983 return -EINVAL; 2984 } 2985 if (reloc->offset & 3) { 2986 DRM_ERROR("Relocation not 4-byte aligned: " 2987 "obj %p target %d offset %d.\n", 2988 obj, reloc->target_handle, 2989 (int) reloc->offset); 2990 drm_gem_object_unreference(target_obj); 2991 i915_gem_object_unpin(obj); 2992 return -EINVAL; 2993 } 2994 2995 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 2996 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 2997 DRM_ERROR("reloc with read/write CPU domains: " 2998 "obj %p target %d offset %d " 2999 "read %08x write %08x", 3000 obj, reloc->target_handle, 3001 (int) reloc->offset, 3002 reloc->read_domains, 3003 reloc->write_domain); 3004 drm_gem_object_unreference(target_obj); 3005 i915_gem_object_unpin(obj); 3006 return -EINVAL; 3007 } 3008 3009 if (reloc->write_domain && target_obj->pending_write_domain && 3010 reloc->write_domain != target_obj->pending_write_domain) { 3011 DRM_ERROR("Write domain conflict: " 3012 "obj %p target %d offset %d " 3013 "new %08x old %08x\n", 3014 obj, reloc->target_handle, 3015 (int) reloc->offset, 3016 reloc->write_domain, 3017 target_obj->pending_write_domain); 3018 drm_gem_object_unreference(target_obj); 3019 i915_gem_object_unpin(obj); 3020 return -EINVAL; 3021 } 3022 3023 #if WATCH_RELOC 3024 DRM_INFO("%s: obj %p offset %08x target %d " 3025 "read %08x write %08x gtt %08x " 3026 "presumed %08x delta %08x\n", 3027 __func__, 3028 obj, 3029 (int) reloc->offset, 3030 (int) reloc->target_handle, 3031 (int) reloc->read_domains, 3032 (int) reloc->write_domain, 3033 (int) target_obj_priv->gtt_offset, 3034 (int) reloc->presumed_offset, 3035 reloc->delta); 3036 #endif 3037 3038 target_obj->pending_read_domains |= reloc->read_domains; 3039 target_obj->pending_write_domain |= reloc->write_domain; 3040 3041 /* If the relocation already has the right value in it, no 3042 * more work needs to be done. 3043 */ 3044 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3045 drm_gem_object_unreference(target_obj); 3046 continue; 3047 } 3048 3049 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3050 if (ret != 0) { 3051 drm_gem_object_unreference(target_obj); 3052 i915_gem_object_unpin(obj); 3053 return ret; 3054 } 3055 3056 /* Map the page containing the relocation we're going to 3057 * perform. 3058 */ 3059 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3060 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3061 (reloc_offset & 3062 ~(PAGE_SIZE - 1))); 3063 reloc_entry = (uint32_t __iomem *)(reloc_page + 3064 (reloc_offset & (PAGE_SIZE - 1))); 3065 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3066 3067 #if WATCH_BUF 3068 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 3069 obj, (unsigned int) reloc->offset, 3070 readl(reloc_entry), reloc_val); 3071 #endif 3072 writel(reloc_val, reloc_entry); 3073 io_mapping_unmap_atomic(reloc_page); 3074 3075 /* The updated presumed offset for this entry will be 3076 * copied back out to the user.
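* On the next execbuffer a matching presumed_offset lets the whole relocation be skipped (see the check above).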
3077 */ 3078 reloc->presumed_offset = target_obj_priv->gtt_offset; 3079 3080 drm_gem_object_unreference(target_obj); 3081 } 3082 3083 #if WATCH_BUF 3084 if (0) 3085 i915_gem_dump_object(obj, 128, __func__, ~0); 3086 #endif 3087 return 0; 3088 } 3089 3090 /** Dispatch a batchbuffer to the ring 3091 */ 3092 static int 3093 i915_dispatch_gem_execbuffer(struct drm_device *dev, 3094 struct drm_i915_gem_execbuffer *exec, 3095 struct drm_clip_rect *cliprects, 3096 uint64_t exec_offset) 3097 { 3098 drm_i915_private_t *dev_priv = dev->dev_private; 3099 int nbox = exec->num_cliprects; 3100 int i = 0, count; 3101 uint32_t exec_start, exec_len; 3102 RING_LOCALS; 3103 3104 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3105 exec_len = (uint32_t) exec->batch_len; 3106 3107 count = nbox ? nbox : 1; 3108 3109 for (i = 0; i < count; i++) { 3110 if (i < nbox) { 3111 int ret = i915_emit_box(dev, cliprects, i, 3112 exec->DR1, exec->DR4); 3113 if (ret) 3114 return ret; 3115 } 3116 3117 if (IS_I830(dev) || IS_845G(dev)) { 3118 BEGIN_LP_RING(4); 3119 OUT_RING(MI_BATCH_BUFFER); 3120 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3121 OUT_RING(exec_start + exec_len - 4); 3122 OUT_RING(0); 3123 ADVANCE_LP_RING(); 3124 } else { 3125 BEGIN_LP_RING(2); 3126 if (IS_I965G(dev)) { 3127 OUT_RING(MI_BATCH_BUFFER_START | 3128 (2 << 6) | 3129 MI_BATCH_NON_SECURE_I965); 3130 OUT_RING(exec_start); 3131 } else { 3132 OUT_RING(MI_BATCH_BUFFER_START | 3133 (2 << 6)); 3134 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3135 } 3136 ADVANCE_LP_RING(); 3137 } 3138 } 3139 3140 /* XXX breadcrumb */ 3141 return 0; 3142 } 3143 3144 /* Throttle our rendering by waiting until the ring has completed our requests 3145 * emitted over 20 msec ago. 3146 * 3147 * Note that if we were to use the current jiffies each time around the loop, 3148 * we wouldn't escape the function with any frames outstanding if the time to 3149 * render a frame was over 20ms. 3150 * 3151 * This should get us reasonable parallelism between CPU and GPU but also 3152 * relatively low latency when blocking on a particular request to finish. 
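* For example, a client that stays less than 20ms ahead of the GPU is never blocked here, while one queuing work faster than the GPU retires it waits on its oldest request.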
3153 */ 3154 static int 3155 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3156 { 3157 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3158 int ret = 0; 3159 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3160 3161 mutex_lock(&dev->struct_mutex); 3162 while (!list_empty(&i915_file_priv->mm.request_list)) { 3163 struct drm_i915_gem_request *request; 3164 3165 request = list_first_entry(&i915_file_priv->mm.request_list, 3166 struct drm_i915_gem_request, 3167 client_list); 3168 3169 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3170 break; 3171 3172 ret = i915_wait_request(dev, request->seqno); 3173 if (ret != 0) 3174 break; 3175 } 3176 mutex_unlock(&dev->struct_mutex); 3177 3178 return ret; 3179 } 3180 3181 static int 3182 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, 3183 uint32_t buffer_count, 3184 struct drm_i915_gem_relocation_entry **relocs) 3185 { 3186 uint32_t reloc_count = 0, reloc_index = 0, i; 3187 int ret; 3188 3189 *relocs = NULL; 3190 for (i = 0; i < buffer_count; i++) { 3191 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3192 return -EINVAL; 3193 reloc_count += exec_list[i].relocation_count; 3194 } 3195 3196 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); 3197 if (*relocs == NULL) 3198 return -ENOMEM; 3199 3200 for (i = 0; i < buffer_count; i++) { 3201 struct drm_i915_gem_relocation_entry __user *user_relocs; 3202 3203 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3204 3205 ret = copy_from_user(&(*relocs)[reloc_index], 3206 user_relocs, 3207 exec_list[i].relocation_count * 3208 sizeof(**relocs)); 3209 if (ret != 0) { 3210 drm_free_large(*relocs); 3211 *relocs = NULL; 3212 return -EFAULT; 3213 } 3214 3215 reloc_index += exec_list[i].relocation_count; 3216 } 3217 3218 return 0; 3219 } 3220 3221 static int 3222 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, 3223 uint32_t buffer_count, 3224 struct drm_i915_gem_relocation_entry *relocs) 3225 { 3226 uint32_t reloc_count = 0, i; 3227 int ret = 0; 3228 3229 for (i = 0; i < buffer_count; i++) { 3230 struct drm_i915_gem_relocation_entry __user *user_relocs; 3231 int unwritten; 3232 3233 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3234 3235 unwritten = copy_to_user(user_relocs, 3236 &relocs[reloc_count], 3237 exec_list[i].relocation_count * 3238 sizeof(*relocs)); 3239 3240 if (unwritten) { 3241 ret = -EFAULT; 3242 goto err; 3243 } 3244 3245 reloc_count += exec_list[i].relocation_count; 3246 } 3247 3248 err: 3249 drm_free_large(relocs); 3250 3251 return ret; 3252 } 3253 3254 static int 3255 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer *exec, 3256 uint64_t exec_offset) 3257 { 3258 uint32_t exec_start, exec_len; 3259 3260 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3261 exec_len = (uint32_t) exec->batch_len; 3262 3263 if ((exec_start | exec_len) & 0x7) 3264 return -EINVAL; 3265 3266 if (!exec_start) 3267 return -EINVAL; 3268 3269 return 0; 3270 } 3271 3272 int 3273 i915_gem_execbuffer(struct drm_device *dev, void *data, 3274 struct drm_file *file_priv) 3275 { 3276 drm_i915_private_t *dev_priv = dev->dev_private; 3277 struct drm_i915_gem_execbuffer *args = data; 3278 struct drm_i915_gem_exec_object *exec_list = NULL; 3279 struct drm_gem_object **object_list = NULL; 3280 struct drm_gem_object *batch_obj; 3281 struct drm_i915_gem_object *obj_priv; 3282 struct drm_clip_rect *cliprects = NULL; 3283 struct
drm_i915_gem_relocation_entry *relocs; 3284 int ret, ret2, i, pinned = 0; 3285 uint64_t exec_offset; 3286 uint32_t seqno, flush_domains, reloc_index; 3287 int pin_tries; 3288 3289 #if WATCH_EXEC 3290 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3291 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3292 #endif 3293 3294 if (args->buffer_count < 1) { 3295 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3296 return -EINVAL; 3297 } 3298 /* Copy in the exec list from userland */ 3299 exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count); 3300 object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count); 3301 if (exec_list == NULL || object_list == NULL) { 3302 DRM_ERROR("Failed to allocate exec or object list " 3303 "for %d buffers\n", 3304 args->buffer_count); 3305 ret = -ENOMEM; 3306 goto pre_mutex_err; 3307 } 3308 ret = copy_from_user(exec_list, 3309 (struct drm_i915_relocation_entry __user *) 3310 (uintptr_t) args->buffers_ptr, 3311 sizeof(*exec_list) * args->buffer_count); 3312 if (ret != 0) { 3313 DRM_ERROR("copy %d exec entries failed %d\n", 3314 args->buffer_count, ret); 3315 goto pre_mutex_err; 3316 } 3317 3318 if (args->num_cliprects != 0) { 3319 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), 3320 GFP_KERNEL); 3321 if (cliprects == NULL) { 3322 ret = -ENOMEM; goto pre_mutex_err; } 3323 3324 ret = copy_from_user(cliprects, 3325 (struct drm_clip_rect __user *) 3326 (uintptr_t) args->cliprects_ptr, 3327 sizeof(*cliprects) * args->num_cliprects); 3328 if (ret != 0) { 3329 DRM_ERROR("copy %d cliprects failed: %d\n", 3330 args->num_cliprects, ret); 3331 goto pre_mutex_err; 3332 } 3333 } 3334 3335 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3336 &relocs); 3337 if (ret != 0) 3338 goto pre_mutex_err; 3339 3340 mutex_lock(&dev->struct_mutex); 3341 3342 i915_verify_inactive(dev, __FILE__, __LINE__); 3343 3344 if (dev_priv->mm.wedged) { 3345 DRM_ERROR("Execbuf while wedged\n"); 3346 mutex_unlock(&dev->struct_mutex); 3347 ret = -EIO; 3348 goto pre_mutex_err; 3349 } 3350 3351 if (dev_priv->mm.suspended) { 3352 DRM_ERROR("Execbuf while VT-switched.\n"); 3353 mutex_unlock(&dev->struct_mutex); 3354 ret = -EBUSY; 3355 goto pre_mutex_err; 3356 } 3357 3358 /* Look up object handles */ 3359 for (i = 0; i < args->buffer_count; i++) { 3360 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3361 exec_list[i].handle); 3362 if (object_list[i] == NULL) { 3363 DRM_ERROR("Invalid object handle %d at index %d\n", 3364 exec_list[i].handle, i); 3365 ret = -EBADF; 3366 goto err; 3367 } 3368 3369 obj_priv = object_list[i]->driver_private; 3370 if (obj_priv->in_execbuffer) { 3371 DRM_ERROR("Object %p appears more than once in object list\n", 3372 object_list[i]); 3373 ret = -EBADF; 3374 goto err; 3375 } 3376 obj_priv->in_execbuffer = true; 3377 } 3378 3379 /* Pin and relocate */ 3380 for (pin_tries = 0; ; pin_tries++) { 3381 ret = 0; 3382 reloc_index = 0; 3383 3384 for (i = 0; i < args->buffer_count; i++) { 3385 object_list[i]->pending_read_domains = 0; 3386 object_list[i]->pending_write_domain = 0; 3387 ret = i915_gem_object_pin_and_relocate(object_list[i], 3388 file_priv, 3389 &exec_list[i], 3390 &relocs[reloc_index]); 3391 if (ret) 3392 break; 3393 pinned = i + 1; 3394 reloc_index += exec_list[i].relocation_count; 3395 } 3396 /* success */ 3397 if (ret == 0) 3398 break; 3399 3400 /* error other than GTT full, or we've already tried again */ 3401 if (ret != -ENOSPC || pin_tries >= 1) { 3402 if (ret != -ERESTARTSYS) 3403
DRM_ERROR("Failed to pin buffers %d\n", ret); 3404 goto err; 3405 } 3406 3407 /* unpin all of our buffers */ 3408 for (i = 0; i < pinned; i++) 3409 i915_gem_object_unpin(object_list[i]); 3410 pinned = 0; 3411 3412 /* evict everyone we can from the aperture */ 3413 ret = i915_gem_evict_everything(dev); 3414 if (ret) 3415 goto err; 3416 } 3417 3418 /* Set the pending read domains for the batch buffer to COMMAND */ 3419 batch_obj = object_list[args->buffer_count-1]; 3420 if (batch_obj->pending_write_domain) { 3421 DRM_ERROR("Attempting to use self-modifying batch buffer\n"); 3422 ret = -EINVAL; 3423 goto err; 3424 } 3425 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 3426 3427 /* Sanity check the batch buffer, prior to moving objects */ 3428 exec_offset = exec_list[args->buffer_count - 1].offset; 3429 ret = i915_gem_check_execbuffer (args, exec_offset); 3430 if (ret != 0) { 3431 DRM_ERROR("execbuf with invalid offset/length\n"); 3432 goto err; 3433 } 3434 3435 i915_verify_inactive(dev, __FILE__, __LINE__); 3436 3437 /* Zero the global flush/invalidate flags. These 3438 * will be modified as new domains are computed 3439 * for each object 3440 */ 3441 dev->invalidate_domains = 0; 3442 dev->flush_domains = 0; 3443 3444 for (i = 0; i < args->buffer_count; i++) { 3445 struct drm_gem_object *obj = object_list[i]; 3446 3447 /* Compute new gpu domains and update invalidate/flush */ 3448 i915_gem_object_set_to_gpu_domain(obj); 3449 } 3450 3451 i915_verify_inactive(dev, __FILE__, __LINE__); 3452 3453 if (dev->invalidate_domains | dev->flush_domains) { 3454 #if WATCH_EXEC 3455 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3456 __func__, 3457 dev->invalidate_domains, 3458 dev->flush_domains); 3459 #endif 3460 i915_gem_flush(dev, 3461 dev->invalidate_domains, 3462 dev->flush_domains); 3463 if (dev->flush_domains) 3464 (void)i915_add_request(dev, file_priv, 3465 dev->flush_domains); 3466 } 3467 3468 for (i = 0; i < args->buffer_count; i++) { 3469 struct drm_gem_object *obj = object_list[i]; 3470 3471 obj->write_domain = obj->pending_write_domain; 3472 } 3473 3474 i915_verify_inactive(dev, __FILE__, __LINE__); 3475 3476 #if WATCH_COHERENCY 3477 for (i = 0; i < args->buffer_count; i++) { 3478 i915_gem_object_check_coherency(object_list[i], 3479 exec_list[i].handle); 3480 } 3481 #endif 3482 3483 #if WATCH_EXEC 3484 i915_gem_dump_object(batch_obj, 3485 args->batch_len, 3486 __func__, 3487 ~0); 3488 #endif 3489 3490 /* Exec the batchbuffer */ 3491 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); 3492 if (ret) { 3493 DRM_ERROR("dispatch failed %d\n", ret); 3494 goto err; 3495 } 3496 3497 /* 3498 * Ensure that the commands in the batch buffer are 3499 * finished before the interrupt fires 3500 */ 3501 flush_domains = i915_retire_commands(dev); 3502 3503 i915_verify_inactive(dev, __FILE__, __LINE__); 3504 3505 /* 3506 * Get a seqno representing the execution of the current buffer, 3507 * which we can wait on. We would like to mitigate these interrupts, 3508 * likely by only creating seqnos occasionally (so that we have 3509 * *some* interrupts representing completion of buffers that we can 3510 * wait on when trying to clear up gtt space). 
3511 */ 3512 seqno = i915_add_request(dev, file_priv, flush_domains); 3513 BUG_ON(seqno == 0); 3514 for (i = 0; i < args->buffer_count; i++) { 3515 struct drm_gem_object *obj = object_list[i]; 3516 3517 i915_gem_object_move_to_active(obj, seqno); 3518 #if WATCH_LRU 3519 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3520 #endif 3521 } 3522 #if WATCH_LRU 3523 i915_dump_lru(dev, __func__); 3524 #endif 3525 3526 i915_verify_inactive(dev, __FILE__, __LINE__); 3527 3528 err: 3529 for (i = 0; i < pinned; i++) 3530 i915_gem_object_unpin(object_list[i]); 3531 3532 for (i = 0; i < args->buffer_count; i++) { 3533 if (object_list[i]) { 3534 obj_priv = object_list[i]->driver_private; 3535 obj_priv->in_execbuffer = false; 3536 } 3537 drm_gem_object_unreference(object_list[i]); 3538 } 3539 3540 mutex_unlock(&dev->struct_mutex); 3541 3542 if (!ret) { 3543 /* Copy the new buffer offsets back to the user's exec list. */ 3544 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 3545 (uintptr_t) args->buffers_ptr, 3546 exec_list, 3547 sizeof(*exec_list) * args->buffer_count); 3548 if (ret) { 3549 ret = -EFAULT; 3550 DRM_ERROR("failed to copy %d exec entries " 3551 "back to user (%d)\n", 3552 args->buffer_count, ret); 3553 } 3554 } 3555 3556 /* Copy the updated relocations out regardless of current error 3557 * state. Failure to update the relocs would mean that the next 3558 * time userland calls execbuf, it would do so with presumed offset 3559 * state that didn't match the actual object state. 3560 */ 3561 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3562 relocs); 3563 if (ret2 != 0) { 3564 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3565 3566 if (ret == 0) 3567 ret = ret2; 3568 } 3569 3570 pre_mutex_err: 3571 drm_free_large(object_list); 3572 drm_free_large(exec_list); 3573 kfree(cliprects); 3574 3575 return ret; 3576 } 3577 3578 int 3579 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 3580 { 3581 struct drm_device *dev = obj->dev; 3582 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3583 int ret; 3584 3585 i915_verify_inactive(dev, __FILE__, __LINE__); 3586 if (obj_priv->gtt_space == NULL) { 3587 ret = i915_gem_object_bind_to_gtt(obj, alignment); 3588 if (ret != 0) { 3589 if (ret != -EBUSY && ret != -ERESTARTSYS) 3590 DRM_ERROR("Failure to bind: %d\n", ret); 3591 return ret; 3592 } 3593 } 3594 /* 3595 * Pre-965 chips need a fence register set up in order to 3596 * properly handle tiled surfaces. 
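* (965+ parts take the tiling mode from per-surface state instead, so there a fence is presumably only needed for CPU access through the GTT.)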
3597 */ 3598 if (!IS_I965G(dev) && 3599 obj_priv->fence_reg == I915_FENCE_REG_NONE && 3600 obj_priv->tiling_mode != I915_TILING_NONE) { 3601 ret = i915_gem_object_get_fence_reg(obj); 3602 if (ret != 0) { 3603 if (ret != -EBUSY && ret != -ERESTARTSYS) 3604 DRM_ERROR("Failure to install fence: %d\n", 3605 ret); 3606 return ret; 3607 } 3608 } 3609 obj_priv->pin_count++; 3610 3611 /* If the object is not active and not pending a flush, 3612 * remove it from the inactive list 3613 */ 3614 if (obj_priv->pin_count == 1) { 3615 atomic_inc(&dev->pin_count); 3616 atomic_add(obj->size, &dev->pin_memory); 3617 if (!obj_priv->active && 3618 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 && 3619 !list_empty(&obj_priv->list)) 3620 list_del_init(&obj_priv->list); 3621 } 3622 i915_verify_inactive(dev, __FILE__, __LINE__); 3623 3624 return 0; 3625 } 3626 3627 void 3628 i915_gem_object_unpin(struct drm_gem_object *obj) 3629 { 3630 struct drm_device *dev = obj->dev; 3631 drm_i915_private_t *dev_priv = dev->dev_private; 3632 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3633 3634 i915_verify_inactive(dev, __FILE__, __LINE__); 3635 obj_priv->pin_count--; 3636 BUG_ON(obj_priv->pin_count < 0); 3637 BUG_ON(obj_priv->gtt_space == NULL); 3638 3639 /* If the object is no longer pinned, and is 3640 * neither active nor being flushed, then stick it on 3641 * the inactive list 3642 */ 3643 if (obj_priv->pin_count == 0) { 3644 if (!obj_priv->active && 3645 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 3646 list_move_tail(&obj_priv->list, 3647 &dev_priv->mm.inactive_list); 3648 atomic_dec(&dev->pin_count); 3649 atomic_sub(obj->size, &dev->pin_memory); 3650 } 3651 i915_verify_inactive(dev, __FILE__, __LINE__); 3652 } 3653 3654 int 3655 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3656 struct drm_file *file_priv) 3657 { 3658 struct drm_i915_gem_pin *args = data; 3659 struct drm_gem_object *obj; 3660 struct drm_i915_gem_object *obj_priv; 3661 int ret; 3662 3663 mutex_lock(&dev->struct_mutex); 3664 3665 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3666 if (obj == NULL) { 3667 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 3668 args->handle); 3669 mutex_unlock(&dev->struct_mutex); 3670 return -EBADF; 3671 } 3672 obj_priv = obj->driver_private; 3673 3674 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 3675 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3676 args->handle); 3677 drm_gem_object_unreference(obj); 3678 mutex_unlock(&dev->struct_mutex); 3679 return -EINVAL; 3680 } 3681 3682 obj_priv->user_pin_count++; 3683 obj_priv->pin_filp = file_priv; 3684 if (obj_priv->user_pin_count == 1) { 3685 ret = i915_gem_object_pin(obj, args->alignment); 3686 if (ret != 0) { 3687 drm_gem_object_unreference(obj); 3688 mutex_unlock(&dev->struct_mutex); 3689 return ret; 3690 } 3691 } 3692 3693 /* XXX - flush the CPU caches for pinned objects 3694 * as the X server doesn't manage domains yet 3695 */ 3696 i915_gem_object_flush_cpu_write_domain(obj); 3697 args->offset = obj_priv->gtt_offset; 3698 drm_gem_object_unreference(obj); 3699 mutex_unlock(&dev->struct_mutex); 3700 3701 return 0; 3702 } 3703 3704 int 3705 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3706 struct drm_file *file_priv) 3707 { 3708 struct drm_i915_gem_pin *args = data; 3709 struct drm_gem_object *obj; 3710 struct drm_i915_gem_object *obj_priv; 3711 3712 mutex_lock(&dev->struct_mutex); 3713 3714 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3715 if (obj == NULL) { 3716 
DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 3717 args->handle); 3718 mutex_unlock(&dev->struct_mutex); 3719 return -EBADF; 3720 } 3721 3722 obj_priv = obj->driver_private; 3723 if (obj_priv->pin_filp != file_priv) { 3724 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3725 args->handle); 3726 drm_gem_object_unreference(obj); 3727 mutex_unlock(&dev->struct_mutex); 3728 return -EINVAL; 3729 } 3730 obj_priv->user_pin_count--; 3731 if (obj_priv->user_pin_count == 0) { 3732 obj_priv->pin_filp = NULL; 3733 i915_gem_object_unpin(obj); 3734 } 3735 3736 drm_gem_object_unreference(obj); 3737 mutex_unlock(&dev->struct_mutex); 3738 return 0; 3739 } 3740 3741 int 3742 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3743 struct drm_file *file_priv) 3744 { 3745 struct drm_i915_gem_busy *args = data; 3746 struct drm_gem_object *obj; 3747 struct drm_i915_gem_object *obj_priv; 3748 3749 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3750 if (obj == NULL) { 3751 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 3752 args->handle); 3753 return -EBADF; 3754 } 3755 3756 mutex_lock(&dev->struct_mutex); 3757 /* Update the active list for the hardware's current position. 3758 * Otherwise this only updates on a delayed timer or when irqs are 3759 * actually unmasked, and our working set ends up being larger than 3760 * required. 3761 */ 3762 i915_gem_retire_requests(dev); 3763 3764 obj_priv = obj->driver_private; 3765 /* Don't count being on the flushing list against the object being 3766 * done. Otherwise, a buffer left on the flushing list but not getting 3767 * flushed (because nobody's flushing that domain) won't ever return 3768 * unbusy and get reused by libdrm's bo cache. The other expected 3769 * consumer of this interface, OpenGL's occlusion queries, also specs 3770 * that the objects get unbusy "eventually" without any interference. 3771 */ 3772 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 3773 3774 drm_gem_object_unreference(obj); 3775 mutex_unlock(&dev->struct_mutex); 3776 return 0; 3777 } 3778 3779 int 3780 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3781 struct drm_file *file_priv) 3782 { 3783 return i915_gem_ring_throttle(dev, file_priv); 3784 } 3785 3786 int i915_gem_init_object(struct drm_gem_object *obj) 3787 { 3788 struct drm_i915_gem_object *obj_priv; 3789 3790 obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL); 3791 if (obj_priv == NULL) 3792 return -ENOMEM; 3793 3794 /* 3795 * We've just allocated pages from the kernel, 3796 * so they've just been written by the CPU with 3797 * zeros. They'll need to be clflushed before we 3798 * use them with the GPU. 
3799 */ 3800 obj->write_domain = I915_GEM_DOMAIN_CPU; 3801 obj->read_domains = I915_GEM_DOMAIN_CPU; 3802 3803 obj_priv->agp_type = AGP_USER_MEMORY; 3804 3805 obj->driver_private = obj_priv; 3806 obj_priv->obj = obj; 3807 obj_priv->fence_reg = I915_FENCE_REG_NONE; 3808 INIT_LIST_HEAD(&obj_priv->list); 3809 3810 return 0; 3811 } 3812 3813 void i915_gem_free_object(struct drm_gem_object *obj) 3814 { 3815 struct drm_device *dev = obj->dev; 3816 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3817 3818 while (obj_priv->pin_count > 0) 3819 i915_gem_object_unpin(obj); 3820 3821 if (obj_priv->phys_obj) 3822 i915_gem_detach_phys_object(dev, obj); 3823 3824 i915_gem_object_unbind(obj); 3825 3826 i915_gem_free_mmap_offset(obj); 3827 3828 kfree(obj_priv->page_cpu_valid); 3829 kfree(obj_priv->bit_17); 3830 kfree(obj->driver_private); 3831 } 3832 3833 /** Unbinds all objects that are on the given buffer list. */ 3834 static int 3835 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) 3836 { 3837 struct drm_gem_object *obj; 3838 struct drm_i915_gem_object *obj_priv; 3839 int ret; 3840 3841 while (!list_empty(head)) { 3842 obj_priv = list_first_entry(head, 3843 struct drm_i915_gem_object, 3844 list); 3845 obj = obj_priv->obj; 3846 3847 if (obj_priv->pin_count != 0) { 3848 DRM_ERROR("Pinned object in unbind list\n"); 3849 mutex_unlock(&dev->struct_mutex); 3850 return -EINVAL; 3851 } 3852 3853 ret = i915_gem_object_unbind(obj); 3854 if (ret != 0) { 3855 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 3856 ret); 3857 mutex_unlock(&dev->struct_mutex); 3858 return ret; 3859 } 3860 } 3861 3862 3863 return 0; 3864 } 3865 3866 int 3867 i915_gem_idle(struct drm_device *dev) 3868 { 3869 drm_i915_private_t *dev_priv = dev->dev_private; 3870 uint32_t seqno, cur_seqno, last_seqno; 3871 int stuck, ret; 3872 3873 mutex_lock(&dev->struct_mutex); 3874 3875 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 3876 mutex_unlock(&dev->struct_mutex); 3877 return 0; 3878 } 3879 3880 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3881 * We need to replace this with a semaphore, or something. 
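* (mm.suspended doubles as the VT-switch flag: both i915_gem_execbuffer and i915_gem_object_bind_to_gtt check it and fail with -EBUSY.)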
3882 */ 3883 dev_priv->mm.suspended = 1; 3884 3885 /* Cancel the retire work handler, wait for it to finish if running 3886 */ 3887 mutex_unlock(&dev->struct_mutex); 3888 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3889 mutex_lock(&dev->struct_mutex); 3890 3891 i915_kernel_lost_context(dev); 3892 3893 /* Flush the GPU along with all non-CPU write domains 3894 */ 3895 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 3896 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); 3897 3898 if (seqno == 0) { 3899 mutex_unlock(&dev->struct_mutex); 3900 return -ENOMEM; 3901 } 3902 3903 dev_priv->mm.waiting_gem_seqno = seqno; 3904 last_seqno = 0; 3905 stuck = 0; 3906 for (;;) { 3907 cur_seqno = i915_get_gem_seqno(dev); 3908 if (i915_seqno_passed(cur_seqno, seqno)) 3909 break; 3910 if (last_seqno == cur_seqno) { 3911 if (stuck++ > 100) { 3912 DRM_ERROR("hardware wedged\n"); 3913 dev_priv->mm.wedged = 1; 3914 DRM_WAKEUP(&dev_priv->irq_queue); 3915 break; 3916 } 3917 } 3918 msleep(10); 3919 last_seqno = cur_seqno; 3920 } 3921 dev_priv->mm.waiting_gem_seqno = 0; 3922 3923 i915_gem_retire_requests(dev); 3924 3925 spin_lock(&dev_priv->mm.active_list_lock); 3926 if (!dev_priv->mm.wedged) { 3927 /* Active and flushing should now be empty as we've 3928 * waited for a sequence higher than any pending execbuffer 3929 */ 3930 WARN_ON(!list_empty(&dev_priv->mm.active_list)); 3931 WARN_ON(!list_empty(&dev_priv->mm.flushing_list)); 3932 /* Request should now be empty as we've also waited 3933 * for the last request in the list 3934 */ 3935 WARN_ON(!list_empty(&dev_priv->mm.request_list)); 3936 } 3937 3938 /* Empty the active and flushing lists to inactive. If there's 3939 * anything left at this point, it means that we're wedged and 3940 * nothing good's going to happen by leaving them there. So strip 3941 * the GPU domains and just stuff them onto inactive. 3942 */ 3943 while (!list_empty(&dev_priv->mm.active_list)) { 3944 struct drm_i915_gem_object *obj_priv; 3945 3946 obj_priv = list_first_entry(&dev_priv->mm.active_list, 3947 struct drm_i915_gem_object, 3948 list); 3949 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3950 i915_gem_object_move_to_inactive(obj_priv->obj); 3951 } 3952 spin_unlock(&dev_priv->mm.active_list_lock); 3953 3954 while (!list_empty(&dev_priv->mm.flushing_list)) { 3955 struct drm_i915_gem_object *obj_priv; 3956 3957 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 3958 struct drm_i915_gem_object, 3959 list); 3960 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3961 i915_gem_object_move_to_inactive(obj_priv->obj); 3962 } 3963 3964 3965 /* Move all inactive buffers out of the GTT. */ 3966 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); 3967 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 3968 if (ret) { 3969 mutex_unlock(&dev->struct_mutex); 3970 return ret; 3971 } 3972 3973 i915_gem_cleanup_ringbuffer(dev); 3974 mutex_unlock(&dev->struct_mutex); 3975 3976 return 0; 3977 } 3978 3979 static int 3980 i915_gem_init_hws(struct drm_device *dev) 3981 { 3982 drm_i915_private_t *dev_priv = dev->dev_private; 3983 struct drm_gem_object *obj; 3984 struct drm_i915_gem_object *obj_priv; 3985 int ret; 3986 3987 /* If we need a physical address for the status page, it's already 3988 * initialized at driver load time. 
3989 */ 3990 if (!I915_NEED_GFX_HWS(dev)) 3991 return 0; 3992 3993 obj = drm_gem_object_alloc(dev, 4096); 3994 if (obj == NULL) { 3995 DRM_ERROR("Failed to allocate status page\n"); 3996 return -ENOMEM; 3997 } 3998 obj_priv = obj->driver_private; 3999 obj_priv->agp_type = AGP_USER_CACHED_MEMORY; 4000 4001 ret = i915_gem_object_pin(obj, 4096); 4002 if (ret != 0) { 4003 drm_gem_object_unreference(obj); 4004 return ret; 4005 } 4006 4007 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 4008 4009 dev_priv->hw_status_page = kmap(obj_priv->pages[0]); 4010 if (dev_priv->hw_status_page == NULL) { 4011 DRM_ERROR("Failed to map status page.\n"); 4012 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4013 i915_gem_object_unpin(obj); 4014 drm_gem_object_unreference(obj); 4015 return -EINVAL; 4016 } 4017 dev_priv->hws_obj = obj; 4018 memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 4019 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 4020 I915_READ(HWS_PGA); /* posting read */ 4021 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 4022 4023 return 0; 4024 } 4025 4026 static void 4027 i915_gem_cleanup_hws(struct drm_device *dev) 4028 { 4029 drm_i915_private_t *dev_priv = dev->dev_private; 4030 struct drm_gem_object *obj; 4031 struct drm_i915_gem_object *obj_priv; 4032 4033 if (dev_priv->hws_obj == NULL) 4034 return; 4035 4036 obj = dev_priv->hws_obj; 4037 obj_priv = obj->driver_private; 4038 4039 kunmap(obj_priv->pages[0]); 4040 i915_gem_object_unpin(obj); 4041 drm_gem_object_unreference(obj); 4042 dev_priv->hws_obj = NULL; 4043 4044 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4045 dev_priv->hw_status_page = NULL; 4046 4047 /* Write high address into HWS_PGA when disabling. */ 4048 I915_WRITE(HWS_PGA, 0x1ffff000); 4049 } 4050 4051 int 4052 i915_gem_init_ringbuffer(struct drm_device *dev) 4053 { 4054 drm_i915_private_t *dev_priv = dev->dev_private; 4055 struct drm_gem_object *obj; 4056 struct drm_i915_gem_object *obj_priv; 4057 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 4058 int ret; 4059 u32 head; 4060 4061 ret = i915_gem_init_hws(dev); 4062 if (ret != 0) 4063 return ret; 4064 4065 obj = drm_gem_object_alloc(dev, 128 * 1024); 4066 if (obj == NULL) { 4067 DRM_ERROR("Failed to allocate ringbuffer\n"); 4068 i915_gem_cleanup_hws(dev); 4069 return -ENOMEM; 4070 } 4071 obj_priv = obj->driver_private; 4072 4073 ret = i915_gem_object_pin(obj, 4096); 4074 if (ret != 0) { 4075 drm_gem_object_unreference(obj); 4076 i915_gem_cleanup_hws(dev); 4077 return ret; 4078 } 4079 4080 /* Set up the kernel mapping for the ring. */ 4081 ring->Size = obj->size; 4082 ring->tail_mask = obj->size - 1; 4083 4084 ring->map.offset = dev->agp->base + obj_priv->gtt_offset; 4085 ring->map.size = obj->size; 4086 ring->map.type = 0; 4087 ring->map.flags = 0; 4088 ring->map.mtrr = 0; 4089 4090 drm_core_ioremap_wc(&ring->map, dev); 4091 if (ring->map.handle == NULL) { 4092 DRM_ERROR("Failed to map ringbuffer.\n"); 4093 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 4094 i915_gem_object_unpin(obj); 4095 drm_gem_object_unreference(obj); 4096 i915_gem_cleanup_hws(dev); 4097 return -EINVAL; 4098 } 4099 ring->ring_obj = obj; 4100 ring->virtual_start = ring->map.handle; 4101 4102 /* Stop the ring if it's running. */ 4103 I915_WRITE(PRB0_CTL, 0); 4104 I915_WRITE(PRB0_TAIL, 0); 4105 I915_WRITE(PRB0_HEAD, 0); 4106 4107 /* Initialize the ring. 
int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
	int ret;
	u32 head;

	ret = i915_gem_init_hws(dev);
	if (ret != 0)
		return ret;

	obj = drm_gem_object_alloc(dev, 128 * 1024);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		i915_gem_cleanup_hws(dev);
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return ret;
	}

	/* Set up the kernel mapping for the ring. */
	ring->Size = obj->size;
	ring->tail_mask = obj->size - 1;

	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
	ring->map.size = obj->size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.handle == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
		i915_gem_object_unpin(obj);
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return -EINVAL;
	}
	ring->ring_obj = obj;
	ring->virtual_start = ring->map.handle;

	/* Stop the ring if it's running. */
	I915_WRITE(PRB0_CTL, 0);
	I915_WRITE(PRB0_TAIL, 0);
	I915_WRITE(PRB0_HEAD, 0);

	/* Initialize the ring. */
	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_ERROR("Ring head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		I915_WRITE(PRB0_HEAD, 0);

		DRM_ERROR("Ring head forced to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
	}

	I915_WRITE(PRB0_CTL,
		   ((obj->size - 4096) & RING_NR_PAGES) |
		   RING_NO_REPORT |
		   RING_VALID);

	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* If the head is still not zero, the ring is dead */
	if (head != 0) {
		DRM_ERROR("Ring initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		return -EIO;
	}

	/* Update our cache of the ring state */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		i915_kernel_lost_context(dev);
	else {
		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
		ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
		ring->space = ring->head - (ring->tail + 8);
		if (ring->space < 0)
			ring->space += ring->Size;
	}

	return 0;
}
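/*
 * Editor's sketch of the free-space rule used when caching the ring
 * state above (an illustration only; the driver computes this inline,
 * and this helper is not called anywhere). The ring is circular: free
 * space is the gap from tail forward to head, minus an 8-byte guard so
 * a completely full ring is never mistaken for an empty one. Worked
 * example with a 128 KiB ring (0x20000), head = 0x100, tail = 0x1f00:
 * 0x100 - 0x1f08 = -0x1e08, plus 0x20000 gives 0x1e1f8 bytes free.
 */
static inline int i915_ring_free_space_example(u32 head, u32 tail, u32 size)
{
	int space = head - (tail + 8);	/* 8-byte guard band */

	if (space < 0)			/* tail has wrapped past head */
		space += size;
	return space;
}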
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (dev_priv->ring.ring_obj == NULL)
		return;

	drm_core_ioremapfree(&dev_priv->ring.map, dev);

	i915_gem_object_unpin(dev_priv->ring.ring_obj);
	drm_gem_object_unreference(dev_priv->ring.ring_obj);
	dev_priv->ring.ring_obj = NULL;
	memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));

	i915_gem_cleanup_hws(dev);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (dev_priv->mm.wedged) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		dev_priv->mm.wedged = 0;
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	spin_lock(&dev_priv->mm.active_list_lock);
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	spin_unlock(&dev_priv->mm.active_list_lock);

	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->mm.request_list));
	mutex_unlock(&dev->struct_mutex);

	drm_irq_install(dev);

	return 0;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	ret = i915_gem_idle(dev);
	drm_irq_uninstall(dev);

	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock_init(&dev_priv->mm.active_list_lock);
	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.request_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	dev_priv->mm.next_gem_seqno = 1;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	dev_priv->fence_reg_start = 3;

	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	if (IS_I965G(dev)) {
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
	} else {
		for (i = 0; i < 8; i++)
			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
			for (i = 0; i < 8; i++)
				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
	}

	i915_gem_detect_bit_6_swizzle(dev);
}
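/*
 * Editor's note on the fence clearing above: 965 exposes 16 fence
 * registers, each 64 bits wide, hence the I915_WRITE64 with an 8-byte
 * stride from FENCE_REG_965_0. Pre-965 parts use 32-bit registers at a
 * 4-byte stride: 8 at FENCE_REG_830_0 on everything, plus 8 more at
 * FENCE_REG_945_8 on 945G/945GM/G33, matching num_fence_regs above.
 */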
/*
 * Create a physically contiguous memory object for this object,
 * e.g. for cursor + overlay regs.
 */
int i915_gem_init_phys_object(struct drm_device *dev,
			      int id, int size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr,
		      phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj)
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr,
		      phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;
	int i;
	int ret;
	int page_count;

	obj_priv = obj->driver_private;
	if (!obj_priv->phys_obj)
		return;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		goto out;

	page_count = obj->size / PAGE_SIZE;

	/* Copy the contents back from the contiguous backing store into
	 * the object's shmem pages before detaching.
	 */
	for (i = 0; i < page_count; i++) {
		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);
	}
	drm_clflush_pages(obj_priv->pages, page_count);
	drm_agp_chipset_flush(dev);

	i915_gem_object_put_pages(obj);
out:
	obj_priv->phys_obj->cur_obj = NULL;
	obj_priv->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_gem_object *obj, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	obj_priv = obj->driver_private;

	if (obj_priv->phys_obj) {
		if (obj_priv->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* Create a new phys object if one doesn't exist for this id yet. */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id, obj->size);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->size);
			goto out;
		}
	}

	/* Bind to the object. */
	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj_priv->phys_obj->cur_obj = obj;

	ret = i915_gem_object_get_pages(obj);
	if (ret) {
		DRM_ERROR("failed to get page list\n");
		goto out;
	}

	page_count = obj->size / PAGE_SIZE;

	/* Copy the object's current contents into the contiguous backing
	 * store.
	 */
	for (i = 0; i < page_count; i++) {
		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER0);
	}

	i915_gem_object_put_pages(obj);

	return 0;
out:
	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	void *obj_addr;
	int ret;
	char __user *user_data;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;

	DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
	ret = copy_from_user(obj_addr, user_data, args->size);
	if (ret)
		return -EFAULT;

	drm_agp_chipset_flush(dev);
	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	mutex_lock(&dev->struct_mutex);
	while (!list_empty(&i915_file_priv->mm.request_list))
		list_del_init(i915_file_priv->mm.request_list.next);
	mutex_unlock(&dev->struct_mutex);
}
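/*
 * Editor's note on i915_gem_release() above: requests are only unlinked
 * from the per-client list (list_del_init()), not freed; they stay on
 * the device-wide request list and are freed by the normal retirement
 * path. Unlinking here is enough to keep later retirement from chasing
 * a pointer into the client's soon-to-be-freed drm_i915_file_private.
 */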