/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include <linux/swap.h>

static int
i915_gem_object_set_domain(struct drm_gem_object *obj,
			   uint32_t read_domains,
			   uint32_t write_domain);
static int
i915_gem_object_set_domain_range(struct drm_gem_object *obj,
				 uint64_t offset,
				 uint64_t size,
				 uint32_t read_domains,
				 uint32_t write_domain);
static int
i915_gem_set_domain(struct drm_gem_object *obj,
		    struct drm_file *file_priv,
		    uint32_t read_domains,
		    uint32_t write_domain);
static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);

static void
i915_gem_cleanup_ringbuffer(struct drm_device *dev);

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_init *args = data;

	mutex_lock(&dev->struct_mutex);

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_start & (PAGE_SIZE - 1)) != 0 ||
	    (args->gtt_end & (PAGE_SIZE - 1)) != 0) {
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start,
		    args->gtt_end - args->gtt_start);

	dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start);

	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj_priv;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = args->aper_size;

	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
		if (obj_priv->pin_count > 0)
			args->aper_available_size -= obj_priv->obj->size;
	}

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int handle, ret;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

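/*
 * A minimal userspace sketch (not part of this driver) of how the create
 * ioctl above is expected to be used, assuming the definitions exported in
 * i915_drm.h and an already-open DRM file descriptor fd:
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *	int ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *
 * On success create.handle names a page-aligned GEM object of at least the
 * requested size, usable with the pread/pwrite/mmap/execbuffer ioctls below.
 */
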
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	ssize_t read;
	loff_t offset;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
					       I915_GEM_DOMAIN_CPU, 0);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	offset = args->offset;

	read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr,
			args->size, &offset);
	if (read != args->size) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		if (read < 0)
			return read;
		else
			return -EINVAL;
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline int
slow_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = io_mapping_map_wc(mapping, page_base);
	if (vaddr == NULL)
		return -EFAULT;
	unwritten = __copy_from_user(vaddr + page_offset,
				     user_data, length);
	io_mapping_unmap(vaddr);
	if (unwritten)
		return -EFAULT;
	return 0;
}

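/**
 * This is the pwrite path that copies user data into the object through the
 * GTT aperture: the object is pinned, moved to the GTT domain, and then
 * written page by page with fast_user_write(), falling back to
 * slow_user_write() when the source page would fault.
 */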
static int
i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		    struct drm_i915_gem_pwrite *args,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;


	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_set_domain(obj, file_priv,
				  I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	if (ret)
		goto fail;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
				       page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. In this case, use the
		 * non-atomic function
		 */
		if (ret) {
			ret = slow_user_write (dev_priv->mm.gtt_mapping,
					       page_base, page_offset,
					       user_data, page_length);
			if (ret)
				goto fail;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

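/**
 * This is the pwrite path for objects that cannot (or should not) be written
 * through the GTT: the object is moved to the CPU domain and the data is
 * written into its shmem backing store with vfs_write().
 */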
static int
i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file_priv)
{
	int ret;
	loff_t offset;
	ssize_t written;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_set_domain(obj, file_priv,
				  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	offset = args->offset;

	written = vfs_write(obj->filp,
			    (char __user *)(uintptr_t) args->data_ptr,
			    args->size, &offset);
	if (written != args->size) {
		mutex_unlock(&dev->struct_mutex);
		if (written < 0)
			return written;
		else
			return -EINVAL;
	}

	mutex_unlock(&dev->struct_mutex);

	return 0;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->tiling_mode == I915_TILING_NONE &&
	    dev->gtt_total != 0)
		ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
	else
		ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference(obj);

	return ret;
}

/**
 * Called when user space prepares to use an object
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);
#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
		 obj, obj->size, args->read_domains, args->write_domain);
#endif
	ret = i915_gem_set_domain(obj, file_priv,
				  args->read_domains, args->write_domain);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %d)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = obj->driver_private;

	/* Pinned buffers may be scanout, so flush the cache */
	if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
		i915_gem_clflush_object(obj);
		drm_agp_chipset_flush(dev);
	}
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

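/**
 * Drops the object's reference on its backing pages, marking them dirty and
 * accessed as appropriate, and frees the page list itself.
 */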
static void
i915_gem_object_free_page_list(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count = obj->size / PAGE_SIZE;
	int i;

	if (obj_priv->page_list == NULL)
		return;


	for (i = 0; i < page_count; i++)
		if (obj_priv->page_list[i] != NULL) {
			if (obj_priv->dirty)
				set_page_dirty(obj_priv->page_list[i]);
			mark_page_accessed(obj_priv->page_list[i]);
			page_cache_release(obj_priv->page_list[i]);
		}
	obj_priv->dirty = 0;

	drm_free(obj_priv->page_list,
		 page_count * sizeof(struct page *),
		 DRM_MEM_DRIVER);
	obj_priv->page_list = NULL;
}

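/**
 * Moves the object to the tail of the active list, taking a reference on it
 * the first time it becomes active so it cannot be freed while the GPU may
 * still be using it.
 */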
static void
i915_gem_object_move_to_active(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj_priv->list,
		       &dev_priv->mm.active_list);
}


static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_lock held.
 *
 * Returned sequence numbers are nonzero on success.
 */
static uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
	if (request == NULL)
		return 0;

	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);

	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	DRM_DEBUG("%d\n", seqno);

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	request->flush_domains = flush_domains;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list);

	if (was_empty && !dev_priv->mm.suspended)
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&dev_priv->mm.active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			return;
#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0) {
			list_move_tail(&obj_priv->list,
				       &dev_priv->mm.flushing_list);
		} else {
			i915_gem_object_move_to_inactive(obj);
		}
	}

	if (request->flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		/* Clear the write domain and activity from any buffers
		 * that are just waiting for a flush matching the one retired.
		 */
		list_for_each_entry_safe(obj_priv, next,
					 &dev_priv->mm.flushing_list, list) {
			struct drm_gem_object *obj = obj_priv->obj;

			if (obj->write_domain & request->flush_domains) {
				obj->write_domain = 0;
				i915_gem_object_move_to_inactive(obj);
			}
		}

	}
}

/**
 * Returns true if seq1 is later than seq2.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

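/**
 * Returns the most recently completed sequence number, as reported by the
 * hardware in the status page.
 */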
uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&dev_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    dev_priv->mm.wedged) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
		} else
			break;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended &&
	    !list_empty(&dev_priv->mm.request_list))
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_get(dev);
		ret = wait_event_interruptible(dev_priv->irq_queue,
					       i915_seqno_passed(i915_get_gem_seqno(dev),
								 seqno) ||
					       dev_priv->mm.wedged);
		i915_user_irq_put(dev);
		dev_priv->mm.waiting_gem_seqno = 0;
	}
	if (dev_priv->mm.wedged)
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, i915_get_gem_seqno(dev));

	/* Directly dispatch request retiring. While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

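/**
 * Flushes and invalidates the requested GPU cache domains by emitting an
 * MI_FLUSH on the ring; a CPU-domain flush is handled with a chipset flush
 * instead.
 */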
static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

#if WATCH_EXEC
	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
		 invalidate_domains, flush_domains);
#endif

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
						     I915_GEM_DOMAIN_GTT)) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
		 *
		 * I915_GEM_DOMAIN_COMMAND may not exist?
		 *
		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
		 * invalidated when MI_EXE_FLUSH is set.
		 *
		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
		 * invalidated with every MI_FLUSH.
		 *
		 * TLBs:
		 *
		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
		 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
		 * are flushed at any MI_FLUSH.
		 */

		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
		if ((invalidate_domains|flush_domains) &
		    I915_GEM_DOMAIN_RENDER)
			cmd &= ~MI_NO_WRITE_FLUSH;
		if (!IS_I965G(dev)) {
			/*
			 * On the 965, the sampler cache always gets flushed
			 * and this bit is reserved.
			 */
			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
				cmd |= MI_READ_FLUSH;
		}
		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
			cmd |= MI_EXE_FLUSH;

#if WATCH_EXEC
		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
#endif
		BEGIN_LP_RING(2);
		OUT_RING(cmd);
		OUT_RING(0); /* noop */
		ADVANCE_LP_RING();
	}
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
i915_gem_object_wait_rendering(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret;

	/* If there are writes queued to the buffer, flush and
	 * create a new seqno to wait for.
	 */
	if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
		uint32_t write_domain = obj->write_domain;
#if WATCH_BUF
		DRM_INFO("%s: flushing object %p from write domain %08x\n",
			 __func__, obj, write_domain);
#endif
		i915_gem_flush(dev, 0, write_domain);

		i915_gem_object_move_to_active(obj);
		obj_priv->last_rendering_seqno = i915_add_request(dev,
								  write_domain);
		BUG_ON(obj_priv->last_rendering_seqno == 0);
#if WATCH_LRU
		DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
#endif
	}

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj_priv->active) {
#if WATCH_BUF
		DRM_INFO("%s: object %p wait for seqno %08x\n",
			 __func__, obj, obj_priv->last_rendering_seqno);
#endif
		ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
		if (ret != 0)
			return ret;
	}

	return 0;
}

/**
 * Unbinds an object from the GTT aperture.
 */
static int
i915_gem_object_unbind(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret = 0;

#if WATCH_BUF
	DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
	DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
#endif
	if (obj_priv->gtt_space == NULL)
		return 0;

	if (obj_priv->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	/* Wait for any rendering to complete
	 */
	ret = i915_gem_object_wait_rendering(obj);
	if (ret) {
		DRM_ERROR("wait_rendering failed: %d\n", ret);
		return ret;
	}

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 * before we unbind.
	 */
	ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
					 I915_GEM_DOMAIN_CPU);
	if (ret) {
		DRM_ERROR("set_domain failed: %d\n", ret);
		return ret;
	}

	if (obj_priv->agp_mem != NULL) {
		drm_unbind_agp(obj_priv->agp_mem);
		drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
		obj_priv->agp_mem = NULL;
	}

	BUG_ON(obj_priv->active);

	i915_gem_object_free_page_list(obj);

	if (obj_priv->gtt_space) {
		atomic_dec(&dev->gtt_count);
		atomic_sub(obj->size, &dev->gtt_memory);

		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
	}

	/* Remove ourselves from the LRU list if present. */
	if (!list_empty(&obj_priv->list))
		list_del_init(&obj_priv->list);

	return 0;
}

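/**
 * Frees up GTT space by evicting an inactive object, waiting for outstanding
 * requests, or flushing pending writes, looping until something can actually
 * be unbound. Returns -ENOMEM if no progress can be made.
 */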
static int
i915_gem_evict_something(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	for (;;) {
		/* If there's an inactive buffer available now, grab it
		 * and be done.
		 */
		if (!list_empty(&dev_priv->mm.inactive_list)) {
			obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
						    struct drm_i915_gem_object,
						    list);
			obj = obj_priv->obj;
			BUG_ON(obj_priv->pin_count != 0);
#if WATCH_LRU
			DRM_INFO("%s: evicting %p\n", __func__, obj);
#endif
			BUG_ON(obj_priv->active);

			/* Wait on the rendering and unbind the buffer. */
			ret = i915_gem_object_unbind(obj);
			break;
		}

		/* If we didn't get anything, but the ring is still processing
		 * things, wait for one of those things to finish and hopefully
		 * leave us a buffer to evict.
		 */
		if (!list_empty(&dev_priv->mm.request_list)) {
			struct drm_i915_gem_request *request;

			request = list_first_entry(&dev_priv->mm.request_list,
						   struct drm_i915_gem_request,
						   list);

			ret = i915_wait_request(dev, request->seqno);
			if (ret)
				break;

			/* if waiting caused an object to become inactive,
			 * then loop around and wait for it. Otherwise, we
			 * assume that waiting freed and unbound something,
			 * so there should now be some space in the GTT
			 */
			if (!list_empty(&dev_priv->mm.inactive_list))
				continue;
			break;
		}

		/* If we didn't have anything on the request list but there
		 * are buffers awaiting a flush, emit one and try again.
		 * When we wait on it, those buffers waiting for that flush
		 * will get moved to inactive.
		 */
		if (!list_empty(&dev_priv->mm.flushing_list)) {
			obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
						    struct drm_i915_gem_object,
						    list);
			obj = obj_priv->obj;

			i915_gem_flush(dev,
				       obj->write_domain,
				       obj->write_domain);
			i915_add_request(dev, obj->write_domain);

			obj = NULL;
			continue;
		}

		DRM_ERROR("inactive empty %d request empty %d "
			  "flushing empty %d\n",
			  list_empty(&dev_priv->mm.inactive_list),
			  list_empty(&dev_priv->mm.request_list),
			  list_empty(&dev_priv->mm.flushing_list));
		/* If we didn't do any of the above, there's nothing to be done
		 * and we just can't fit it in.
		 */
		return -ENOMEM;
	}
	return ret;
}

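/**
 * Populates the object's page list from its shmem backing store, pinning the
 * pages until they are released by i915_gem_object_free_page_list().
 */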
static int
i915_gem_object_get_page_list(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;
	int ret;

	if (obj_priv->page_list)
		return 0;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->size / PAGE_SIZE;
	BUG_ON(obj_priv->page_list != NULL);
	obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *),
					 DRM_MEM_DRIVER);
	if (obj_priv->page_list == NULL) {
		DRM_ERROR("Failed to allocate page list\n");
		return -ENOMEM;
	}

	inode = obj->filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	for (i = 0; i < page_count; i++) {
		page = read_mapping_page(mapping, i, NULL);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			DRM_ERROR("read_mapping_page failed: %d\n", ret);
			i915_gem_object_free_page_list(obj);
			return ret;
		}
		obj_priv->page_list[i] = page;
	}
	return 0;
}

/**
 * Finds free space in the GTT aperture and binds the object there.
 */
static int
i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_mm_node *free_space;
	int page_count, ret;

	if (alignment == 0)
		alignment = PAGE_SIZE;
	if (alignment & (PAGE_SIZE - 1)) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return -EINVAL;
	}

search_free:
	free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
					obj->size, alignment, 0);
	if (free_space != NULL) {
		obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
						       alignment);
		if (obj_priv->gtt_space != NULL) {
			obj_priv->gtt_space->private = obj;
			obj_priv->gtt_offset = obj_priv->gtt_space->start;
		}
	}
	if (obj_priv->gtt_space == NULL) {
		/* If the gtt is empty and we're still having trouble
		 * fitting our object in, we're out of memory.
		 */
#if WATCH_LRU
		DRM_INFO("%s: GTT full, evicting something\n", __func__);
#endif
		if (list_empty(&dev_priv->mm.inactive_list) &&
		    list_empty(&dev_priv->mm.flushing_list) &&
		    list_empty(&dev_priv->mm.active_list)) {
			DRM_ERROR("GTT full, but LRU list empty\n");
			return -ENOMEM;
		}

		ret = i915_gem_evict_something(dev);
		if (ret != 0) {
			DRM_ERROR("Failed to evict a buffer %d\n", ret);
			return ret;
		}
		goto search_free;
	}

#if WATCH_BUF
	DRM_INFO("Binding object of size %d at 0x%08x\n",
		 obj->size, obj_priv->gtt_offset);
#endif
	ret = i915_gem_object_get_page_list(obj);
	if (ret) {
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
		return ret;
	}

	page_count = obj->size / PAGE_SIZE;
	/* Create an AGP memory structure pointing at our pages, and bind it
	 * into the GTT.
	 */
	obj_priv->agp_mem = drm_agp_bind_pages(dev,
					       obj_priv->page_list,
					       page_count,
					       obj_priv->gtt_offset,
					       obj_priv->agp_type);
	if (obj_priv->agp_mem == NULL) {
		i915_gem_object_free_page_list(obj);
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
		return -ENOMEM;
	}
	atomic_inc(&dev->gtt_count);
	atomic_add(obj->size, &dev->gtt_memory);

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
	BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));

	return 0;
}

void
i915_gem_clflush_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj_priv->page_list == NULL)
		return;

	drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
}

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static int
i915_gem_object_set_domain(struct drm_gem_object *obj,
			   uint32_t read_domains,
			   uint32_t write_domain)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	uint32_t invalidate_domains = 0;
	uint32_t flush_domains = 0;
	int ret;

#if WATCH_BUF
	DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
		 __func__, obj,
		 obj->read_domains, read_domains,
		 obj->write_domain, write_domain);
#endif
	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (write_domain == 0)
		read_domains |= obj->read_domains;
	else
		obj_priv->dirty = 1;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->write_domain && obj->write_domain != read_domains) {
		flush_domains |= obj->write_domain;
		invalidate_domains |= read_domains & ~obj->write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= read_domains & ~obj->read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
#if WATCH_BUF
		DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
			 __func__, flush_domains, invalidate_domains);
#endif
		/*
		 * If we're invalidating the CPU cache and flushing a GPU cache,
		 * then pause for rendering so that the GPU caches will be
		 * flushed before the cpu cache is invalidated
		 */
		if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
		    (flush_domains & ~(I915_GEM_DOMAIN_CPU |
				       I915_GEM_DOMAIN_GTT))) {
			ret = i915_gem_object_wait_rendering(obj);
			if (ret)
				return ret;
		}
		i915_gem_clflush_object(obj);
	}

	if ((write_domain | flush_domains) != 0)
		obj->write_domain = write_domain;

	/* If we're invalidating the CPU domain, clear the per-page CPU
	 * domain list as well.
	 */
	if (obj_priv->page_cpu_valid != NULL &&
	    (write_domain != 0 ||
	     read_domains & I915_GEM_DOMAIN_CPU)) {
		drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
			 DRM_MEM_DRIVER);
		obj_priv->page_cpu_valid = NULL;
	}
	obj->read_domains = read_domains;

	dev->invalidate_domains |= invalidate_domains;
	dev->flush_domains |= flush_domains;
#if WATCH_BUF
	DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
		 __func__,
		 obj->read_domains, obj->write_domain,
		 dev->invalidate_domains, dev->flush_domains);
#endif
	return 0;
}

/**
 * Set the read/write domain on a range of the object.
 *
 * Currently only implemented for CPU reads, otherwise drops to normal
 * i915_gem_object_set_domain().
 */
static int
i915_gem_object_set_domain_range(struct drm_gem_object *obj,
				 uint64_t offset,
				 uint64_t size,
				 uint32_t read_domains,
				 uint32_t write_domain)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret, i;

	if (obj->read_domains & I915_GEM_DOMAIN_CPU)
		return 0;

	if (read_domains != I915_GEM_DOMAIN_CPU ||
	    write_domain != 0)
		return i915_gem_object_set_domain(obj,
						  read_domains, write_domain);

	/* Wait on any GPU rendering to the object to be flushed. */
	if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
		ret = i915_gem_object_wait_rendering(obj);
		if (ret)
			return ret;
	}

	if (obj_priv->page_cpu_valid == NULL) {
		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
						      DRM_MEM_DRIVER);
	}

	/* Flush the cache on any pages that are still invalid from the CPU's
	 * perspective.
	 */
	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
		if (obj_priv->page_cpu_valid[i])
			continue;

		drm_clflush_pages(obj_priv->page_list + i, 1);

		obj_priv->page_cpu_valid[i] = 1;
	}

	return 0;
}

/**
 * Once all of the objects have been set in the proper domain,
 * perform the necessary flush and invalidate operations.
 *
 * Returns the write domains flushed, for use in flush tracking.
 */
static uint32_t
i915_gem_dev_set_domain(struct drm_device *dev)
{
	uint32_t flush_domains = dev->flush_domains;

	/*
	 * Now that all the buffers are synced to the proper domains,
	 * flush and invalidate the collected domains
	 */
	if (dev->invalidate_domains | dev->flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 dev->invalidate_domains,
			 dev->flush_domains);
#endif
		i915_gem_flush(dev,
			       dev->invalidate_domains,
			       dev->flush_domains);
		dev->invalidate_domains = 0;
		dev->flush_domains = 0;
	}

	return flush_domains;
}

/**
 * Pin an object to the GTT and evaluate the relocations landing in it.
 */
static int
i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
				 struct drm_file *file_priv,
				 struct drm_i915_gem_exec_object *entry)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_relocation_entry __user *relocs;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int i, ret;
	void __iomem *reloc_page;

	/* Choose the GTT offset for our buffer and put it there. */
	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
	if (ret)
		return ret;

	entry->offset = obj_priv->gtt_offset;

	relocs = (struct drm_i915_gem_relocation_entry __user *)
		 (uintptr_t) entry->relocs_ptr;
	/* Apply the relocations, using the GTT aperture to avoid cache
	 * flushing requirements.
	 */
	for (i = 0; i < entry->relocation_count; i++) {
		struct drm_gem_object *target_obj;
		struct drm_i915_gem_object *target_obj_priv;
		uint32_t reloc_val, reloc_offset;
		uint32_t __iomem *reloc_entry;

		ret = copy_from_user(&reloc, relocs + i, sizeof(reloc));
		if (ret != 0) {
			i915_gem_object_unpin(obj);
			return ret;
		}

		target_obj = drm_gem_object_lookup(obj->dev, file_priv,
						   reloc.target_handle);
		if (target_obj == NULL) {
			i915_gem_object_unpin(obj);
			return -EBADF;
		}
		target_obj_priv = target_obj->driver_private;

		/* The target buffer should have appeared before us in the
		 * exec_object list, so it should have a GTT space bound by now.
		 */
		if (target_obj_priv->gtt_space == NULL) {
			DRM_ERROR("No GTT space found for object %d\n",
				  reloc.target_handle);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return -EINVAL;
		}

		if (reloc.offset > obj->size - 4) {
			DRM_ERROR("Relocation beyond object bounds: "
				  "obj %p target %d offset %d size %d.\n",
				  obj, reloc.target_handle,
				  (int) reloc.offset, (int) obj->size);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return -EINVAL;
		}
		if (reloc.offset & 3) {
			DRM_ERROR("Relocation not 4-byte aligned: "
				  "obj %p target %d offset %d.\n",
				  obj, reloc.target_handle,
				  (int) reloc.offset);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return -EINVAL;
		}

		if (reloc.write_domain && target_obj->pending_write_domain &&
		    reloc.write_domain != target_obj->pending_write_domain) {
			DRM_ERROR("Write domain conflict: "
				  "obj %p target %d offset %d "
				  "new %08x old %08x\n",
				  obj, reloc.target_handle,
				  (int) reloc.offset,
				  reloc.write_domain,
				  target_obj->pending_write_domain);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return -EINVAL;
		}

#if WATCH_RELOC
		DRM_INFO("%s: obj %p offset %08x target %d "
			 "read %08x write %08x gtt %08x "
			 "presumed %08x delta %08x\n",
			 __func__,
			 obj,
			 (int) reloc.offset,
			 (int) reloc.target_handle,
			 (int) reloc.read_domains,
			 (int) reloc.write_domain,
			 (int) target_obj_priv->gtt_offset,
			 (int) reloc.presumed_offset,
			 reloc.delta);
#endif

		target_obj->pending_read_domains |= reloc.read_domains;
		target_obj->pending_write_domain |= reloc.write_domain;

		/* If the relocation already has the right value in it, no
		 * more work needs to be done.
		 */
		if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
			drm_gem_object_unreference(target_obj);
			continue;
		}

		/* Now that we're going to actually write some data in,
		 * make sure that any rendering using this buffer's contents
		 * is completed.
		 */
		i915_gem_object_wait_rendering(obj);

		/* As we're writing through the gtt, flush
		 * any CPU writes before we write the relocations
		 */
		if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			drm_agp_chipset_flush(dev);
			obj->write_domain = 0;
		}

		/* Map the page containing the relocation we're going to
		 * perform.
		 */
		reloc_offset = obj_priv->gtt_offset + reloc.offset;
		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
						      (reloc_offset &
						       ~(PAGE_SIZE - 1)));
		reloc_entry = (uint32_t __iomem *)(reloc_page +
						   (reloc_offset & (PAGE_SIZE - 1)));
		reloc_val = target_obj_priv->gtt_offset + reloc.delta;

#if WATCH_BUF
		DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
			 obj, (unsigned int) reloc.offset,
			 readl(reloc_entry), reloc_val);
#endif
		writel(reloc_val, reloc_entry);
		io_mapping_unmap_atomic(reloc_page);

		/* Write the updated presumed offset for this entry back out
		 * to the user.
		 */
		reloc.presumed_offset = target_obj_priv->gtt_offset;
		ret = copy_to_user(relocs + i, &reloc, sizeof(reloc));
		if (ret != 0) {
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
			return ret;
		}

		drm_gem_object_unreference(target_obj);
	}

#if WATCH_BUF
	if (0)
		i915_gem_dump_object(obj, 128, __func__, ~0);
#endif
	return 0;
}

/** Dispatch a batchbuffer to the ring
 */
static int
i915_dispatch_gem_execbuffer(struct drm_device *dev,
			     struct drm_i915_gem_execbuffer *exec,
			     uint64_t exec_offset)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *)
					     (uintptr_t) exec->cliprects_ptr;
	int nbox = exec->num_cliprects;
	int i = 0, count;
	uint32_t exec_start, exec_len;
	RING_LOCALS;

	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
	exec_len = (uint32_t) exec->batch_len;

	if ((exec_start | exec_len) & 0x7) {
		DRM_ERROR("alignment\n");
		return -EINVAL;
	}

	if (!exec_start)
		return -EINVAL;

	count = nbox ? nbox : 1;

	for (i = 0; i < count; i++) {
		if (i < nbox) {
			int ret = i915_emit_box(dev, boxes, i,
						exec->DR1, exec->DR4);
			if (ret)
				return ret;
		}

		if (IS_I830(dev) || IS_845G(dev)) {
			BEGIN_LP_RING(4);
			OUT_RING(MI_BATCH_BUFFER);
			OUT_RING(exec_start | MI_BATCH_NON_SECURE);
			OUT_RING(exec_start + exec_len - 4);
			OUT_RING(0);
			ADVANCE_LP_RING();
		} else {
			BEGIN_LP_RING(2);
			if (IS_I965G(dev)) {
				OUT_RING(MI_BATCH_BUFFER_START |
					 (2 << 6) |
					 MI_BATCH_NON_SECURE_I965);
				OUT_RING(exec_start);
			} else {
				OUT_RING(MI_BATCH_BUFFER_START |
					 (2 << 6));
				OUT_RING(exec_start | MI_BATCH_NON_SECURE);
			}
			ADVANCE_LP_RING();
		}
	}

	/* XXX breadcrumb */
	return 0;
}

/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
static int
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
	int ret = 0;
	uint32_t seqno;

	mutex_lock(&dev->struct_mutex);
	seqno = i915_file_priv->mm.last_gem_throttle_seqno;
	i915_file_priv->mm.last_gem_throttle_seqno =
		i915_file_priv->mm.last_gem_seqno;
	if (seqno)
		ret = i915_wait_request(dev, seqno);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

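/**
 * Submits a batchbuffer for execution: looks up and pins each buffer in the
 * exec list, applies relocations, resolves the cache domains, dispatches the
 * batch to the ring, and tags the buffers with a new request seqno so their
 * completion can be tracked.
 */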
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_gem_object **object_list = NULL;
	struct drm_gem_object *batch_obj;
	int ret, i, pinned = 0;
	uint64_t exec_offset;
	uint32_t seqno, flush_domains;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}
	/* Copy in the exec list from userland */
	exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
			       DRM_MEM_DRIVER);
	object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
				 DRM_MEM_DRIVER);
	if (exec_list == NULL || object_list == NULL) {
		DRM_ERROR("Failed to allocate exec or object list "
			  "for %d buffers\n",
			  args->buffer_count);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}
	ret = copy_from_user(exec_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		goto pre_mutex_err;
	}

	mutex_lock(&dev->struct_mutex);

	i915_verify_inactive(dev, __FILE__, __LINE__);

	if (dev_priv->mm.wedged) {
		DRM_ERROR("Execbuf while wedged\n");
		mutex_unlock(&dev->struct_mutex);
		return -EIO;
	}

	if (dev_priv->mm.suspended) {
		DRM_ERROR("Execbuf while VT-switched.\n");
		mutex_unlock(&dev->struct_mutex);
		return -EBUSY;
	}

	/* Zero the global flush/invalidate flags. These
	 * will be modified as each object is bound to the
	 * gtt
	 */
	dev->invalidate_domains = 0;
	dev->flush_domains = 0;

	/* Look up object handles and perform the relocations */
	for (i = 0; i < args->buffer_count; i++) {
		object_list[i] = drm_gem_object_lookup(dev, file_priv,
						       exec_list[i].handle);
		if (object_list[i] == NULL) {
			DRM_ERROR("Invalid object handle %d at index %d\n",
				  exec_list[i].handle, i);
			ret = -EBADF;
			goto err;
		}

		object_list[i]->pending_read_domains = 0;
		object_list[i]->pending_write_domain = 0;
		ret = i915_gem_object_pin_and_relocate(object_list[i],
						       file_priv,
						       &exec_list[i]);
		if (ret) {
			DRM_ERROR("object bind and relocate failed %d\n", ret);
			goto err;
		}
		pinned = i + 1;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	batch_obj = object_list[args->buffer_count-1];
	batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
	batch_obj->pending_write_domain = 0;

	i915_verify_inactive(dev, __FILE__, __LINE__);

	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];
		struct drm_i915_gem_object *obj_priv = obj->driver_private;

		if (obj_priv->gtt_space == NULL) {
			/* We evicted the buffer in the process of validating
			 * our set of buffers in. We could try to recover by
			 * kicking everything out and trying again from
			 * the start.
			 */
			ret = -ENOMEM;
			goto err;
		}

		/* make sure all previous memory operations have passed */
		ret = i915_gem_object_set_domain(obj,
						 obj->pending_read_domains,
						 obj->pending_write_domain);
		if (ret)
			goto err;
	}

	i915_verify_inactive(dev, __FILE__, __LINE__);

	/* Flush/invalidate caches and chipset buffer */
	flush_domains = i915_gem_dev_set_domain(dev);

	i915_verify_inactive(dev, __FILE__, __LINE__);

#if WATCH_COHERENCY
	for (i = 0; i < args->buffer_count; i++) {
		i915_gem_object_check_coherency(object_list[i],
						exec_list[i].handle);
	}
#endif

	exec_offset = exec_list[args->buffer_count - 1].offset;

#if WATCH_EXEC
	i915_gem_dump_object(object_list[args->buffer_count - 1],
			     args->batch_len,
			     __func__,
			     ~0);
#endif

	(void)i915_add_request(dev, flush_domains);

	/* Exec the batchbuffer */
	ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset);
	if (ret) {
		DRM_ERROR("dispatch failed %d\n", ret);
		goto err;
	}

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires
	 */
	flush_domains = i915_retire_commands(dev);

	i915_verify_inactive(dev, __FILE__, __LINE__);

	/*
	 * Get a seqno representing the execution of the current buffer,
	 * which we can wait on. We would like to mitigate these interrupts,
	 * likely by only creating seqnos occasionally (so that we have
	 * *some* interrupts representing completion of buffers that we can
	 * wait on when trying to clear up gtt space).
	 */
	seqno = i915_add_request(dev, flush_domains);
	BUG_ON(seqno == 0);
	i915_file_priv->mm.last_gem_seqno = seqno;
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];
		struct drm_i915_gem_object *obj_priv = obj->driver_private;

		i915_gem_object_move_to_active(obj);
		obj_priv->last_rendering_seqno = seqno;
#if WATCH_LRU
		DRM_INFO("%s: move to exec list %p\n", __func__, obj);
#endif
	}
#if WATCH_LRU
	i915_dump_lru(dev, __func__);
#endif

	i915_verify_inactive(dev, __FILE__, __LINE__);

	/* Copy the new buffer offsets back to the user's exec list. */
	ret = copy_to_user((struct drm_i915_relocation_entry __user *)
			   (uintptr_t) args->buffers_ptr,
			   exec_list,
			   sizeof(*exec_list) * args->buffer_count);
	if (ret)
		DRM_ERROR("failed to copy %d exec entries "
			  "back to user (%d)\n",
			  args->buffer_count, ret);
err:
	if (object_list != NULL) {
		for (i = 0; i < pinned; i++)
			i915_gem_object_unpin(object_list[i]);

		for (i = 0; i < args->buffer_count; i++)
			drm_gem_object_unreference(object_list[i]);
	}
	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	drm_free(object_list, sizeof(*object_list) * args->buffer_count,
		 DRM_MEM_DRIVER);
	drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
		 DRM_MEM_DRIVER);

	return ret;
}

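/**
 * Binds the object into the GTT if necessary and increments its pin count,
 * keeping it resident and off the inactive list until it is unpinned.
 */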
int
i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->gtt_space == NULL) {
		ret = i915_gem_object_bind_to_gtt(obj, alignment);
		if (ret != 0) {
			DRM_ERROR("Failure to bind: %d\n", ret);
			return ret;
		}
	}
	obj_priv->pin_count++;

	/* If the object is not active and not pending a flush,
	 * remove it from the inactive list
	 */
	if (obj_priv->pin_count == 1) {
		atomic_inc(&dev->pin_count);
		atomic_add(obj->size, &dev->pin_memory);
		if (!obj_priv->active &&
		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
					   I915_GEM_DOMAIN_GTT)) == 0 &&
		    !list_empty(&obj_priv->list))
			list_del_init(&obj_priv->list);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);

	return 0;
}

void
i915_gem_object_unpin(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	obj_priv->pin_count--;
	BUG_ON(obj_priv->pin_count < 0);
	BUG_ON(obj_priv->gtt_space == NULL);

	/* If the object is no longer pinned, and is
	 * neither active nor being flushed, then stick it on
	 * the inactive list
	 */
	if (obj_priv->pin_count == 0) {
		if (!obj_priv->active &&
		    (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
					   I915_GEM_DOMAIN_GTT)) == 0)
			list_move_tail(&obj_priv->list,
				       &dev_priv->mm.inactive_list);
		atomic_dec(&dev->pin_count);
		atomic_sub(obj->size, &dev->pin_memory);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

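/**
 * i915_gem_pin_ioctl - pin an object and report its GTT offset
 *
 * Used by the X server (which does not manage domains yet) to keep
 * buffers such as scanout and cursor objects resident.  Any pending CPU
 * writes are clflushed here so the GPU sees coherent data, and the
 * object's GTT offset is returned in the ioctl arguments.
 */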
int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	mutex_lock(&dev->struct_mutex);

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}
	obj_priv = obj->driver_private;

	ret = i915_gem_object_pin(obj, args->alignment);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
	if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
		i915_gem_clflush_object(obj);
		drm_agp_chipset_flush(dev);
		obj->write_domain = 0;
	}
	args->offset = obj_priv->gtt_offset;
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;

	mutex_lock(&dev->struct_mutex);

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
			  args->handle);
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

	i915_gem_object_unpin(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return 0;
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
			  args->handle);
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

	obj_priv = obj->driver_private;
	args->busy = obj_priv->active;

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return 0;
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	return i915_gem_ring_throttle(dev, file_priv);
}

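/**
 * i915_gem_init_object - allocate driver-private state for a new GEM object
 *
 * Called by the GEM core when an object is created.  Freshly allocated
 * pages have just been zeroed by the CPU and must be clflushed before the
 * GPU uses them, so both read and write domains start out as CPU.
 */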
int i915_gem_init_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;

	obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
	if (obj_priv == NULL)
		return -ENOMEM;

	/*
	 * We've just allocated pages from the kernel,
	 * so they've just been written by the CPU with
	 * zeros.  They'll need to be clflushed before we
	 * use them with the GPU.
	 */
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	obj_priv->agp_type = AGP_USER_MEMORY;

	obj->driver_private = obj_priv;
	obj_priv->obj = obj;
	INIT_LIST_HEAD(&obj_priv->list);
	return 0;
}

void i915_gem_free_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	while (obj_priv->pin_count > 0)
		i915_gem_object_unpin(obj);

	i915_gem_object_unbind(obj);

	drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
}

static int
i915_gem_set_domain(struct drm_gem_object *obj,
		    struct drm_file *file_priv,
		    uint32_t read_domains,
		    uint32_t write_domain)
{
	struct drm_device *dev = obj->dev;
	int ret;
	uint32_t flush_domains;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));

	ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
	if (ret)
		return ret;
	flush_domains = i915_gem_dev_set_domain(obj->dev);

	if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
		(void) i915_add_request(dev, flush_domains);

	return 0;
}

/** Unbinds all objects that are on the given buffer list. */
static int
i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
{
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	while (!list_empty(head)) {
		obj_priv = list_first_entry(head,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		if (obj_priv->pin_count != 0) {
			DRM_ERROR("Pinned object in unbind list\n");
			mutex_unlock(&dev->struct_mutex);
			return -EINVAL;
		}

		ret = i915_gem_object_unbind(obj);
		if (ret != 0) {
			DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
				  ret);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	return 0;
}

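/**
 * i915_gem_idle - quiesce the GPU and empty the GTT
 *
 * Marks the device suspended so no new execbuffers are accepted, flushes
 * all non-CPU write domains, emits a request and polls for its seqno
 * (flagging the hardware as wedged if it stops advancing), retires all
 * outstanding requests, evicts every inactive buffer from the GTT and
 * finally tears down the ring buffer.  Used for VT switch and lastclose.
 */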
static int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno, cur_seqno, last_seqno;
	int stuck, ret;

	mutex_lock(&dev->struct_mutex);

	if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return 0;
	}

	/* Hack!  Don't let anybody do execbuf while we don't control the
	 * chip.  We need to replace this with a semaphore, or something.
	 */
	dev_priv->mm.suspended = 1;

	/* Cancel the retire work handler, wait for it to finish if running
	 */
	mutex_unlock(&dev->struct_mutex);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
	mutex_lock(&dev->struct_mutex);

	i915_kernel_lost_context(dev);

	/* Flush the GPU along with all non-CPU write domains
	 */
	i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
		       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
	seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
					I915_GEM_DOMAIN_GTT));

	if (seqno == 0) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOMEM;
	}

	dev_priv->mm.waiting_gem_seqno = seqno;
	last_seqno = 0;
	stuck = 0;
	for (;;) {
		cur_seqno = i915_get_gem_seqno(dev);
		if (i915_seqno_passed(cur_seqno, seqno))
			break;
		if (last_seqno == cur_seqno) {
			if (stuck++ > 100) {
				DRM_ERROR("hardware wedged\n");
				dev_priv->mm.wedged = 1;
				DRM_WAKEUP(&dev_priv->irq_queue);
				break;
			}
		}
		msleep(10);
		last_seqno = cur_seqno;
	}
	dev_priv->mm.waiting_gem_seqno = 0;

	i915_gem_retire_requests(dev);

	/* Active and flushing should now be empty as we've
	 * waited for a sequence higher than any pending execbuffer
	 */
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));

	/* The request list should now be empty as we've also waited
	 * for the last request in the list
	 */
	BUG_ON(!list_empty(&dev_priv->mm.request_list));

	/* Move all buffers out of the GTT. */
	ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->mm.request_list));

	i915_gem_cleanup_ringbuffer(dev);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

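/**
 * i915_gem_init_hws - set up the hardware status page in the GTT
 *
 * On chipsets that need a GTT-based status page (I915_NEED_GFX_HWS), a
 * 4KB GEM object is allocated, pinned and kmapped, and its GTT address is
 * written to HWS_PGA so the GPU can post sequence numbers and interrupt
 * status there.  Chipsets that use a physical-address status page keep
 * the one initialized at driver load time.
 */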
static int
i915_gem_init_hws(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	/* If we need a physical address for the status page, it's already
	 * initialized at driver load time.
	 */
	if (!I915_NEED_GFX_HWS(dev))
		return 0;

	obj = drm_gem_object_alloc(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;
	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		return ret;
	}

	dev_priv->status_gfx_addr = obj_priv->gtt_offset;

	dev_priv->hw_status_page = kmap(obj_priv->page_list[0]);
	if (dev_priv->hw_status_page == NULL) {
		DRM_ERROR("Failed to map status page.\n");
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}
	dev_priv->hws_obj = obj;
	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
	I915_READ(HWS_PGA); /* posting read */
	DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);

	return 0;
}

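/**
 * i915_gem_init_ringbuffer - allocate and start the render ring
 *
 * Sets up the status page, allocates and pins a 128KB GEM object for the
 * ring, ioremaps it write-combined for CPU access, then programs the PRB0
 * registers: START with the ring's GTT offset and CTL with the ring
 * length plus RING_VALID.  (The control register's length field counts
 * 4KB pages minus one, which is why "obj->size - 4096" is written below.)
 * G45 parts have been seen to leave HEAD non-zero after this sequence, so
 * the head is checked and forced to zero before the ring is declared
 * usable.
 */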
static int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;
	u32 head;

	ret = i915_gem_init_hws(dev);
	if (ret != 0)
		return ret;

	obj = drm_gem_object_alloc(dev, 128 * 1024);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		return ret;
	}

	/* Set up the kernel mapping for the ring. */
	dev_priv->ring.Size = obj->size;
	dev_priv->ring.tail_mask = obj->size - 1;

	dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset;
	dev_priv->ring.map.size = obj->size;
	dev_priv->ring.map.type = 0;
	dev_priv->ring.map.flags = 0;
	dev_priv->ring.map.mtrr = 0;

	drm_core_ioremap_wc(&dev_priv->ring.map, dev);
	if (dev_priv->ring.map.handle == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}
	dev_priv->ring.ring_obj = obj;
	dev_priv->ring.virtual_start = dev_priv->ring.map.handle;

	/* Stop the ring if it's running. */
	I915_WRITE(PRB0_CTL, 0);
	I915_WRITE(PRB0_TAIL, 0);
	I915_WRITE(PRB0_HEAD, 0);

	/* Initialize the ring. */
	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_ERROR("Ring head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		I915_WRITE(PRB0_HEAD, 0);

		DRM_ERROR("Ring head forced to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
	}

	I915_WRITE(PRB0_CTL,
		   ((obj->size - 4096) & RING_NR_PAGES) |
		   RING_NO_REPORT |
		   RING_VALID);

	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* If the head is still not zero, the ring is dead */
	if (head != 0) {
		DRM_ERROR("Ring initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		return -EIO;
	}

	/* Update our cache of the ring state */
	i915_kernel_lost_context(dev);

	return 0;
}

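/**
 * i915_gem_cleanup_ringbuffer - undo i915_gem_init_ringbuffer()
 *
 * Unmaps, unpins and releases the ring object, and likewise tears down
 * the GTT-based hardware status page if one was set up, pointing HWS_PGA
 * at a high address while it is disabled.
 */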
static void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (dev_priv->ring.ring_obj == NULL)
		return;

	drm_core_ioremapfree(&dev_priv->ring.map, dev);

	i915_gem_object_unpin(dev_priv->ring.ring_obj);
	drm_gem_object_unreference(dev_priv->ring.ring_obj);
	dev_priv->ring.ring_obj = NULL;
	memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));

	if (dev_priv->hws_obj != NULL) {
		struct drm_gem_object *obj = dev_priv->hws_obj;
		struct drm_i915_gem_object *obj_priv = obj->driver_private;

		kunmap(obj_priv->page_list[0]);
		i915_gem_object_unpin(obj);
		drm_gem_object_unreference(obj);
		dev_priv->hws_obj = NULL;
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		dev_priv->hw_status_page = NULL;

		/* Write high address into HWS_PGA when disabling. */
		I915_WRITE(HWS_PGA, 0x1ffff000);
	}
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (dev_priv->mm.wedged) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		dev_priv->mm.wedged = 0;
	}

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0)
		return ret;

	dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
							dev->agp->agp_info.aper_size
							* 1024 * 1024);

	mutex_lock(&dev->struct_mutex);
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->mm.request_list));
	dev_priv->mm.suspended = 0;
	mutex_unlock(&dev->struct_mutex);

	drm_irq_install(dev);

	return 0;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_idle(dev);
	drm_irq_uninstall(dev);

	io_mapping_free(dev_priv->mm.gtt_mapping);
	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

void
i915_gem_load(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.request_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	dev_priv->mm.next_gem_seqno = 1;

	i915_gem_detect_bit_6_swizzle(dev);
}