1 /* BEGIN CSTYLED */ 2 3 /* 4 * Copyright (c) 2009, Intel Corporation. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 * IN THE SOFTWARE. 25 * 26 * Authors: 27 * Eric Anholt <eric@anholt.net> 28 * 29 */ 30 31 /* 32 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 33 * Use is subject to license terms. 34 */ 35 36 #include <sys/x86_archext.h> 37 #include <sys/vfs_opreg.h> 38 #include "drmP.h" 39 #include "drm.h" 40 #include "i915_drm.h" 41 #include "i915_drv.h" 42 43 #ifndef roundup 44 #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) 45 #endif /* !roundup */ 46 47 #define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 48 49 static timeout_id_t worktimer_id = NULL; 50 51 extern int drm_mm_init(struct drm_mm *mm, 52 unsigned long start, unsigned long size); 53 extern void drm_mm_put_block(struct drm_mm_node *cur); 54 extern int choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off, 55 int vacalign, uint_t flags); 56 57 static void 58 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj, 59 uint32_t read_domains, 60 uint32_t write_domain); 61 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); 62 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj); 63 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj); 64 static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, 65 int write); 66 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, 67 int write); 68 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 69 uint64_t offset, 70 uint64_t size); 71 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); 72 static void i915_gem_object_free_page_list(struct drm_gem_object *obj); 73 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); 74 static int i915_gem_object_get_page_list(struct drm_gem_object *obj); 75 76 static void 77 i915_gem_cleanup_ringbuffer(struct drm_device *dev); 78 79 /*ARGSUSED*/ 80 int 81 i915_gem_init_ioctl(DRM_IOCTL_ARGS) 82 { 83 DRM_DEVICE; 84 drm_i915_private_t *dev_priv = dev->dev_private; 85 struct drm_i915_gem_init args; 86 87 if (dev->driver->use_gem != 1) 88 return ENODEV; 89 90 DRM_COPYFROM_WITH_RETURN(&args, 91 (struct drm_i915_gem_init *) data, sizeof(args)); 92 93 spin_lock(&dev->struct_mutex); 94 95 if ((args.gtt_start >= args.gtt_end) || 96 ((args.gtt_start & (PAGE_SIZE - 1)) != 0) || 97 ((args.gtt_end & (PAGE_SIZE - 1)) != 0)) { 98 spin_unlock(&dev->struct_mutex); 99 DRM_ERROR("i915_gem_init_ioctel invalid arg 0x%lx args.start 0x%lx end 0x%lx", &args, args.gtt_start, args.gtt_end); 100 return EINVAL; 101 } 102 103 dev->gtt_total = (uint32_t) (args.gtt_end - args.gtt_start); 104 105 drm_mm_init(&dev_priv->mm.gtt_space, (unsigned long) args.gtt_start, 106 dev->gtt_total); 107 DRM_DEBUG("i915_gem_init_ioctl dev->gtt_total %x, dev_priv->mm.gtt_space 0x%x gtt_start 0x%lx", dev->gtt_total, dev_priv->mm.gtt_space, args.gtt_start); 108 ASSERT(dev->gtt_total != 0); 109 110 spin_unlock(&dev->struct_mutex); 111 112 113 return 0; 114 } 115 116 /*ARGSUSED*/ 117 int 118 i915_gem_get_aperture_ioctl(DRM_IOCTL_ARGS) 119 { 120 DRM_DEVICE; 121 struct drm_i915_gem_get_aperture args; 122 int ret; 123 124 if (dev->driver->use_gem != 1) 125 return ENODEV; 126 127 args.aper_size = (uint64_t)dev->gtt_total; 128 args.aper_available_size = (args.aper_size - 129 atomic_read(&dev->pin_memory)); 130 131 ret = DRM_COPY_TO_USER((struct drm_i915_gem_get_aperture __user *) data, &args, sizeof(args)); 132 133 if ( ret != 0) 134 DRM_ERROR(" i915_gem_get_aperture_ioctl error! %d", ret); 135 136 DRM_DEBUG("i915_gem_get_aaperture_ioctl called sizeof %d, aper_size 0x%x, aper_available_size 0x%x\n", sizeof(args), dev->gtt_total, args.aper_available_size); 137 138 return 0; 139 } 140 141 /** 142 * Creates a new mm object and returns a handle to it. 143 */ 144 /*ARGSUSED*/ 145 int 146 i915_gem_create_ioctl(DRM_IOCTL_ARGS) 147 { 148 DRM_DEVICE; 149 struct drm_i915_gem_create args; 150 struct drm_gem_object *obj; 151 int handlep; 152 int ret; 153 154 if (dev->driver->use_gem != 1) 155 return ENODEV; 156 157 DRM_COPYFROM_WITH_RETURN(&args, 158 (struct drm_i915_gem_create *) data, sizeof(args)); 159 160 161 args.size = (uint64_t) roundup(args.size, PAGE_SIZE); 162 163 if (args.size == 0) { 164 DRM_ERROR("Invalid obj size %d", args.size); 165 return EINVAL; 166 } 167 /* Allocate the new object */ 168 obj = drm_gem_object_alloc(dev, args.size); 169 if (obj == NULL) { 170 DRM_ERROR("Failed to alloc obj"); 171 return ENOMEM; 172 } 173 174 ret = drm_gem_handle_create(fpriv, obj, &handlep); 175 spin_lock(&dev->struct_mutex); 176 drm_gem_object_handle_unreference(obj); 177 spin_unlock(&dev->struct_mutex); 178 if (ret) 179 return ret; 180 181 args.handle = handlep; 182 183 ret = DRM_COPY_TO_USER((struct drm_i915_gem_create *) data, &args, sizeof(args)); 184 185 if ( ret != 0) 186 DRM_ERROR(" gem create error! %d", ret); 187 188 DRM_DEBUG("i915_gem_create_ioctl object name %d, size 0x%lx, list 0x%lx, obj 0x%lx",handlep, args.size, &fpriv->object_idr, obj); 189 190 return 0; 191 } 192 193 /** 194 * Reads data from the object referenced by handle. 195 * 196 * On error, the contents of *data are undefined. 197 */ 198 /*ARGSUSED*/ 199 int 200 i915_gem_pread_ioctl(DRM_IOCTL_ARGS) 201 { 202 DRM_DEVICE; 203 struct drm_i915_gem_pread args; 204 struct drm_gem_object *obj; 205 int ret; 206 207 if (dev->driver->use_gem != 1) 208 return ENODEV; 209 210 DRM_COPYFROM_WITH_RETURN(&args, 211 (struct drm_i915_gem_pread __user *) data, sizeof(args)); 212 213 obj = drm_gem_object_lookup(fpriv, args.handle); 214 if (obj == NULL) 215 return EBADF; 216 217 /* Bounds check source. 218 * 219 * XXX: This could use review for overflow issues... 220 */ 221 if (args.offset > obj->size || args.size > obj->size || 222 args.offset + args.size > obj->size) { 223 drm_gem_object_unreference(obj); 224 DRM_ERROR("i915_gem_pread_ioctl invalid args"); 225 return EINVAL; 226 } 227 228 spin_lock(&dev->struct_mutex); 229 230 ret = i915_gem_object_set_cpu_read_domain_range(obj, args.offset, args.size); 231 if (ret != 0) { 232 drm_gem_object_unreference(obj); 233 spin_unlock(&dev->struct_mutex); 234 DRM_ERROR("pread failed to read domain range ret %d!!!", ret); 235 return EFAULT; 236 } 237 238 unsigned long unwritten = 0; 239 uint32_t *user_data; 240 user_data = (uint32_t *) (uintptr_t) args.data_ptr; 241 242 unwritten = DRM_COPY_TO_USER(user_data, obj->kaddr + args.offset, args.size); 243 if (unwritten) { 244 ret = EFAULT; 245 DRM_ERROR("i915_gem_pread error!!! unwritten %d", unwritten); 246 } 247 248 drm_gem_object_unreference(obj); 249 spin_unlock(&dev->struct_mutex); 250 251 return ret; 252 } 253 254 /*ARGSUSED*/ 255 static int 256 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 257 struct drm_i915_gem_pwrite *args, 258 struct drm_file *file_priv) 259 { 260 uint32_t *user_data; 261 int ret = 0; 262 unsigned long unwritten = 0; 263 264 user_data = (uint32_t *) (uintptr_t) args->data_ptr; 265 spin_lock(&dev->struct_mutex); 266 ret = i915_gem_object_pin(obj, 0); 267 if (ret) { 268 spin_unlock(&dev->struct_mutex); 269 DRM_ERROR("i915_gem_gtt_pwrite failed to pin ret %d", ret); 270 return ret; 271 } 272 273 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 274 if (ret) 275 goto err; 276 277 DRM_DEBUG("obj %d write domain 0x%x read domain 0x%x", obj->name, obj->write_domain, obj->read_domains); 278 279 unwritten = DRM_COPY_FROM_USER(obj->kaddr + args->offset, user_data, args->size); 280 if (unwritten) { 281 ret = EFAULT; 282 DRM_ERROR("i915_gem_gtt_pwrite error!!! unwritten %d", unwritten); 283 goto err; 284 } 285 286 err: 287 i915_gem_object_unpin(obj); 288 spin_unlock(&dev->struct_mutex); 289 if (ret) 290 DRM_ERROR("i915_gem_gtt_pwrite error %d", ret); 291 return ret; 292 } 293 294 /*ARGSUSED*/ 295 int 296 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 297 struct drm_i915_gem_pwrite *args, 298 struct drm_file *file_priv) 299 { 300 DRM_ERROR(" i915_gem_shmem_pwrite Not support"); 301 return -1; 302 } 303 304 /** 305 * Writes data to the object referenced by handle. 306 * 307 * On error, the contents of the buffer that were to be modified are undefined. 308 */ 309 /*ARGSUSED*/ 310 int 311 i915_gem_pwrite_ioctl(DRM_IOCTL_ARGS) 312 { 313 DRM_DEVICE; 314 struct drm_i915_gem_pwrite args; 315 struct drm_gem_object *obj; 316 struct drm_i915_gem_object *obj_priv; 317 int ret = 0; 318 319 if (dev->driver->use_gem != 1) 320 return ENODEV; 321 322 ret = DRM_COPY_FROM_USER(&args, 323 (struct drm_i915_gem_pwrite __user *) data, sizeof(args)); 324 if (ret) 325 DRM_ERROR("i915_gem_pwrite_ioctl failed to copy from user"); 326 obj = drm_gem_object_lookup(fpriv, args.handle); 327 if (obj == NULL) 328 return EBADF; 329 obj_priv = obj->driver_private; 330 DRM_DEBUG("i915_gem_pwrite_ioctl, obj->name %d",obj->name); 331 332 /* Bounds check destination. 333 * 334 * XXX: This could use review for overflow issues... 335 */ 336 if (args.offset > obj->size || args.size > obj->size || 337 args.offset + args.size > obj->size) { 338 drm_gem_object_unreference(obj); 339 DRM_ERROR("i915_gem_pwrite_ioctl invalid arg"); 340 return EINVAL; 341 } 342 343 /* We can only do the GTT pwrite on untiled buffers, as otherwise 344 * it would end up going through the fenced access, and we'll get 345 * different detiling behavior between reading and writing. 346 * pread/pwrite currently are reading and writing from the CPU 347 * perspective, requiring manual detiling by the client. 348 */ 349 if (obj_priv->tiling_mode == I915_TILING_NONE && 350 dev->gtt_total != 0) 351 ret = i915_gem_gtt_pwrite(dev, obj, &args, fpriv); 352 else 353 ret = i915_gem_shmem_pwrite(dev, obj, &args, fpriv); 354 355 if (ret) 356 DRM_ERROR("pwrite failed %d\n", ret); 357 358 drm_gem_object_unreference(obj); 359 360 return ret; 361 } 362 363 /** 364 * Called when user space prepares to use an object with the CPU, either 365 * through the mmap ioctl's mapping or a GTT mapping. 366 */ 367 /*ARGSUSED*/ 368 int 369 i915_gem_set_domain_ioctl(DRM_IOCTL_ARGS) 370 { 371 DRM_DEVICE; 372 struct drm_i915_gem_set_domain args; 373 struct drm_gem_object *obj; 374 int ret = 0; 375 376 if (dev->driver->use_gem != 1) 377 return ENODEV; 378 379 DRM_COPYFROM_WITH_RETURN(&args, 380 (struct drm_i915_gem_set_domain __user *) data, sizeof(args)); 381 382 uint32_t read_domains = args.read_domains; 383 uint32_t write_domain = args.write_domain; 384 385 /* Only handle setting domains to types used by the CPU. */ 386 if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 387 ret = EINVAL; 388 389 if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 390 ret = EINVAL; 391 392 /* Having something in the write domain implies it's in the read 393 * domain, and only that read domain. Enforce that in the request. 394 */ 395 if (write_domain != 0 && read_domains != write_domain) 396 ret = EINVAL; 397 if (ret) { 398 DRM_ERROR("set_domain invalid read or write"); 399 return EINVAL; 400 } 401 402 obj = drm_gem_object_lookup(fpriv, args.handle); 403 if (obj == NULL) 404 return EBADF; 405 406 spin_lock(&dev->struct_mutex); 407 DRM_DEBUG("set_domain_ioctl %p(name %d size 0x%x), %08x %08x\n", 408 obj, obj->name, obj->size, args.read_domains, args.write_domain); 409 410 if (read_domains & I915_GEM_DOMAIN_GTT) { 411 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 412 413 /* Silently promote "you're not bound, there was nothing to do" 414 * to success, since the client was just asking us to 415 * make sure everything was done. 416 */ 417 if (ret == EINVAL) 418 ret = 0; 419 } else { 420 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 421 } 422 423 drm_gem_object_unreference(obj); 424 spin_unlock(&dev->struct_mutex); 425 if (ret) 426 DRM_ERROR("i915_set_domain_ioctl ret %d", ret); 427 return ret; 428 } 429 430 /** 431 * Called when user space has done writes to this buffer 432 */ 433 /*ARGSUSED*/ 434 int 435 i915_gem_sw_finish_ioctl(DRM_IOCTL_ARGS) 436 { 437 DRM_DEVICE; 438 struct drm_i915_gem_sw_finish args; 439 struct drm_gem_object *obj; 440 struct drm_i915_gem_object *obj_priv; 441 int ret = 0; 442 443 if (dev->driver->use_gem != 1) 444 return ENODEV; 445 446 DRM_COPYFROM_WITH_RETURN(&args, 447 (struct drm_i915_gem_sw_finish __user *) data, sizeof(args)); 448 449 spin_lock(&dev->struct_mutex); 450 obj = drm_gem_object_lookup(fpriv, args.handle); 451 if (obj == NULL) { 452 spin_unlock(&dev->struct_mutex); 453 return EBADF; 454 } 455 456 DRM_DEBUG("%s: sw_finish %d (%p name %d size 0x%x)\n", 457 __func__, args.handle, obj, obj->name, obj->size); 458 459 obj_priv = obj->driver_private; 460 /* Pinned buffers may be scanout, so flush the cache */ 461 if (obj_priv->pin_count) 462 { 463 i915_gem_object_flush_cpu_write_domain(obj); 464 } 465 466 drm_gem_object_unreference(obj); 467 spin_unlock(&dev->struct_mutex); 468 return ret; 469 } 470 471 /** 472 * Maps the contents of an object, returning the address it is mapped 473 * into. 474 * 475 * While the mapping holds a reference on the contents of the object, it doesn't 476 * imply a ref on the object itself. 477 */ 478 /*ARGSUSED*/ 479 int 480 i915_gem_mmap_ioctl(DRM_IOCTL_ARGS) 481 { 482 DRM_DEVICE; 483 struct drm_i915_gem_mmap args; 484 struct drm_gem_object *obj; 485 caddr_t vvaddr = NULL; 486 int ret; 487 488 if (dev->driver->use_gem != 1) 489 return ENODEV; 490 491 DRM_COPYFROM_WITH_RETURN( 492 &args, (struct drm_i915_gem_mmap __user *)data, 493 sizeof (struct drm_i915_gem_mmap)); 494 495 obj = drm_gem_object_lookup(fpriv, args.handle); 496 if (obj == NULL) 497 return EBADF; 498 499 ret = ddi_devmap_segmap(fpriv->dev, (off_t)obj->map->handle, 500 ttoproc(curthread)->p_as, &vvaddr, obj->map->size, 501 PROT_ALL, PROT_ALL, MAP_SHARED, fpriv->credp); 502 if (ret) 503 return ret; 504 505 spin_lock(&dev->struct_mutex); 506 drm_gem_object_unreference(obj); 507 spin_unlock(&dev->struct_mutex); 508 509 args.addr_ptr = (uint64_t)(uintptr_t)vvaddr; 510 511 DRM_COPYTO_WITH_RETURN( 512 (struct drm_i915_gem_mmap __user *)data, 513 &args, sizeof (struct drm_i915_gem_mmap)); 514 515 return 0; 516 } 517 518 static void 519 i915_gem_object_free_page_list(struct drm_gem_object *obj) 520 { 521 struct drm_i915_gem_object *obj_priv = obj->driver_private; 522 if (obj_priv->page_list == NULL) 523 return; 524 525 kmem_free(obj_priv->page_list, 526 btop(obj->size) * sizeof(caddr_t)); 527 528 obj_priv->page_list = NULL; 529 } 530 531 static void 532 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno) 533 { 534 struct drm_device *dev = obj->dev; 535 drm_i915_private_t *dev_priv = dev->dev_private; 536 struct drm_i915_gem_object *obj_priv = obj->driver_private; 537 538 /* Add a reference if we're newly entering the active list. */ 539 if (!obj_priv->active) { 540 drm_gem_object_reference(obj); 541 obj_priv->active = 1; 542 } 543 /* Move from whatever list we were on to the tail of execution. */ 544 list_move_tail(&obj_priv->list, 545 &dev_priv->mm.active_list, (caddr_t)obj_priv); 546 obj_priv->last_rendering_seqno = seqno; 547 } 548 549 static void 550 i915_gem_object_move_to_flushing(struct drm_gem_object *obj) 551 { 552 struct drm_device *dev = obj->dev; 553 drm_i915_private_t *dev_priv = dev->dev_private; 554 struct drm_i915_gem_object *obj_priv = obj->driver_private; 555 556 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list, (caddr_t)obj_priv); 557 obj_priv->last_rendering_seqno = 0; 558 } 559 560 static void 561 i915_gem_object_move_to_inactive(struct drm_gem_object *obj) 562 { 563 struct drm_device *dev = obj->dev; 564 drm_i915_private_t *dev_priv = dev->dev_private; 565 struct drm_i915_gem_object *obj_priv = obj->driver_private; 566 567 if (obj_priv->pin_count != 0) 568 { 569 list_del_init(&obj_priv->list); 570 } else { 571 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list, (caddr_t)obj_priv); 572 } 573 obj_priv->last_rendering_seqno = 0; 574 if (obj_priv->active) { 575 obj_priv->active = 0; 576 drm_gem_object_unreference(obj); 577 } 578 } 579 580 /** 581 * Creates a new sequence number, emitting a write of it to the status page 582 * plus an interrupt, which will trigger i915_user_interrupt_handler. 583 * 584 * Must be called with struct_lock held. 585 * 586 * Returned sequence numbers are nonzero on success. 587 */ 588 static uint32_t 589 i915_add_request(struct drm_device *dev, uint32_t flush_domains) 590 { 591 drm_i915_private_t *dev_priv = dev->dev_private; 592 struct drm_i915_gem_request *request; 593 uint32_t seqno; 594 int was_empty; 595 RING_LOCALS; 596 597 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); 598 if (request == NULL) { 599 DRM_ERROR("Failed to alloc request"); 600 return 0; 601 } 602 /* Grab the seqno we're going to make this request be, and bump the 603 * next (skipping 0 so it can be the reserved no-seqno value). 604 */ 605 seqno = dev_priv->mm.next_gem_seqno; 606 dev_priv->mm.next_gem_seqno++; 607 if (dev_priv->mm.next_gem_seqno == 0) 608 dev_priv->mm.next_gem_seqno++; 609 610 DRM_DEBUG("add_request seqno = %d dev 0x%lx", seqno, dev); 611 612 BEGIN_LP_RING(4); 613 OUT_RING(MI_STORE_DWORD_INDEX); 614 OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 615 OUT_RING(seqno); 616 OUT_RING(0); 617 ADVANCE_LP_RING(); 618 619 BEGIN_LP_RING(2); 620 OUT_RING(0); 621 OUT_RING(MI_USER_INTERRUPT); 622 ADVANCE_LP_RING(); 623 624 request->seqno = seqno; 625 request->emitted_jiffies = jiffies; 626 was_empty = list_empty(&dev_priv->mm.request_list); 627 list_add_tail(&request->list, &dev_priv->mm.request_list, (caddr_t)request); 628 629 /* Associate any objects on the flushing list matching the write 630 * domain we're flushing with our flush. 631 */ 632 if (flush_domains != 0) { 633 struct drm_i915_gem_object *obj_priv, *next; 634 635 obj_priv = list_entry(dev_priv->mm.flushing_list.next, struct drm_i915_gem_object, list), 636 next = list_entry(obj_priv->list.next, struct drm_i915_gem_object, list); 637 for(; &obj_priv->list != &dev_priv->mm.flushing_list; 638 obj_priv = next, 639 next = list_entry(next->list.next, struct drm_i915_gem_object, list)) { 640 struct drm_gem_object *obj = obj_priv->obj; 641 642 if ((obj->write_domain & flush_domains) == 643 obj->write_domain) { 644 obj->write_domain = 0; 645 i915_gem_object_move_to_active(obj, seqno); 646 } 647 } 648 649 } 650 651 if (was_empty && !dev_priv->mm.suspended) 652 { 653 /* change to delay HZ and then run work (not insert to workqueue of Linux) */ 654 worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ); 655 DRM_DEBUG("i915_gem: schedule_delayed_work"); 656 } 657 return seqno; 658 } 659 660 /** 661 * Command execution barrier 662 * 663 * Ensures that all commands in the ring are finished 664 * before signalling the CPU 665 */ 666 uint32_t 667 i915_retire_commands(struct drm_device *dev) 668 { 669 drm_i915_private_t *dev_priv = dev->dev_private; 670 uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 671 uint32_t flush_domains = 0; 672 RING_LOCALS; 673 674 /* The sampler always gets flushed on i965 (sigh) */ 675 if (IS_I965G(dev)) 676 flush_domains |= I915_GEM_DOMAIN_SAMPLER; 677 BEGIN_LP_RING(2); 678 OUT_RING(cmd); 679 OUT_RING(0); /* noop */ 680 ADVANCE_LP_RING(); 681 682 return flush_domains; 683 } 684 685 /** 686 * Moves buffers associated only with the given active seqno from the active 687 * to inactive list, potentially freeing them. 688 */ 689 static void 690 i915_gem_retire_request(struct drm_device *dev, 691 struct drm_i915_gem_request *request) 692 { 693 drm_i915_private_t *dev_priv = dev->dev_private; 694 /* Move any buffers on the active list that are no longer referenced 695 * by the ringbuffer to the flushing/inactive lists as appropriate. 696 */ 697 while (!list_empty(&dev_priv->mm.active_list)) { 698 struct drm_gem_object *obj; 699 struct drm_i915_gem_object *obj_priv; 700 701 obj_priv = list_entry(dev_priv->mm.active_list.next, 702 struct drm_i915_gem_object, 703 list); 704 obj = obj_priv->obj; 705 706 /* If the seqno being retired doesn't match the oldest in the 707 * list, then the oldest in the list must still be newer than 708 * this seqno. 709 */ 710 if (obj_priv->last_rendering_seqno != request->seqno) 711 return; 712 713 DRM_DEBUG("%s: retire %d moves to inactive list %p\n", 714 __func__, request->seqno, obj); 715 716 if (obj->write_domain != 0) { 717 i915_gem_object_move_to_flushing(obj); 718 } else { 719 i915_gem_object_move_to_inactive(obj); 720 } 721 } 722 } 723 724 /** 725 * Returns true if seq1 is later than seq2. 726 */ 727 static int 728 i915_seqno_passed(uint32_t seq1, uint32_t seq2) 729 { 730 return (int32_t)(seq1 - seq2) >= 0; 731 } 732 733 uint32_t 734 i915_get_gem_seqno(struct drm_device *dev) 735 { 736 drm_i915_private_t *dev_priv = dev->dev_private; 737 738 return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); 739 } 740 741 /** 742 * This function clears the request list as sequence numbers are passed. 743 */ 744 void 745 i915_gem_retire_requests(struct drm_device *dev) 746 { 747 drm_i915_private_t *dev_priv = dev->dev_private; 748 uint32_t seqno; 749 750 seqno = i915_get_gem_seqno(dev); 751 752 while (!list_empty(&dev_priv->mm.request_list)) { 753 struct drm_i915_gem_request *request; 754 uint32_t retiring_seqno; 755 request = (struct drm_i915_gem_request *)(uintptr_t)(dev_priv->mm.request_list.next->contain_ptr); 756 retiring_seqno = request->seqno; 757 758 if (i915_seqno_passed(seqno, retiring_seqno) || 759 dev_priv->mm.wedged) { 760 i915_gem_retire_request(dev, request); 761 762 list_del(&request->list); 763 drm_free(request, sizeof(*request), DRM_MEM_DRIVER); 764 } else 765 break; 766 } 767 } 768 769 void 770 i915_gem_retire_work_handler(void *device) 771 { 772 struct drm_device *dev = (struct drm_device *)device; 773 drm_i915_private_t *dev_priv = dev->dev_private; 774 775 spin_lock(&dev->struct_mutex); 776 777 /* Return if gem idle */ 778 if (worktimer_id == NULL) { 779 spin_unlock(&dev->struct_mutex); 780 return; 781 } 782 783 i915_gem_retire_requests(dev); 784 if (!dev_priv->mm.suspended && !list_empty(&dev_priv->mm.request_list)) 785 { 786 DRM_DEBUG("i915_gem: schedule_delayed_work"); 787 worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ); 788 } 789 spin_unlock(&dev->struct_mutex); 790 } 791 792 /** 793 * i965_reset - reset chip after a hang 794 * @dev: drm device to reset 795 * @flags: reset domains 796 * 797 * Reset the chip. Useful if a hang is detected. 798 * 799 * Procedure is fairly simple: 800 * - reset the chip using the reset reg 801 * - re-init context state 802 * - re-init hardware status page 803 * - re-init ring buffer 804 * - re-init interrupt state 805 * - re-init display 806 */ 807 void i965_reset(struct drm_device *dev, u8 flags) 808 { 809 ddi_acc_handle_t conf_hdl; 810 drm_i915_private_t *dev_priv = dev->dev_private; 811 int timeout = 0; 812 uint8_t gdrst; 813 814 if (flags & GDRST_FULL) 815 i915_save_display(dev); 816 817 if (pci_config_setup(dev->dip, &conf_hdl) != DDI_SUCCESS) { 818 DRM_ERROR(("i915_reset: pci_config_setup fail")); 819 return; 820 } 821 822 /* 823 * Set the reset bit, wait for reset, then clear it. Hardware 824 * will clear the status bit (bit 1) when it's actually ready 825 * for action again. 826 */ 827 gdrst = pci_config_get8(conf_hdl, GDRST); 828 pci_config_put8(conf_hdl, GDRST, gdrst | flags); 829 drv_usecwait(50); 830 pci_config_put8(conf_hdl, GDRST, gdrst | 0xfe); 831 832 /* ...we don't want to loop forever though, 500ms should be plenty */ 833 do { 834 drv_usecwait(100); 835 gdrst = pci_config_get8(conf_hdl, GDRST); 836 } while ((gdrst & 2) && (timeout++ < 5)); 837 838 /* Ok now get things going again... */ 839 840 /* 841 * Everything depends on having the GTT running, so we need to start 842 * there. Fortunately we don't need to do this unless we reset the 843 * chip at a PCI level. 844 * 845 * Next we need to restore the context, but we don't use those 846 * yet either... 847 * 848 * Ring buffer needs to be re-initialized in the KMS case, or if X 849 * was running at the time of the reset (i.e. we weren't VT 850 * switched away). 851 */ 852 if (!dev_priv->mm.suspended) { 853 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 854 struct drm_gem_object *obj = ring->ring_obj; 855 struct drm_i915_gem_object *obj_priv = obj->driver_private; 856 dev_priv->mm.suspended = 0; 857 858 /* Stop the ring if it's running. */ 859 I915_WRITE(PRB0_CTL, 0); 860 I915_WRITE(PRB0_TAIL, 0); 861 I915_WRITE(PRB0_HEAD, 0); 862 863 /* Initialize the ring. */ 864 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 865 I915_WRITE(PRB0_CTL, 866 ((obj->size - 4096) & RING_NR_PAGES) | 867 RING_NO_REPORT | 868 RING_VALID); 869 i915_kernel_lost_context(dev); 870 871 drm_irq_install(dev); 872 } 873 874 /* 875 * Display needs restore too... 876 */ 877 if (flags & GDRST_FULL) 878 i915_restore_display(dev); 879 } 880 881 /** 882 * Waits for a sequence number to be signaled, and cleans up the 883 * request and object lists appropriately for that event. 884 */ 885 int 886 i915_wait_request(struct drm_device *dev, uint32_t seqno) 887 { 888 drm_i915_private_t *dev_priv = dev->dev_private; 889 u32 ier; 890 int ret = 0; 891 892 ASSERT(seqno != 0); 893 894 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { 895 if (IS_IGDNG(dev)) 896 ier = I915_READ(DEIER) | I915_READ(GTIER); 897 else 898 ier = I915_READ(IER); 899 if (!ier) { 900 DRM_ERROR("something (likely vbetool) disabled " 901 "interrupts, re-enabling\n"); 902 (void) i915_driver_irq_preinstall(dev); 903 i915_driver_irq_postinstall(dev); 904 } 905 906 dev_priv->mm.waiting_gem_seqno = seqno; 907 i915_user_irq_on(dev); 908 DRM_WAIT(ret, &dev_priv->irq_queue, 909 (i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || 910 dev_priv->mm.wedged)); 911 i915_user_irq_off(dev); 912 dev_priv->mm.waiting_gem_seqno = 0; 913 } 914 if (dev_priv->mm.wedged) { 915 ret = EIO; 916 } 917 918 /* GPU maybe hang, reset needed*/ 919 if (ret == -2 && (seqno > i915_get_gem_seqno(dev))) { 920 if (IS_I965G(dev)) { 921 DRM_ERROR("GPU hang detected try to reset ... wait for irq_queue seqno %d, now seqno %d", seqno, i915_get_gem_seqno(dev)); 922 dev_priv->mm.wedged = 1; 923 i965_reset(dev, GDRST_RENDER); 924 i915_gem_retire_requests(dev); 925 dev_priv->mm.wedged = 0; 926 } 927 else 928 DRM_ERROR("GPU hang detected.... reboot required"); 929 return 0; 930 } 931 /* Directly dispatch request retiring. While we have the work queue 932 * to handle this, the waiter on a request often wants an associated 933 * buffer to have made it to the inactive list, and we would need 934 * a separate wait queue to handle that. 935 */ 936 if (ret == 0) 937 i915_gem_retire_requests(dev); 938 939 return ret; 940 } 941 942 static void 943 i915_gem_flush(struct drm_device *dev, 944 uint32_t invalidate_domains, 945 uint32_t flush_domains) 946 { 947 drm_i915_private_t *dev_priv = dev->dev_private; 948 uint32_t cmd; 949 RING_LOCALS; 950 951 DRM_DEBUG("%s: invalidate %08x flush %08x\n", __func__, 952 invalidate_domains, flush_domains); 953 954 if (flush_domains & I915_GEM_DOMAIN_CPU) 955 drm_agp_chipset_flush(dev); 956 957 if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU | 958 I915_GEM_DOMAIN_GTT)) { 959 /* 960 * read/write caches: 961 * 962 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 963 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 964 * also flushed at 2d versus 3d pipeline switches. 965 * 966 * read-only caches: 967 * 968 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 969 * MI_READ_FLUSH is set, and is always flushed on 965. 970 * 971 * I915_GEM_DOMAIN_COMMAND may not exist? 972 * 973 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 974 * invalidated when MI_EXE_FLUSH is set. 975 * 976 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 977 * invalidated with every MI_FLUSH. 978 * 979 * TLBs: 980 * 981 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 982 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 983 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 984 * are flushed at any MI_FLUSH. 985 */ 986 987 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 988 if ((invalidate_domains|flush_domains) & 989 I915_GEM_DOMAIN_RENDER) 990 cmd &= ~MI_NO_WRITE_FLUSH; 991 if (!IS_I965G(dev)) { 992 /* 993 * On the 965, the sampler cache always gets flushed 994 * and this bit is reserved. 995 */ 996 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 997 cmd |= MI_READ_FLUSH; 998 } 999 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 1000 cmd |= MI_EXE_FLUSH; 1001 1002 DRM_DEBUG("%s: queue flush %08x to ring\n", __func__, cmd); 1003 1004 BEGIN_LP_RING(2); 1005 OUT_RING(cmd); 1006 OUT_RING(0); /* noop */ 1007 ADVANCE_LP_RING(); 1008 } 1009 } 1010 1011 /** 1012 * Ensures that all rendering to the object has completed and the object is 1013 * safe to unbind from the GTT or access from the CPU. 1014 */ 1015 static int 1016 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1017 { 1018 struct drm_device *dev = obj->dev; 1019 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1020 int ret, seqno; 1021 1022 /* This function only exists to support waiting for existing rendering, 1023 * not for emitting required flushes. 1024 */ 1025 1026 if((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0) { 1027 DRM_ERROR("write domain should not be GPU DOMAIN %d", obj_priv->active); 1028 return 0; 1029 } 1030 1031 /* If there is rendering queued on the buffer being evicted, wait for 1032 * it. 1033 */ 1034 if (obj_priv->active) { 1035 DRM_DEBUG("%s: object %d %p wait for seqno %08x\n", 1036 __func__, obj->name, obj, obj_priv->last_rendering_seqno); 1037 1038 seqno = obj_priv->last_rendering_seqno; 1039 if (seqno == 0) { 1040 DRM_DEBUG("last rendering maybe finished"); 1041 return 0; 1042 } 1043 ret = i915_wait_request(dev, seqno); 1044 if (ret != 0) { 1045 DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev)); 1046 return ret; 1047 } 1048 } 1049 1050 return 0; 1051 } 1052 1053 /** 1054 * Unbinds an object from the GTT aperture. 1055 */ 1056 int 1057 i915_gem_object_unbind(struct drm_gem_object *obj, uint32_t type) 1058 { 1059 struct drm_device *dev = obj->dev; 1060 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1061 int ret = 0; 1062 1063 if (obj_priv->gtt_space == NULL) 1064 return 0; 1065 1066 if (obj_priv->pin_count != 0) { 1067 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1068 return EINVAL; 1069 } 1070 1071 /* Wait for any rendering to complete 1072 */ 1073 ret = i915_gem_object_wait_rendering(obj); 1074 if (ret) { 1075 DRM_ERROR("wait_rendering failed: %d\n", ret); 1076 return ret; 1077 } 1078 1079 /* Move the object to the CPU domain to ensure that 1080 * any possible CPU writes while it's not in the GTT 1081 * are flushed when we go to remap it. This will 1082 * also ensure that all pending GPU writes are finished 1083 * before we unbind. 1084 */ 1085 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1086 if (ret) { 1087 DRM_ERROR("set_domain failed: %d\n", ret); 1088 return ret; 1089 } 1090 1091 if (!obj_priv->agp_mem) { 1092 drm_agp_unbind_pages(dev, obj->size / PAGE_SIZE, obj_priv->gtt_offset, type); 1093 obj_priv->agp_mem = -1; 1094 } 1095 1096 ASSERT(!obj_priv->active); 1097 1098 i915_gem_object_free_page_list(obj); 1099 1100 if (obj_priv->gtt_space) { 1101 atomic_dec(&dev->gtt_count); 1102 atomic_sub(obj->size, &dev->gtt_memory); 1103 drm_mm_put_block(obj_priv->gtt_space); 1104 obj_priv->gtt_space = NULL; 1105 } 1106 1107 /* Remove ourselves from the LRU list if present. */ 1108 if (!list_empty(&obj_priv->list)) 1109 list_del_init(&obj_priv->list); 1110 1111 return 0; 1112 } 1113 1114 static int 1115 i915_gem_evict_something(struct drm_device *dev) 1116 { 1117 drm_i915_private_t *dev_priv = dev->dev_private; 1118 struct drm_gem_object *obj; 1119 struct drm_i915_gem_object *obj_priv; 1120 int ret = 0; 1121 1122 for (;;) { 1123 /* If there's an inactive buffer available now, grab it 1124 * and be done. 1125 */ 1126 if (!list_empty(&dev_priv->mm.inactive_list)) { 1127 obj_priv = list_entry(dev_priv->mm.inactive_list.next, 1128 struct drm_i915_gem_object, 1129 list); 1130 obj = obj_priv->obj; 1131 ASSERT(!(obj_priv->pin_count != 0)); 1132 DRM_DEBUG("%s: evicting %d\n", __func__, obj->name); 1133 ASSERT(!(obj_priv->active)); 1134 /* Wait on the rendering and unbind the buffer. */ 1135 ret = i915_gem_object_unbind(obj, 1); 1136 break; 1137 } 1138 /* If we didn't get anything, but the ring is still processing 1139 * things, wait for one of those things to finish and hopefully 1140 * leave us a buffer to evict. 1141 */ 1142 if (!list_empty(&dev_priv->mm.request_list)) { 1143 struct drm_i915_gem_request *request; 1144 1145 request = list_entry(dev_priv->mm.request_list.next, 1146 struct drm_i915_gem_request, 1147 list); 1148 1149 ret = i915_wait_request(dev, request->seqno); 1150 if (ret) { 1151 break; 1152 } 1153 /* if waiting caused an object to become inactive, 1154 * then loop around and wait for it. Otherwise, we 1155 * assume that waiting freed and unbound something, 1156 * so there should now be some space in the GTT 1157 */ 1158 if (!list_empty(&dev_priv->mm.inactive_list)) 1159 continue; 1160 break; 1161 } 1162 1163 /* If we didn't have anything on the request list but there 1164 * are buffers awaiting a flush, emit one and try again. 1165 * When we wait on it, those buffers waiting for that flush 1166 * will get moved to inactive. 1167 */ 1168 if (!list_empty(&dev_priv->mm.flushing_list)) { 1169 obj_priv = list_entry(dev_priv->mm.flushing_list.next, 1170 struct drm_i915_gem_object, 1171 list); 1172 obj = obj_priv->obj; 1173 1174 i915_gem_flush(dev, 1175 obj->write_domain, 1176 obj->write_domain); 1177 (void) i915_add_request(dev, obj->write_domain); 1178 1179 obj = NULL; 1180 continue; 1181 } 1182 1183 DRM_ERROR("inactive empty %d request empty %d " 1184 "flushing empty %d\n", 1185 list_empty(&dev_priv->mm.inactive_list), 1186 list_empty(&dev_priv->mm.request_list), 1187 list_empty(&dev_priv->mm.flushing_list)); 1188 /* If we didn't do any of the above, there's nothing to be done 1189 * and we just can't fit it in. 1190 */ 1191 return ENOMEM; 1192 } 1193 return ret; 1194 } 1195 1196 static int 1197 i915_gem_evict_everything(struct drm_device *dev) 1198 { 1199 int ret; 1200 1201 for (;;) { 1202 ret = i915_gem_evict_something(dev); 1203 if (ret != 0) 1204 break; 1205 } 1206 if (ret == ENOMEM) 1207 return 0; 1208 else 1209 DRM_ERROR("evict_everything ret %d", ret); 1210 return ret; 1211 } 1212 1213 /** 1214 * Finds free space in the GTT aperture and binds the object there. 1215 */ 1216 static int 1217 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, uint32_t alignment) 1218 { 1219 struct drm_device *dev = obj->dev; 1220 drm_i915_private_t *dev_priv = dev->dev_private; 1221 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1222 struct drm_mm_node *free_space; 1223 int page_count, ret; 1224 1225 if (dev_priv->mm.suspended) 1226 return EBUSY; 1227 if (alignment == 0) 1228 alignment = PAGE_SIZE; 1229 if (alignment & (PAGE_SIZE - 1)) { 1230 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 1231 return EINVAL; 1232 } 1233 1234 if (obj_priv->gtt_space) { 1235 DRM_ERROR("Already bind!!"); 1236 return 0; 1237 } 1238 search_free: 1239 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 1240 (unsigned long) obj->size, alignment, 0); 1241 if (free_space != NULL) { 1242 obj_priv->gtt_space = drm_mm_get_block(free_space, (unsigned long) obj->size, 1243 alignment); 1244 if (obj_priv->gtt_space != NULL) { 1245 obj_priv->gtt_space->private = obj; 1246 obj_priv->gtt_offset = obj_priv->gtt_space->start; 1247 } 1248 } 1249 if (obj_priv->gtt_space == NULL) { 1250 /* If the gtt is empty and we're still having trouble 1251 * fitting our object in, we're out of memory. 1252 */ 1253 if (list_empty(&dev_priv->mm.inactive_list) && 1254 list_empty(&dev_priv->mm.flushing_list) && 1255 list_empty(&dev_priv->mm.active_list)) { 1256 DRM_ERROR("GTT full, but LRU list empty\n"); 1257 return ENOMEM; 1258 } 1259 1260 ret = i915_gem_evict_something(dev); 1261 if (ret != 0) { 1262 DRM_ERROR("Failed to evict a buffer %d\n", ret); 1263 return ret; 1264 } 1265 goto search_free; 1266 } 1267 1268 ret = i915_gem_object_get_page_list(obj); 1269 if (ret) { 1270 drm_mm_put_block(obj_priv->gtt_space); 1271 obj_priv->gtt_space = NULL; 1272 DRM_ERROR("bind to gtt failed to get page list"); 1273 return ret; 1274 } 1275 1276 page_count = obj->size / PAGE_SIZE; 1277 /* Create an AGP memory structure pointing at our pages, and bind it 1278 * into the GTT. 1279 */ 1280 DRM_DEBUG("Binding object %d of page_count %d at gtt_offset 0x%x obj->pfnarray = 0x%lx", 1281 obj->name, page_count, obj_priv->gtt_offset, obj->pfnarray); 1282 1283 obj_priv->agp_mem = drm_agp_bind_pages(dev, 1284 obj->pfnarray, 1285 page_count, 1286 obj_priv->gtt_offset); 1287 if (obj_priv->agp_mem) { 1288 i915_gem_object_free_page_list(obj); 1289 drm_mm_put_block(obj_priv->gtt_space); 1290 obj_priv->gtt_space = NULL; 1291 DRM_ERROR("Failed to bind pages obj %d, obj 0x%lx", obj->name, obj); 1292 return ENOMEM; 1293 } 1294 atomic_inc(&dev->gtt_count); 1295 atomic_add(obj->size, &dev->gtt_memory); 1296 1297 /* Assert that the object is not currently in any GPU domain. As it 1298 * wasn't in the GTT, there shouldn't be any way it could have been in 1299 * a GPU cache 1300 */ 1301 ASSERT(!(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))); 1302 ASSERT(!(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))); 1303 1304 return 0; 1305 } 1306 1307 void 1308 i915_gem_clflush_object(struct drm_gem_object *obj) 1309 { 1310 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1311 1312 /* If we don't have a page list set up, then we're not pinned 1313 * to GPU, and we can ignore the cache flush because it'll happen 1314 * again at bind time. 1315 */ 1316 1317 if (obj_priv->page_list == NULL) 1318 return; 1319 drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE); 1320 } 1321 1322 /** Flushes any GPU write domain for the object if it's dirty. */ 1323 static void 1324 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 1325 { 1326 struct drm_device *dev = obj->dev; 1327 uint32_t seqno; 1328 1329 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 1330 return; 1331 1332 /* Queue the GPU write cache flushing we need. */ 1333 i915_gem_flush(dev, 0, obj->write_domain); 1334 seqno = i915_add_request(dev, obj->write_domain); 1335 DRM_DEBUG("flush_gpu_write_domain seqno = %d", seqno); 1336 obj->write_domain = 0; 1337 i915_gem_object_move_to_active(obj, seqno); 1338 } 1339 1340 /** Flushes the GTT write domain for the object if it's dirty. */ 1341 static void 1342 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 1343 { 1344 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 1345 return; 1346 1347 /* No actual flushing is required for the GTT write domain. Writes 1348 * to it immediately go to main memory as far as we know, so there's 1349 * no chipset flush. It also doesn't land in render cache. 1350 */ 1351 obj->write_domain = 0; 1352 } 1353 1354 /** Flushes the CPU write domain for the object if it's dirty. */ 1355 static void 1356 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 1357 { 1358 struct drm_device *dev = obj->dev; 1359 1360 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 1361 return; 1362 1363 i915_gem_clflush_object(obj); 1364 drm_agp_chipset_flush(dev); 1365 obj->write_domain = 0; 1366 } 1367 1368 /** 1369 * Moves a single object to the GTT read, and possibly write domain. 1370 * 1371 * This function returns when the move is complete, including waiting on 1372 * flushes to occur. 1373 */ 1374 static int 1375 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 1376 { 1377 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1378 int ret; 1379 1380 /* Not valid to be called on unbound objects. */ 1381 if (obj_priv->gtt_space == NULL) 1382 return EINVAL; 1383 1384 i915_gem_object_flush_gpu_write_domain(obj); 1385 /* Wait on any GPU rendering and flushing to occur. */ 1386 ret = i915_gem_object_wait_rendering(obj); 1387 if (ret != 0) { 1388 DRM_ERROR("set_to_gtt_domain wait_rendering ret %d", ret); 1389 return ret; 1390 } 1391 /* If we're writing through the GTT domain, then CPU and GPU caches 1392 * will need to be invalidated at next use. 1393 */ 1394 if (write) 1395 obj->read_domains &= I915_GEM_DOMAIN_GTT; 1396 i915_gem_object_flush_cpu_write_domain(obj); 1397 1398 DRM_DEBUG("i915_gem_object_set_to_gtt_domain obj->read_domains %x ", obj->read_domains); 1399 /* It should now be out of any other write domains, and we can update 1400 * the domain values for our changes. 1401 */ 1402 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0)); 1403 obj->read_domains |= I915_GEM_DOMAIN_GTT; 1404 if (write) { 1405 obj->write_domain = I915_GEM_DOMAIN_GTT; 1406 obj_priv->dirty = 1; 1407 } 1408 1409 return 0; 1410 } 1411 1412 /** 1413 * Moves a single object to the CPU read, and possibly write domain. 1414 * 1415 * This function returns when the move is complete, including waiting on 1416 * flushes to occur. 1417 */ 1418 static int 1419 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 1420 { 1421 struct drm_device *dev = obj->dev; 1422 int ret; 1423 1424 1425 i915_gem_object_flush_gpu_write_domain(obj); 1426 /* Wait on any GPU rendering and flushing to occur. */ 1427 1428 ret = i915_gem_object_wait_rendering(obj); 1429 if (ret != 0) 1430 return ret; 1431 1432 i915_gem_object_flush_gtt_write_domain(obj); 1433 1434 /* If we have a partially-valid cache of the object in the CPU, 1435 * finish invalidating it and free the per-page flags. 1436 */ 1437 i915_gem_object_set_to_full_cpu_read_domain(obj); 1438 1439 /* Flush the CPU cache if it's still invalid. */ 1440 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 1441 i915_gem_clflush_object(obj); 1442 drm_agp_chipset_flush(dev); 1443 obj->read_domains |= I915_GEM_DOMAIN_CPU; 1444 } 1445 1446 /* It should now be out of any other write domains, and we can update 1447 * the domain values for our changes. 1448 */ 1449 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0)); 1450 1451 /* If we're writing through the CPU, then the GPU read domains will 1452 * need to be invalidated at next use. 1453 */ 1454 if (write) { 1455 obj->read_domains &= I915_GEM_DOMAIN_CPU; 1456 obj->write_domain = I915_GEM_DOMAIN_CPU; 1457 } 1458 1459 return 0; 1460 } 1461 1462 /* 1463 * Set the next domain for the specified object. This 1464 * may not actually perform the necessary flushing/invaliding though, 1465 * as that may want to be batched with other set_domain operations 1466 * 1467 * This is (we hope) the only really tricky part of gem. The goal 1468 * is fairly simple -- track which caches hold bits of the object 1469 * and make sure they remain coherent. A few concrete examples may 1470 * help to explain how it works. For shorthand, we use the notation 1471 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 1472 * a pair of read and write domain masks. 1473 * 1474 * Case 1: the batch buffer 1475 * 1476 * 1. Allocated 1477 * 2. Written by CPU 1478 * 3. Mapped to GTT 1479 * 4. Read by GPU 1480 * 5. Unmapped from GTT 1481 * 6. Freed 1482 * 1483 * Let's take these a step at a time 1484 * 1485 * 1. Allocated 1486 * Pages allocated from the kernel may still have 1487 * cache contents, so we set them to (CPU, CPU) always. 1488 * 2. Written by CPU (using pwrite) 1489 * The pwrite function calls set_domain (CPU, CPU) and 1490 * this function does nothing (as nothing changes) 1491 * 3. Mapped by GTT 1492 * This function asserts that the object is not 1493 * currently in any GPU-based read or write domains 1494 * 4. Read by GPU 1495 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 1496 * As write_domain is zero, this function adds in the 1497 * current read domains (CPU+COMMAND, 0). 1498 * flush_domains is set to CPU. 1499 * invalidate_domains is set to COMMAND 1500 * clflush is run to get data out of the CPU caches 1501 * then i915_dev_set_domain calls i915_gem_flush to 1502 * emit an MI_FLUSH and drm_agp_chipset_flush 1503 * 5. Unmapped from GTT 1504 * i915_gem_object_unbind calls set_domain (CPU, CPU) 1505 * flush_domains and invalidate_domains end up both zero 1506 * so no flushing/invalidating happens 1507 * 6. Freed 1508 * yay, done 1509 * 1510 * Case 2: The shared render buffer 1511 * 1512 * 1. Allocated 1513 * 2. Mapped to GTT 1514 * 3. Read/written by GPU 1515 * 4. set_domain to (CPU,CPU) 1516 * 5. Read/written by CPU 1517 * 6. Read/written by GPU 1518 * 1519 * 1. Allocated 1520 * Same as last example, (CPU, CPU) 1521 * 2. Mapped to GTT 1522 * Nothing changes (assertions find that it is not in the GPU) 1523 * 3. Read/written by GPU 1524 * execbuffer calls set_domain (RENDER, RENDER) 1525 * flush_domains gets CPU 1526 * invalidate_domains gets GPU 1527 * clflush (obj) 1528 * MI_FLUSH and drm_agp_chipset_flush 1529 * 4. set_domain (CPU, CPU) 1530 * flush_domains gets GPU 1531 * invalidate_domains gets CPU 1532 * wait_rendering (obj) to make sure all drawing is complete. 1533 * This will include an MI_FLUSH to get the data from GPU 1534 * to memory 1535 * clflush (obj) to invalidate the CPU cache 1536 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 1537 * 5. Read/written by CPU 1538 * cache lines are loaded and dirtied 1539 * 6. Read written by GPU 1540 * Same as last GPU access 1541 * 1542 * Case 3: The constant buffer 1543 * 1544 * 1. Allocated 1545 * 2. Written by CPU 1546 * 3. Read by GPU 1547 * 4. Updated (written) by CPU again 1548 * 5. Read by GPU 1549 * 1550 * 1. Allocated 1551 * (CPU, CPU) 1552 * 2. Written by CPU 1553 * (CPU, CPU) 1554 * 3. Read by GPU 1555 * (CPU+RENDER, 0) 1556 * flush_domains = CPU 1557 * invalidate_domains = RENDER 1558 * clflush (obj) 1559 * MI_FLUSH 1560 * drm_agp_chipset_flush 1561 * 4. Updated (written) by CPU again 1562 * (CPU, CPU) 1563 * flush_domains = 0 (no previous write domain) 1564 * invalidate_domains = 0 (no new read domains) 1565 * 5. Read by GPU 1566 * (CPU+RENDER, 0) 1567 * flush_domains = CPU 1568 * invalidate_domains = RENDER 1569 * clflush (obj) 1570 * MI_FLUSH 1571 * drm_agp_chipset_flush 1572 */ 1573 static void 1574 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj, 1575 uint32_t read_domains, 1576 uint32_t write_domain) 1577 { 1578 struct drm_device *dev = obj->dev; 1579 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1580 uint32_t invalidate_domains = 0; 1581 uint32_t flush_domains = 0; 1582 1583 DRM_DEBUG("%s: object %p read %08x -> %08x write %08x -> %08x\n", 1584 __func__, obj, 1585 obj->read_domains, read_domains, 1586 obj->write_domain, write_domain); 1587 /* 1588 * If the object isn't moving to a new write domain, 1589 * let the object stay in multiple read domains 1590 */ 1591 if (write_domain == 0) 1592 read_domains |= obj->read_domains; 1593 else 1594 obj_priv->dirty = 1; 1595 1596 /* 1597 * Flush the current write domain if 1598 * the new read domains don't match. Invalidate 1599 * any read domains which differ from the old 1600 * write domain 1601 */ 1602 if (obj->write_domain && obj->write_domain != read_domains) { 1603 flush_domains |= obj->write_domain; 1604 invalidate_domains |= read_domains & ~obj->write_domain; 1605 } 1606 /* 1607 * Invalidate any read caches which may have 1608 * stale data. That is, any new read domains. 1609 */ 1610 invalidate_domains |= read_domains & ~obj->read_domains; 1611 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 1612 DRM_DEBUG("%s: CPU domain flush %08x invalidate %08x\n", 1613 __func__, flush_domains, invalidate_domains); 1614 i915_gem_clflush_object(obj); 1615 } 1616 1617 if ((write_domain | flush_domains) != 0) 1618 obj->write_domain = write_domain; 1619 obj->read_domains = read_domains; 1620 1621 dev->invalidate_domains |= invalidate_domains; 1622 dev->flush_domains |= flush_domains; 1623 1624 DRM_DEBUG("%s: read %08x write %08x invalidate %08x flush %08x\n", 1625 __func__, 1626 obj->read_domains, obj->write_domain, 1627 dev->invalidate_domains, dev->flush_domains); 1628 1629 } 1630 1631 /** 1632 * Moves the object from a partially CPU read to a full one. 1633 * 1634 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 1635 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 1636 */ 1637 static void 1638 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 1639 { 1640 struct drm_device *dev = obj->dev; 1641 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1642 1643 if (!obj_priv->page_cpu_valid) 1644 return; 1645 1646 /* If we're partially in the CPU read domain, finish moving it in. 1647 */ 1648 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 1649 int i; 1650 1651 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 1652 if (obj_priv->page_cpu_valid[i]) 1653 continue; 1654 drm_clflush_pages(obj_priv->page_list + i, 1); 1655 } 1656 drm_agp_chipset_flush(dev); 1657 } 1658 1659 /* Free the page_cpu_valid mappings which are now stale, whether 1660 * or not we've got I915_GEM_DOMAIN_CPU. 1661 */ 1662 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, 1663 DRM_MEM_DRIVER); 1664 obj_priv->page_cpu_valid = NULL; 1665 } 1666 1667 /** 1668 * Set the CPU read domain on a range of the object. 1669 * 1670 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 1671 * not entirely valid. The page_cpu_valid member of the object flags which 1672 * pages have been flushed, and will be respected by 1673 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 1674 * of the whole object. 1675 * 1676 * This function returns when the move is complete, including waiting on 1677 * flushes to occur. 1678 */ 1679 static int 1680 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 1681 uint64_t offset, uint64_t size) 1682 { 1683 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1684 int i, ret; 1685 1686 if (offset == 0 && size == obj->size) 1687 return i915_gem_object_set_to_cpu_domain(obj, 0); 1688 1689 i915_gem_object_flush_gpu_write_domain(obj); 1690 /* Wait on any GPU rendering and flushing to occur. */ 1691 ret = i915_gem_object_wait_rendering(obj); 1692 if (ret != 0) 1693 return ret; 1694 i915_gem_object_flush_gtt_write_domain(obj); 1695 1696 /* If we're already fully in the CPU read domain, we're done. */ 1697 if (obj_priv->page_cpu_valid == NULL && 1698 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 1699 return 0; 1700 1701 /* Otherwise, create/clear the per-page CPU read domain flag if we're 1702 * newly adding I915_GEM_DOMAIN_CPU 1703 */ 1704 if (obj_priv->page_cpu_valid == NULL) { 1705 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE, 1706 DRM_MEM_DRIVER); 1707 if (obj_priv->page_cpu_valid == NULL) 1708 return ENOMEM; 1709 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 1710 (void) memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 1711 1712 /* Flush the cache on any pages that are still invalid from the CPU's 1713 * perspective. 1714 */ 1715 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 1716 i++) { 1717 if (obj_priv->page_cpu_valid[i]) 1718 continue; 1719 1720 drm_clflush_pages(obj_priv->page_list + i, 1); 1721 obj_priv->page_cpu_valid[i] = 1; 1722 } 1723 1724 /* It should now be out of any other write domains, and we can update 1725 * the domain values for our changes. 1726 */ 1727 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0)); 1728 1729 obj->read_domains |= I915_GEM_DOMAIN_CPU; 1730 1731 return 0; 1732 } 1733 1734 /** 1735 * Pin an object to the GTT and evaluate the relocations landing in it. 1736 */ 1737 static int 1738 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 1739 struct drm_file *file_priv, 1740 struct drm_i915_gem_exec_object *entry) 1741 { 1742 struct drm_i915_gem_relocation_entry reloc; 1743 struct drm_i915_gem_relocation_entry __user *relocs; 1744 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1745 int i, ret; 1746 1747 /* Choose the GTT offset for our buffer and put it there. */ 1748 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 1749 if (ret) { 1750 DRM_ERROR("failed to pin"); 1751 return ret; 1752 } 1753 entry->offset = obj_priv->gtt_offset; 1754 1755 relocs = (struct drm_i915_gem_relocation_entry __user *) 1756 (uintptr_t) entry->relocs_ptr; 1757 /* Apply the relocations, using the GTT aperture to avoid cache 1758 * flushing requirements. 1759 */ 1760 for (i = 0; i < entry->relocation_count; i++) { 1761 struct drm_gem_object *target_obj; 1762 struct drm_i915_gem_object *target_obj_priv; 1763 uint32_t reloc_val, reloc_offset, *reloc_entry; 1764 1765 ret = DRM_COPY_FROM_USER(&reloc, relocs + i, sizeof(reloc)); 1766 if (ret != 0) { 1767 i915_gem_object_unpin(obj); 1768 DRM_ERROR("failed to copy from user"); 1769 return ret; 1770 } 1771 1772 target_obj = drm_gem_object_lookup(file_priv, 1773 reloc.target_handle); 1774 if (target_obj == NULL) { 1775 i915_gem_object_unpin(obj); 1776 return EBADF; 1777 } 1778 target_obj_priv = target_obj->driver_private; 1779 1780 /* The target buffer should have appeared before us in the 1781 * exec_object list, so it should have a GTT space bound by now. 1782 */ 1783 if (target_obj_priv->gtt_space == NULL) { 1784 DRM_ERROR("No GTT space found for object %d\n", 1785 reloc.target_handle); 1786 drm_gem_object_unreference(target_obj); 1787 i915_gem_object_unpin(obj); 1788 return EINVAL; 1789 } 1790 1791 if (reloc.offset > obj->size - 4) { 1792 DRM_ERROR("Relocation beyond object bounds: " 1793 "obj %p target %d offset %d size %d.\n", 1794 obj, reloc.target_handle, 1795 (int) reloc.offset, (int) obj->size); 1796 drm_gem_object_unreference(target_obj); 1797 i915_gem_object_unpin(obj); 1798 return EINVAL; 1799 } 1800 if (reloc.offset & 3) { 1801 DRM_ERROR("Relocation not 4-byte aligned: " 1802 "obj %p target %d offset %d.\n", 1803 obj, reloc.target_handle, 1804 (int) reloc.offset); 1805 drm_gem_object_unreference(target_obj); 1806 i915_gem_object_unpin(obj); 1807 return EINVAL; 1808 } 1809 1810 if (reloc.write_domain & I915_GEM_DOMAIN_CPU || 1811 reloc.read_domains & I915_GEM_DOMAIN_CPU) { 1812 DRM_ERROR("reloc with read/write CPU domains: " 1813 "obj %p target %d offset %d " 1814 "read %08x write %08x", 1815 obj, reloc.target_handle, 1816 (int) reloc.offset, 1817 reloc.read_domains, 1818 reloc.write_domain); 1819 drm_gem_object_unreference(target_obj); 1820 i915_gem_object_unpin(obj); 1821 return EINVAL; 1822 } 1823 1824 if (reloc.write_domain && target_obj->pending_write_domain && 1825 reloc.write_domain != target_obj->pending_write_domain) { 1826 DRM_ERROR("Write domain conflict: " 1827 "obj %p target %d offset %d " 1828 "new %08x old %08x\n", 1829 obj, reloc.target_handle, 1830 (int) reloc.offset, 1831 reloc.write_domain, 1832 target_obj->pending_write_domain); 1833 drm_gem_object_unreference(target_obj); 1834 i915_gem_object_unpin(obj); 1835 return EINVAL; 1836 } 1837 DRM_DEBUG("%s: obj %p offset %08x target %d " 1838 "read %08x write %08x gtt %08x " 1839 "presumed %08x delta %08x\n", 1840 __func__, 1841 obj, 1842 (int) reloc.offset, 1843 (int) reloc.target_handle, 1844 (int) reloc.read_domains, 1845 (int) reloc.write_domain, 1846 (int) target_obj_priv->gtt_offset, 1847 (int) reloc.presumed_offset, 1848 reloc.delta); 1849 1850 target_obj->pending_read_domains |= reloc.read_domains; 1851 target_obj->pending_write_domain |= reloc.write_domain; 1852 1853 /* If the relocation already has the right value in it, no 1854 * more work needs to be done. 1855 */ 1856 if (target_obj_priv->gtt_offset == reloc.presumed_offset) { 1857 drm_gem_object_unreference(target_obj); 1858 continue; 1859 } 1860 1861 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 1862 if (ret != 0) { 1863 drm_gem_object_unreference(target_obj); 1864 i915_gem_object_unpin(obj); 1865 return EINVAL; 1866 } 1867 1868 /* Map the page containing the relocation we're going to 1869 * perform. 1870 */ 1871 1872 int reloc_base = (reloc.offset & ~(PAGE_SIZE-1)); 1873 reloc_offset = reloc.offset & (PAGE_SIZE-1); 1874 reloc_entry = (uint32_t *)(uintptr_t)(obj_priv->page_list[reloc_base/PAGE_SIZE] + reloc_offset); 1875 reloc_val = target_obj_priv->gtt_offset + reloc.delta; 1876 *reloc_entry = reloc_val; 1877 1878 /* Write the updated presumed offset for this entry back out 1879 * to the user. 1880 */ 1881 reloc.presumed_offset = target_obj_priv->gtt_offset; 1882 ret = DRM_COPY_TO_USER(relocs + i, &reloc, sizeof(reloc)); 1883 if (ret != 0) { 1884 drm_gem_object_unreference(target_obj); 1885 i915_gem_object_unpin(obj); 1886 DRM_ERROR("%s: Failed to copy to user ret %d", __func__, ret); 1887 return ret; 1888 } 1889 1890 drm_gem_object_unreference(target_obj); 1891 } 1892 1893 return 0; 1894 } 1895 1896 /** Dispatch a batchbuffer to the ring 1897 */ 1898 static int 1899 i915_dispatch_gem_execbuffer(struct drm_device *dev, 1900 struct drm_i915_gem_execbuffer *exec, 1901 uint64_t exec_offset) 1902 { 1903 drm_i915_private_t *dev_priv = dev->dev_private; 1904 struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) 1905 (uintptr_t) exec->cliprects_ptr; 1906 int nbox = exec->num_cliprects; 1907 int i = 0, count; 1908 uint64_t exec_start, exec_len; 1909 RING_LOCALS; 1910 1911 exec_start = exec_offset + exec->batch_start_offset; 1912 exec_len = exec->batch_len; 1913 1914 if ((exec_start | exec_len) & 0x7) { 1915 DRM_ERROR("alignment\n"); 1916 return EINVAL; 1917 } 1918 1919 if (!exec_start) { 1920 DRM_ERROR("wrong arg"); 1921 return EINVAL; 1922 } 1923 1924 count = nbox ? nbox : 1; 1925 1926 for (i = 0; i < count; i++) { 1927 if (i < nbox) { 1928 int ret = i915_emit_box(dev, boxes, i, 1929 exec->DR1, exec->DR4); 1930 if (ret) { 1931 DRM_ERROR("i915_emit_box %d DR1 0x%lx DRI2 0x%lx", ret, exec->DR1, exec->DR4); 1932 return ret; 1933 } 1934 } 1935 if (IS_I830(dev) || IS_845G(dev)) { 1936 BEGIN_LP_RING(4); 1937 OUT_RING(MI_BATCH_BUFFER); 1938 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1939 OUT_RING(exec_start + exec_len - 4); 1940 OUT_RING(0); 1941 ADVANCE_LP_RING(); 1942 } else { 1943 BEGIN_LP_RING(2); 1944 if (IS_I965G(dev)) { 1945 OUT_RING(MI_BATCH_BUFFER_START | 1946 (2 << 6) | 1947 (3 << 9) | 1948 MI_BATCH_NON_SECURE_I965); 1949 OUT_RING(exec_start); 1950 1951 } else { 1952 OUT_RING(MI_BATCH_BUFFER_START | 1953 (2 << 6)); 1954 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1955 } 1956 ADVANCE_LP_RING(); 1957 } 1958 } 1959 /* XXX breadcrumb */ 1960 return 0; 1961 } 1962 1963 /* Throttle our rendering by waiting until the ring has completed our requests 1964 * emitted over 20 msec ago. 1965 * 1966 * This should get us reasonable parallelism between CPU and GPU but also 1967 * relatively low latency when blocking on a particular request to finish. 1968 */ 1969 static int 1970 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 1971 { 1972 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 1973 int ret = 0; 1974 uint32_t seqno; 1975 1976 spin_lock(&dev->struct_mutex); 1977 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 1978 i915_file_priv->mm.last_gem_throttle_seqno = 1979 i915_file_priv->mm.last_gem_seqno; 1980 if (seqno) { 1981 ret = i915_wait_request(dev, seqno); 1982 if (ret != 0) 1983 DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev)); 1984 } 1985 spin_unlock(&dev->struct_mutex); 1986 return ret; 1987 } 1988 1989 /*ARGSUSED*/ 1990 int 1991 i915_gem_execbuffer(DRM_IOCTL_ARGS) 1992 { 1993 DRM_DEVICE; 1994 drm_i915_private_t *dev_priv = dev->dev_private; 1995 struct drm_i915_file_private *i915_file_priv = fpriv->driver_priv; 1996 struct drm_i915_gem_execbuffer args; 1997 struct drm_i915_gem_exec_object *exec_list = NULL; 1998 struct drm_gem_object **object_list = NULL; 1999 struct drm_gem_object *batch_obj; 2000 struct drm_i915_gem_object *obj_priv; 2001 int ret = 0, i, pinned = 0; 2002 uint64_t exec_offset; 2003 uint32_t seqno, flush_domains; 2004 int pin_tries; 2005 2006 if (dev->driver->use_gem != 1) 2007 return ENODEV; 2008 2009 DRM_COPYFROM_WITH_RETURN(&args, 2010 (struct drm_i915_gem_execbuffer __user *) data, sizeof(args)); 2011 2012 DRM_DEBUG("buffer_count %d len %x\n", args.buffer_count, args.batch_len); 2013 2014 if (args.buffer_count < 1) { 2015 DRM_ERROR("execbuf with %d buffers\n", args.buffer_count); 2016 return EINVAL; 2017 } 2018 /* Copy in the exec list from userland */ 2019 exec_list = drm_calloc(sizeof(*exec_list), args.buffer_count, 2020 DRM_MEM_DRIVER); 2021 object_list = drm_calloc(sizeof(*object_list), args.buffer_count, 2022 DRM_MEM_DRIVER); 2023 if (exec_list == NULL || object_list == NULL) { 2024 DRM_ERROR("Failed to allocate exec or object list " 2025 "for %d buffers\n", 2026 args.buffer_count); 2027 ret = ENOMEM; 2028 goto pre_mutex_err; 2029 } 2030 2031 ret = DRM_COPY_FROM_USER(exec_list, 2032 (struct drm_i915_gem_exec_object __user *) 2033 (uintptr_t) args.buffers_ptr, 2034 sizeof(*exec_list) * args.buffer_count); 2035 if (ret != 0) { 2036 DRM_ERROR("copy %d exec entries failed %d\n", 2037 args.buffer_count, ret); 2038 goto pre_mutex_err; 2039 } 2040 spin_lock(&dev->struct_mutex); 2041 2042 if (dev_priv->mm.wedged) { 2043 DRM_ERROR("Execbuf while wedged\n"); 2044 spin_unlock(&dev->struct_mutex); 2045 return EIO; 2046 } 2047 2048 if (dev_priv->mm.suspended) { 2049 DRM_ERROR("Execbuf while VT-switched.\n"); 2050 spin_unlock(&dev->struct_mutex); 2051 return EBUSY; 2052 } 2053 2054 /* Look up object handles */ 2055 for (i = 0; i < args.buffer_count; i++) { 2056 object_list[i] = drm_gem_object_lookup(fpriv, 2057 exec_list[i].handle); 2058 if (object_list[i] == NULL) { 2059 DRM_ERROR("Invalid object handle %d at index %d\n", 2060 exec_list[i].handle, i); 2061 ret = EBADF; 2062 goto err; 2063 } 2064 obj_priv = object_list[i]->driver_private; 2065 if (obj_priv->in_execbuffer) { 2066 DRM_ERROR("Object[%d] (%d) %p appears more than once in object list in args.buffer_count %d \n", 2067 i, object_list[i]->name, object_list[i], args.buffer_count); 2068 2069 ret = EBADF; 2070 goto err; 2071 } 2072 2073 obj_priv->in_execbuffer = 1; 2074 } 2075 2076 /* Pin and relocate */ 2077 for (pin_tries = 0; ; pin_tries++) { 2078 ret = 0; 2079 for (i = 0; i < args.buffer_count; i++) { 2080 object_list[i]->pending_read_domains = 0; 2081 object_list[i]->pending_write_domain = 0; 2082 ret = i915_gem_object_pin_and_relocate(object_list[i], 2083 fpriv, 2084 &exec_list[i]); 2085 if (ret) { 2086 DRM_ERROR("Not all object pinned"); 2087 break; 2088 } 2089 pinned = i + 1; 2090 } 2091 /* success */ 2092 if (ret == 0) 2093 { 2094 DRM_DEBUG("gem_execbuffer pin_relocate success"); 2095 break; 2096 } 2097 /* error other than GTT full, or we've already tried again */ 2098 if (ret != ENOMEM || pin_tries >= 1) { 2099 if (ret != ERESTART) 2100 DRM_ERROR("Failed to pin buffers %d\n", ret); 2101 goto err; 2102 } 2103 2104 /* unpin all of our buffers */ 2105 for (i = 0; i < pinned; i++) 2106 i915_gem_object_unpin(object_list[i]); 2107 pinned = 0; 2108 2109 /* evict everyone we can from the aperture */ 2110 ret = i915_gem_evict_everything(dev); 2111 if (ret) 2112 goto err; 2113 } 2114 2115 /* Set the pending read domains for the batch buffer to COMMAND */ 2116 batch_obj = object_list[args.buffer_count-1]; 2117 batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; 2118 batch_obj->pending_write_domain = 0; 2119 2120 /* Zero the gloabl flush/invalidate flags. These 2121 * will be modified as each object is bound to the 2122 * gtt 2123 */ 2124 dev->invalidate_domains = 0; 2125 dev->flush_domains = 0; 2126 2127 for (i = 0; i < args.buffer_count; i++) { 2128 struct drm_gem_object *obj = object_list[i]; 2129 2130 /* Compute new gpu domains and update invalidate/flush */ 2131 i915_gem_object_set_to_gpu_domain(obj, 2132 obj->pending_read_domains, 2133 obj->pending_write_domain); 2134 } 2135 2136 if (dev->invalidate_domains | dev->flush_domains) { 2137 2138 DRM_DEBUG("%s: invalidate_domains %08x flush_domains %08x Then flush\n", 2139 __func__, 2140 dev->invalidate_domains, 2141 dev->flush_domains); 2142 i915_gem_flush(dev, 2143 dev->invalidate_domains, 2144 dev->flush_domains); 2145 if (dev->flush_domains) { 2146 (void) i915_add_request(dev, dev->flush_domains); 2147 2148 } 2149 } 2150 2151 for (i = 0; i < args.buffer_count; i++) { 2152 struct drm_gem_object *obj = object_list[i]; 2153 2154 obj->write_domain = obj->pending_write_domain; 2155 } 2156 2157 exec_offset = exec_list[args.buffer_count - 1].offset; 2158 2159 /* Exec the batchbuffer */ 2160 ret = i915_dispatch_gem_execbuffer(dev, &args, exec_offset); 2161 if (ret) { 2162 DRM_ERROR("dispatch failed %d\n", ret); 2163 goto err; 2164 } 2165 2166 /* 2167 * Ensure that the commands in the batch buffer are 2168 * finished before the interrupt fires 2169 */ 2170 flush_domains = i915_retire_commands(dev); 2171 2172 /* 2173 * Get a seqno representing the execution of the current buffer, 2174 * which we can wait on. We would like to mitigate these interrupts, 2175 * likely by only creating seqnos occasionally (so that we have 2176 * *some* interrupts representing completion of buffers that we can 2177 * wait on when trying to clear up gtt space). 2178 */ 2179 seqno = i915_add_request(dev, flush_domains); 2180 ASSERT(!(seqno == 0)); 2181 i915_file_priv->mm.last_gem_seqno = seqno; 2182 for (i = 0; i < args.buffer_count; i++) { 2183 struct drm_gem_object *obj = object_list[i]; 2184 i915_gem_object_move_to_active(obj, seqno); 2185 DRM_DEBUG("%s: move to exec list %p\n", __func__, obj); 2186 } 2187 2188 err: 2189 if (object_list != NULL) { 2190 for (i = 0; i < pinned; i++) 2191 i915_gem_object_unpin(object_list[i]); 2192 2193 for (i = 0; i < args.buffer_count; i++) { 2194 if (object_list[i]) { 2195 obj_priv = object_list[i]->driver_private; 2196 obj_priv->in_execbuffer = 0; 2197 } 2198 drm_gem_object_unreference(object_list[i]); 2199 } 2200 } 2201 spin_unlock(&dev->struct_mutex); 2202 2203 if (!ret) { 2204 /* Copy the new buffer offsets back to the user's exec list. */ 2205 ret = DRM_COPY_TO_USER((struct drm_i915_relocation_entry __user *) 2206 (uintptr_t) args.buffers_ptr, 2207 exec_list, 2208 sizeof(*exec_list) * args.buffer_count); 2209 if (ret) 2210 DRM_ERROR("failed to copy %d exec entries " 2211 "back to user (%d)\n", 2212 args.buffer_count, ret); 2213 } 2214 2215 pre_mutex_err: 2216 drm_free(object_list, sizeof(*object_list) * args.buffer_count, 2217 DRM_MEM_DRIVER); 2218 drm_free(exec_list, sizeof(*exec_list) * args.buffer_count, 2219 DRM_MEM_DRIVER); 2220 2221 return ret; 2222 } 2223 2224 int 2225 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 2226 { 2227 struct drm_device *dev = obj->dev; 2228 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2229 int ret; 2230 2231 if (obj_priv->gtt_space == NULL) { 2232 ret = i915_gem_object_bind_to_gtt(obj, alignment); 2233 if (ret != 0) { 2234 DRM_ERROR("Failure to bind: %d", ret); 2235 return ret; 2236 } 2237 } 2238 obj_priv->pin_count++; 2239 2240 /* If the object is not active and not pending a flush, 2241 * remove it from the inactive list 2242 */ 2243 if (obj_priv->pin_count == 1) { 2244 atomic_inc(&dev->pin_count); 2245 atomic_add(obj->size, &dev->pin_memory); 2246 if (!obj_priv->active && 2247 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2248 I915_GEM_DOMAIN_GTT)) == 0 && 2249 !list_empty(&obj_priv->list)) 2250 list_del_init(&obj_priv->list); 2251 } 2252 return 0; 2253 } 2254 2255 void 2256 i915_gem_object_unpin(struct drm_gem_object *obj) 2257 { 2258 struct drm_device *dev = obj->dev; 2259 drm_i915_private_t *dev_priv = dev->dev_private; 2260 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2261 obj_priv->pin_count--; 2262 ASSERT(!(obj_priv->pin_count < 0)); 2263 ASSERT(!(obj_priv->gtt_space == NULL)); 2264 2265 /* If the object is no longer pinned, and is 2266 * neither active nor being flushed, then stick it on 2267 * the inactive list 2268 */ 2269 if (obj_priv->pin_count == 0) { 2270 if (!obj_priv->active && 2271 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2272 I915_GEM_DOMAIN_GTT)) == 0) 2273 list_move_tail(&obj_priv->list, 2274 &dev_priv->mm.inactive_list, (caddr_t)obj_priv); 2275 atomic_dec(&dev->pin_count); 2276 atomic_sub(obj->size, &dev->pin_memory); 2277 } 2278 } 2279 2280 /*ARGSUSED*/ 2281 int 2282 i915_gem_pin_ioctl(DRM_IOCTL_ARGS) 2283 { 2284 DRM_DEVICE; 2285 struct drm_i915_gem_pin args; 2286 struct drm_gem_object *obj; 2287 struct drm_i915_gem_object *obj_priv; 2288 int ret; 2289 2290 if (dev->driver->use_gem != 1) 2291 return ENODEV; 2292 2293 DRM_COPYFROM_WITH_RETURN(&args, 2294 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2295 2296 spin_lock(&dev->struct_mutex); 2297 2298 obj = drm_gem_object_lookup(fpriv, args.handle); 2299 if (obj == NULL) { 2300 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 2301 args.handle); 2302 spin_unlock(&dev->struct_mutex); 2303 return EBADF; 2304 } 2305 DRM_DEBUG("i915_gem_pin_ioctl obj->name %d", obj->name); 2306 obj_priv = obj->driver_private; 2307 2308 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != fpriv) { 2309 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 2310 args.handle); 2311 drm_gem_object_unreference(obj); 2312 spin_unlock(&dev->struct_mutex); 2313 return EINVAL; 2314 } 2315 2316 obj_priv->user_pin_count++; 2317 obj_priv->pin_filp = fpriv; 2318 if (obj_priv->user_pin_count == 1) { 2319 ret = i915_gem_object_pin(obj, args.alignment); 2320 if (ret != 0) { 2321 drm_gem_object_unreference(obj); 2322 spin_unlock(&dev->struct_mutex); 2323 return ret; 2324 } 2325 } 2326 2327 /* XXX - flush the CPU caches for pinned objects 2328 * as the X server doesn't manage domains yet 2329 */ 2330 i915_gem_object_flush_cpu_write_domain(obj); 2331 args.offset = obj_priv->gtt_offset; 2332 2333 ret = DRM_COPY_TO_USER((struct drm_i915_gem_pin __user *) data, &args, sizeof(args)); 2334 if ( ret != 0) 2335 DRM_ERROR(" gem pin ioctl error! %d", ret); 2336 2337 drm_gem_object_unreference(obj); 2338 spin_unlock(&dev->struct_mutex); 2339 2340 return 0; 2341 } 2342 2343 /*ARGSUSED*/ 2344 int 2345 i915_gem_unpin_ioctl(DRM_IOCTL_ARGS) 2346 { 2347 DRM_DEVICE; 2348 struct drm_i915_gem_pin args; 2349 struct drm_gem_object *obj; 2350 struct drm_i915_gem_object *obj_priv; 2351 2352 if (dev->driver->use_gem != 1) 2353 return ENODEV; 2354 2355 DRM_COPYFROM_WITH_RETURN(&args, 2356 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2357 2358 spin_lock(&dev->struct_mutex); 2359 2360 obj = drm_gem_object_lookup(fpriv, args.handle); 2361 if (obj == NULL) { 2362 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 2363 args.handle); 2364 spin_unlock(&dev->struct_mutex); 2365 return EBADF; 2366 } 2367 obj_priv = obj->driver_private; 2368 DRM_DEBUG("i915_gem_unpin_ioctl, obj->name %d", obj->name); 2369 if (obj_priv->pin_filp != fpriv) { 2370 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 2371 args.handle); 2372 drm_gem_object_unreference(obj); 2373 spin_unlock(&dev->struct_mutex); 2374 return EINVAL; 2375 } 2376 obj_priv->user_pin_count--; 2377 if (obj_priv->user_pin_count == 0) { 2378 obj_priv->pin_filp = NULL; 2379 i915_gem_object_unpin(obj); 2380 } 2381 drm_gem_object_unreference(obj); 2382 spin_unlock(&dev->struct_mutex); 2383 return 0; 2384 } 2385 2386 /*ARGSUSED*/ 2387 int 2388 i915_gem_busy_ioctl(DRM_IOCTL_ARGS) 2389 { 2390 DRM_DEVICE; 2391 struct drm_i915_gem_busy args; 2392 struct drm_gem_object *obj; 2393 struct drm_i915_gem_object *obj_priv; 2394 int ret; 2395 2396 if (dev->driver->use_gem != 1) 2397 return ENODEV; 2398 2399 DRM_COPYFROM_WITH_RETURN(&args, 2400 (struct drm_i915_gem_busy __user *) data, sizeof(args)); 2401 2402 spin_lock(&dev->struct_mutex); 2403 obj = drm_gem_object_lookup(fpriv, args.handle); 2404 if (obj == NULL) { 2405 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 2406 args.handle); 2407 spin_unlock(&dev->struct_mutex); 2408 return EBADF; 2409 } 2410 2411 obj_priv = obj->driver_private; 2412 /* Don't count being on the flushing list against the object being 2413 * done. Otherwise, a buffer left on the flushing list but not getting 2414 * flushed (because nobody's flushing that domain) won't ever return 2415 * unbusy and get reused by libdrm's bo cache. The other expected 2416 * consumer of this interface, OpenGL's occlusion queries, also specs 2417 * that the objects get unbusy "eventually" without any interference. 2418 */ 2419 args.busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 2420 DRM_DEBUG("i915_gem_busy_ioctl call obj->name %d busy %d", obj->name, args.busy); 2421 2422 ret = DRM_COPY_TO_USER((struct drm_i915_gem_busy __user *) data, &args, sizeof(args)); 2423 if ( ret != 0) 2424 DRM_ERROR(" gem busy error! %d", ret); 2425 2426 drm_gem_object_unreference(obj); 2427 spin_unlock(&dev->struct_mutex); 2428 return 0; 2429 } 2430 2431 /*ARGSUSED*/ 2432 int 2433 i915_gem_throttle_ioctl(DRM_IOCTL_ARGS) 2434 { 2435 DRM_DEVICE; 2436 2437 if (dev->driver->use_gem != 1) 2438 return ENODEV; 2439 2440 return i915_gem_ring_throttle(dev, fpriv); 2441 } 2442 2443 static int 2444 i915_gem_object_get_page_list(struct drm_gem_object *obj) 2445 { 2446 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2447 caddr_t va; 2448 long i; 2449 2450 if (obj_priv->page_list) 2451 return 0; 2452 pgcnt_t np = btop(obj->size); 2453 2454 obj_priv->page_list = kmem_zalloc(np * sizeof(caddr_t), KM_SLEEP); 2455 if (obj_priv->page_list == NULL) { 2456 DRM_ERROR("Faled to allocate page list\n"); 2457 return ENOMEM; 2458 } 2459 2460 for (i = 0, va = obj->kaddr; i < np; i++, va += PAGESIZE) { 2461 obj_priv->page_list[i] = va; 2462 } 2463 return 0; 2464 } 2465 2466 2467 int i915_gem_init_object(struct drm_gem_object *obj) 2468 { 2469 struct drm_i915_gem_object *obj_priv; 2470 2471 obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); 2472 if (obj_priv == NULL) 2473 return ENOMEM; 2474 2475 /* 2476 * We've just allocated pages from the kernel, 2477 * so they've just been written by the CPU with 2478 * zeros. They'll need to be clflushed before we 2479 * use them with the GPU. 2480 */ 2481 obj->write_domain = I915_GEM_DOMAIN_CPU; 2482 obj->read_domains = I915_GEM_DOMAIN_CPU; 2483 2484 obj->driver_private = obj_priv; 2485 obj_priv->obj = obj; 2486 INIT_LIST_HEAD(&obj_priv->list); 2487 return 0; 2488 } 2489 2490 void i915_gem_free_object(struct drm_gem_object *obj) 2491 { 2492 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2493 2494 while (obj_priv->pin_count > 0) 2495 i915_gem_object_unpin(obj); 2496 2497 DRM_DEBUG("%s: obj %d",__func__, obj->name); 2498 2499 (void) i915_gem_object_unbind(obj, 1); 2500 if (obj_priv->page_cpu_valid != NULL) 2501 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, DRM_MEM_DRIVER); 2502 drm_free(obj->driver_private, sizeof(*obj_priv), DRM_MEM_DRIVER); 2503 } 2504 2505 /** Unbinds all objects that are on the given buffer list. */ 2506 static int 2507 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head, uint32_t type) 2508 { 2509 struct drm_gem_object *obj; 2510 struct drm_i915_gem_object *obj_priv; 2511 int ret; 2512 2513 while (!list_empty(head)) { 2514 obj_priv = list_entry(head->next, 2515 struct drm_i915_gem_object, 2516 list); 2517 obj = obj_priv->obj; 2518 2519 if (obj_priv->pin_count != 0) { 2520 DRM_ERROR("Pinned object in unbind list\n"); 2521 spin_unlock(&dev->struct_mutex); 2522 return EINVAL; 2523 } 2524 DRM_DEBUG("%s: obj %d type %d",__func__, obj->name, type); 2525 ret = i915_gem_object_unbind(obj, type); 2526 if (ret != 0) { 2527 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 2528 ret); 2529 spin_unlock(&dev->struct_mutex); 2530 return ret; 2531 } 2532 } 2533 2534 2535 return 0; 2536 } 2537 2538 static int 2539 i915_gem_idle(struct drm_device *dev, uint32_t type) 2540 { 2541 drm_i915_private_t *dev_priv = dev->dev_private; 2542 uint32_t seqno, cur_seqno, last_seqno; 2543 int stuck, ret; 2544 2545 spin_lock(&dev->struct_mutex); 2546 2547 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 2548 spin_unlock(&dev->struct_mutex); 2549 return 0; 2550 } 2551 2552 /* Hack! Don't let anybody do execbuf while we don't control the chip. 2553 * We need to replace this with a semaphore, or something. 2554 */ 2555 dev_priv->mm.suspended = 1; 2556 2557 /* Cancel the retire work handler, wait for it to finish if running 2558 */ 2559 if (worktimer_id != NULL) { 2560 (void) untimeout(worktimer_id); 2561 worktimer_id = NULL; 2562 } 2563 2564 i915_kernel_lost_context(dev); 2565 2566 /* Flush the GPU along with all non-CPU write domains 2567 */ 2568 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 2569 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2570 seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU | 2571 I915_GEM_DOMAIN_GTT)); 2572 if (seqno == 0) { 2573 spin_unlock(&dev->struct_mutex); 2574 return ENOMEM; 2575 } 2576 2577 dev_priv->mm.waiting_gem_seqno = seqno; 2578 last_seqno = 0; 2579 stuck = 0; 2580 for (;;) { 2581 cur_seqno = i915_get_gem_seqno(dev); 2582 if (i915_seqno_passed(cur_seqno, seqno)) 2583 break; 2584 if (last_seqno == cur_seqno) { 2585 if (stuck++ > 100) { 2586 DRM_ERROR("hardware wedged\n"); 2587 dev_priv->mm.wedged = 1; 2588 DRM_WAKEUP(&dev_priv->irq_queue); 2589 break; 2590 } 2591 } 2592 DRM_UDELAY(10); 2593 last_seqno = cur_seqno; 2594 } 2595 dev_priv->mm.waiting_gem_seqno = 0; 2596 2597 i915_gem_retire_requests(dev); 2598 2599 /* Empty the active and flushing lists to inactive. If there's 2600 * anything left at this point, it means that we're wedged and 2601 * nothing good's going to happen by leaving them there. So strip 2602 * the GPU domains and just stuff them onto inactive. 2603 */ 2604 while (!list_empty(&dev_priv->mm.active_list)) { 2605 struct drm_i915_gem_object *obj_priv; 2606 2607 obj_priv = list_entry(dev_priv->mm.active_list.next, 2608 struct drm_i915_gem_object, 2609 list); 2610 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2611 i915_gem_object_move_to_inactive(obj_priv->obj); 2612 } 2613 2614 while (!list_empty(&dev_priv->mm.flushing_list)) { 2615 struct drm_i915_gem_object *obj_priv; 2616 2617 obj_priv = list_entry(dev_priv->mm.flushing_list.next, 2618 struct drm_i915_gem_object, 2619 list); 2620 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2621 i915_gem_object_move_to_inactive(obj_priv->obj); 2622 } 2623 2624 /* Move all inactive buffers out of the GTT. */ 2625 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list, type); 2626 ASSERT(list_empty(&dev_priv->mm.inactive_list)); 2627 if (ret) { 2628 spin_unlock(&dev->struct_mutex); 2629 return ret; 2630 } 2631 2632 i915_gem_cleanup_ringbuffer(dev); 2633 spin_unlock(&dev->struct_mutex); 2634 2635 return 0; 2636 } 2637 2638 static int 2639 i915_gem_init_hws(struct drm_device *dev) 2640 { 2641 drm_i915_private_t *dev_priv = dev->dev_private; 2642 struct drm_gem_object *obj; 2643 struct drm_i915_gem_object *obj_priv; 2644 int ret; 2645 2646 /* If we need a physical address for the status page, it's already 2647 * initialized at driver load time. 2648 */ 2649 if (!I915_NEED_GFX_HWS(dev)) 2650 return 0; 2651 2652 2653 obj = drm_gem_object_alloc(dev, 4096); 2654 if (obj == NULL) { 2655 DRM_ERROR("Failed to allocate status page\n"); 2656 return ENOMEM; 2657 } 2658 2659 obj_priv = obj->driver_private; 2660 2661 ret = i915_gem_object_pin(obj, 4096); 2662 if (ret != 0) { 2663 drm_gem_object_unreference(obj); 2664 return ret; 2665 } 2666 2667 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 2668 dev_priv->hws_map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2669 dev_priv->hws_map.size = 4096; 2670 dev_priv->hws_map.type = 0; 2671 dev_priv->hws_map.flags = 0; 2672 dev_priv->hws_map.mtrr = 0; 2673 2674 drm_core_ioremap(&dev_priv->hws_map, dev); 2675 if (dev_priv->hws_map.handle == NULL) { 2676 DRM_ERROR("Failed to map status page.\n"); 2677 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2678 drm_gem_object_unreference(obj); 2679 return EINVAL; 2680 } 2681 2682 dev_priv->hws_obj = obj; 2683 2684 dev_priv->hw_status_page = dev_priv->hws_map.handle; 2685 2686 (void) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 2687 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 2688 (void) I915_READ(HWS_PGA); /* posting read */ 2689 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 2690 2691 return 0; 2692 } 2693 2694 static void 2695 i915_gem_cleanup_hws(struct drm_device *dev) 2696 { 2697 drm_i915_private_t *dev_priv = dev->dev_private; 2698 struct drm_gem_object *obj; 2699 2700 if (dev_priv->hws_obj == NULL) 2701 return; 2702 2703 obj = dev_priv->hws_obj; 2704 2705 drm_core_ioremapfree(&dev_priv->hws_map, dev); 2706 i915_gem_object_unpin(obj); 2707 drm_gem_object_unreference(obj); 2708 dev_priv->hws_obj = NULL; 2709 2710 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2711 dev_priv->hw_status_page = NULL; 2712 2713 /* Write high address into HWS_PGA when disabling. */ 2714 I915_WRITE(HWS_PGA, 0x1ffff000); 2715 } 2716 2717 int 2718 i915_gem_init_ringbuffer(struct drm_device *dev) 2719 { 2720 drm_i915_private_t *dev_priv = dev->dev_private; 2721 struct drm_gem_object *obj; 2722 struct drm_i915_gem_object *obj_priv; 2723 int ret; 2724 u32 head; 2725 2726 ret = i915_gem_init_hws(dev); 2727 if (ret != 0) 2728 return ret; 2729 obj = drm_gem_object_alloc(dev, 128 * 1024); 2730 if (obj == NULL) { 2731 DRM_ERROR("Failed to allocate ringbuffer\n"); 2732 i915_gem_cleanup_hws(dev); 2733 return ENOMEM; 2734 } 2735 2736 obj_priv = obj->driver_private; 2737 ret = i915_gem_object_pin(obj, 4096); 2738 if (ret != 0) { 2739 drm_gem_object_unreference(obj); 2740 i915_gem_cleanup_hws(dev); 2741 return ret; 2742 } 2743 2744 /* Set up the kernel mapping for the ring. */ 2745 dev_priv->ring.Size = obj->size; 2746 dev_priv->ring.tail_mask = obj->size - 1; 2747 2748 dev_priv->ring.map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2749 dev_priv->ring.map.size = obj->size; 2750 dev_priv->ring.map.type = 0; 2751 dev_priv->ring.map.flags = 0; 2752 dev_priv->ring.map.mtrr = 0; 2753 2754 drm_core_ioremap(&dev_priv->ring.map, dev); 2755 if (dev_priv->ring.map.handle == NULL) { 2756 DRM_ERROR("Failed to map ringbuffer.\n"); 2757 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2758 drm_gem_object_unreference(obj); 2759 i915_gem_cleanup_hws(dev); 2760 return EINVAL; 2761 } 2762 2763 dev_priv->ring.ring_obj = obj; 2764 2765 dev_priv->ring.virtual_start = (u8 *) dev_priv->ring.map.handle; 2766 2767 /* Stop the ring if it's running. */ 2768 I915_WRITE(PRB0_CTL, 0); 2769 I915_WRITE(PRB0_HEAD, 0); 2770 I915_WRITE(PRB0_TAIL, 0); 2771 2772 2773 /* Initialize the ring. */ 2774 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 2775 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2776 2777 /* G45 ring initialization fails to reset head to zero */ 2778 if (head != 0) { 2779 DRM_ERROR("Ring head not reset to zero " 2780 "ctl %08x head %08x tail %08x start %08x\n", 2781 I915_READ(PRB0_CTL), 2782 I915_READ(PRB0_HEAD), 2783 I915_READ(PRB0_TAIL), 2784 I915_READ(PRB0_START)); 2785 I915_WRITE(PRB0_HEAD, 0); 2786 2787 DRM_ERROR("Ring head forced to zero " 2788 "ctl %08x head %08x tail %08x start %08x\n", 2789 I915_READ(PRB0_CTL), 2790 I915_READ(PRB0_HEAD), 2791 I915_READ(PRB0_TAIL), 2792 I915_READ(PRB0_START)); 2793 } 2794 2795 I915_WRITE(PRB0_CTL, 2796 ((obj->size - 4096) & RING_NR_PAGES) | 2797 RING_NO_REPORT | 2798 RING_VALID); 2799 2800 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2801 2802 /* If the head is still not zero, the ring is dead */ 2803 if (head != 0) { 2804 DRM_ERROR("Ring initialization failed " 2805 "ctl %08x head %08x tail %08x start %08x\n", 2806 I915_READ(PRB0_CTL), 2807 I915_READ(PRB0_HEAD), 2808 I915_READ(PRB0_TAIL), 2809 I915_READ(PRB0_START)); 2810 return EIO; 2811 } 2812 2813 /* Update our cache of the ring state */ 2814 i915_kernel_lost_context(dev); 2815 2816 return 0; 2817 } 2818 2819 static void 2820 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 2821 { 2822 drm_i915_private_t *dev_priv = dev->dev_private; 2823 2824 if (dev_priv->ring.ring_obj == NULL) 2825 return; 2826 2827 drm_core_ioremapfree(&dev_priv->ring.map, dev); 2828 2829 i915_gem_object_unpin(dev_priv->ring.ring_obj); 2830 drm_gem_object_unreference(dev_priv->ring.ring_obj); 2831 dev_priv->ring.ring_obj = NULL; 2832 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2833 i915_gem_cleanup_hws(dev); 2834 } 2835 2836 /*ARGSUSED*/ 2837 int 2838 i915_gem_entervt_ioctl(DRM_IOCTL_ARGS) 2839 { 2840 DRM_DEVICE; 2841 drm_i915_private_t *dev_priv = dev->dev_private; 2842 int ret; 2843 2844 if (dev->driver->use_gem != 1) 2845 return ENODEV; 2846 2847 if (dev_priv->mm.wedged) { 2848 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 2849 dev_priv->mm.wedged = 0; 2850 } 2851 /* Set up the kernel mapping for the ring. */ 2852 dev_priv->mm.gtt_mapping.offset = dev->agp->agp_info.agpi_aperbase; 2853 dev_priv->mm.gtt_mapping.size = dev->agp->agp_info.agpi_apersize; 2854 dev_priv->mm.gtt_mapping.type = 0; 2855 dev_priv->mm.gtt_mapping.flags = 0; 2856 dev_priv->mm.gtt_mapping.mtrr = 0; 2857 2858 drm_core_ioremap(&dev_priv->mm.gtt_mapping, dev); 2859 2860 spin_lock(&dev->struct_mutex); 2861 dev_priv->mm.suspended = 0; 2862 ret = i915_gem_init_ringbuffer(dev); 2863 if (ret != 0) 2864 return ret; 2865 2866 spin_unlock(&dev->struct_mutex); 2867 2868 drm_irq_install(dev); 2869 2870 return 0; 2871 } 2872 2873 /*ARGSUSED*/ 2874 int 2875 i915_gem_leavevt_ioctl(DRM_IOCTL_ARGS) 2876 { 2877 DRM_DEVICE; 2878 drm_i915_private_t *dev_priv = dev->dev_private; 2879 int ret; 2880 2881 if (dev->driver->use_gem != 1) 2882 return ENODEV; 2883 2884 ret = i915_gem_idle(dev, 0); 2885 drm_irq_uninstall(dev); 2886 2887 drm_core_ioremapfree(&dev_priv->mm.gtt_mapping, dev); 2888 return ret; 2889 } 2890 2891 void 2892 i915_gem_lastclose(struct drm_device *dev) 2893 { 2894 drm_i915_private_t *dev_priv = dev->dev_private; 2895 int ret; 2896 2897 ret = i915_gem_idle(dev, 1); 2898 if (ret) 2899 DRM_ERROR("failed to idle hardware: %d\n", ret); 2900 2901 drm_mm_clean_ml(&dev_priv->mm.gtt_space); 2902 } 2903 2904 void 2905 i915_gem_load(struct drm_device *dev) 2906 { 2907 drm_i915_private_t *dev_priv = dev->dev_private; 2908 2909 INIT_LIST_HEAD(&dev_priv->mm.active_list); 2910 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 2911 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 2912 INIT_LIST_HEAD(&dev_priv->mm.request_list); 2913 dev_priv->mm.next_gem_seqno = 1; 2914 2915 i915_gem_detect_bit_6_swizzle(dev); 2916 2917 } 2918 2919