1 /* BEGIN CSTYLED */ 2 3 /* 4 * Copyright (c) 2009, Intel Corporation. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 * IN THE SOFTWARE. 25 * 26 * Authors: 27 * Eric Anholt <eric@anholt.net> 28 * 29 */ 30 31 /* 32 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 33 * Use is subject to license terms. 34 */ 35 36 #include <sys/x86_archext.h> 37 #include <sys/vfs_opreg.h> 38 #include "drmP.h" 39 #include "drm.h" 40 #include "i915_drm.h" 41 #include "i915_drv.h" 42 43 #ifndef roundup 44 #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) 45 #endif /* !roundup */ 46 47 #define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 48 49 static timeout_id_t worktimer_id = NULL; 50 51 extern int drm_mm_init(struct drm_mm *mm, 52 unsigned long start, unsigned long size); 53 extern void drm_mm_put_block(struct drm_mm_node *cur); 54 extern int choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off, 55 int vacalign, uint_t flags); 56 57 static void 58 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj, 59 uint32_t read_domains, 60 uint32_t write_domain); 61 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); 62 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj); 63 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj); 64 static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, 65 int write); 66 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, 67 int write); 68 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 69 uint64_t offset, 70 uint64_t size); 71 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); 72 static void i915_gem_object_free_page_list(struct drm_gem_object *obj); 73 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); 74 static int i915_gem_object_get_page_list(struct drm_gem_object *obj); 75 76 static void 77 i915_gem_cleanup_ringbuffer(struct drm_device *dev); 78 79 /*ARGSUSED*/ 80 int 81 i915_gem_init_ioctl(DRM_IOCTL_ARGS) 82 { 83 DRM_DEVICE; 84 drm_i915_private_t *dev_priv = dev->dev_private; 85 struct drm_i915_gem_init args; 86 87 if (dev->driver->use_gem != 1) 88 return ENODEV; 89 90 DRM_COPYFROM_WITH_RETURN(&args, 91 (struct drm_i915_gem_init *) data, sizeof(args)); 92 93 spin_lock(&dev->struct_mutex); 94 95 if ((args.gtt_start >= args.gtt_end) || 96 ((args.gtt_start & (PAGE_SIZE - 1)) != 0) || 97 ((args.gtt_end & (PAGE_SIZE - 1)) != 0)) { 98 spin_unlock(&dev->struct_mutex); 99 DRM_ERROR("i915_gem_init_ioctel invalid arg 0x%lx args.start 0x%lx end 0x%lx", &args, args.gtt_start, args.gtt_end); 100 return EINVAL; 101 } 102 103 dev->gtt_total = (uint32_t) (args.gtt_end - args.gtt_start); 104 105 drm_mm_init(&dev_priv->mm.gtt_space, (unsigned long) args.gtt_start, 106 dev->gtt_total); 107 DRM_DEBUG("i915_gem_init_ioctl dev->gtt_total %x, dev_priv->mm.gtt_space 0x%x gtt_start 0x%lx", dev->gtt_total, dev_priv->mm.gtt_space, args.gtt_start); 108 ASSERT(dev->gtt_total != 0); 109 110 spin_unlock(&dev->struct_mutex); 111 112 113 return 0; 114 } 115 116 /*ARGSUSED*/ 117 int 118 i915_gem_get_aperture_ioctl(DRM_IOCTL_ARGS) 119 { 120 DRM_DEVICE; 121 struct drm_i915_gem_get_aperture args; 122 int ret; 123 124 if (dev->driver->use_gem != 1) 125 return ENODEV; 126 127 args.aper_size = (uint64_t)dev->gtt_total; 128 args.aper_available_size = (args.aper_size - 129 atomic_read(&dev->pin_memory)); 130 131 ret = DRM_COPY_TO_USER((struct drm_i915_gem_get_aperture __user *) data, &args, sizeof(args)); 132 133 if ( ret != 0) 134 DRM_ERROR(" i915_gem_get_aperture_ioctl error! %d", ret); 135 136 DRM_DEBUG("i915_gem_get_aaperture_ioctl called sizeof %d, aper_size 0x%x, aper_available_size 0x%x\n", sizeof(args), dev->gtt_total, args.aper_available_size); 137 138 return 0; 139 } 140 141 /** 142 * Creates a new mm object and returns a handle to it. 143 */ 144 /*ARGSUSED*/ 145 int 146 i915_gem_create_ioctl(DRM_IOCTL_ARGS) 147 { 148 DRM_DEVICE; 149 struct drm_i915_gem_create args; 150 struct drm_gem_object *obj; 151 int handlep; 152 int ret; 153 154 if (dev->driver->use_gem != 1) 155 return ENODEV; 156 157 DRM_COPYFROM_WITH_RETURN(&args, 158 (struct drm_i915_gem_create *) data, sizeof(args)); 159 160 161 args.size = (uint64_t) roundup(args.size, PAGE_SIZE); 162 163 if (args.size == 0) { 164 DRM_ERROR("Invalid obj size %d", args.size); 165 return EINVAL; 166 } 167 /* Allocate the new object */ 168 obj = drm_gem_object_alloc(dev, args.size); 169 if (obj == NULL) { 170 DRM_ERROR("Failed to alloc obj"); 171 return ENOMEM; 172 } 173 174 ret = drm_gem_handle_create(fpriv, obj, &handlep); 175 spin_lock(&dev->struct_mutex); 176 drm_gem_object_handle_unreference(obj); 177 spin_unlock(&dev->struct_mutex); 178 if (ret) 179 return ret; 180 181 args.handle = handlep; 182 183 ret = DRM_COPY_TO_USER((struct drm_i915_gem_create *) data, &args, sizeof(args)); 184 185 if ( ret != 0) 186 DRM_ERROR(" gem create error! %d", ret); 187 188 DRM_DEBUG("i915_gem_create_ioctl object name %d, size 0x%lx, list 0x%lx, obj 0x%lx",handlep, args.size, &fpriv->object_idr, obj); 189 190 return 0; 191 } 192 193 /** 194 * Reads data from the object referenced by handle. 195 * 196 * On error, the contents of *data are undefined. 197 */ 198 /*ARGSUSED*/ 199 int 200 i915_gem_pread_ioctl(DRM_IOCTL_ARGS) 201 { 202 DRM_DEVICE; 203 struct drm_i915_gem_pread args; 204 struct drm_gem_object *obj; 205 int ret; 206 207 if (dev->driver->use_gem != 1) 208 return ENODEV; 209 210 DRM_COPYFROM_WITH_RETURN(&args, 211 (struct drm_i915_gem_pread __user *) data, sizeof(args)); 212 213 obj = drm_gem_object_lookup(fpriv, args.handle); 214 if (obj == NULL) 215 return EBADF; 216 217 /* Bounds check source. 218 * 219 * XXX: This could use review for overflow issues... 220 */ 221 if (args.offset > obj->size || args.size > obj->size || 222 args.offset + args.size > obj->size) { 223 drm_gem_object_unreference(obj); 224 DRM_ERROR("i915_gem_pread_ioctl invalid args"); 225 return EINVAL; 226 } 227 228 spin_lock(&dev->struct_mutex); 229 230 ret = i915_gem_object_set_cpu_read_domain_range(obj, args.offset, args.size); 231 if (ret != 0) { 232 drm_gem_object_unreference(obj); 233 spin_unlock(&dev->struct_mutex); 234 DRM_ERROR("pread failed to read domain range ret %d!!!", ret); 235 return EFAULT; 236 } 237 238 unsigned long unwritten = 0; 239 uint32_t *user_data; 240 user_data = (uint32_t *) (uintptr_t) args.data_ptr; 241 242 unwritten = DRM_COPY_TO_USER(user_data, obj->kaddr + args.offset, args.size); 243 if (unwritten) { 244 ret = EFAULT; 245 DRM_ERROR("i915_gem_pread error!!! unwritten %d", unwritten); 246 } 247 248 drm_gem_object_unreference(obj); 249 spin_unlock(&dev->struct_mutex); 250 251 return ret; 252 } 253 254 /*ARGSUSED*/ 255 static int 256 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 257 struct drm_i915_gem_pwrite *args, 258 struct drm_file *file_priv) 259 { 260 uint32_t *user_data; 261 int ret = 0; 262 unsigned long unwritten = 0; 263 264 user_data = (uint32_t *) (uintptr_t) args->data_ptr; 265 spin_lock(&dev->struct_mutex); 266 ret = i915_gem_object_pin(obj, 0); 267 if (ret) { 268 spin_unlock(&dev->struct_mutex); 269 DRM_ERROR("i915_gem_gtt_pwrite failed to pin ret %d", ret); 270 return ret; 271 } 272 273 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 274 if (ret) 275 goto err; 276 277 DRM_DEBUG("obj %d write domain 0x%x read domain 0x%x", obj->name, obj->write_domain, obj->read_domains); 278 279 unwritten = DRM_COPY_FROM_USER(obj->kaddr + args->offset, user_data, args->size); 280 if (unwritten) { 281 ret = EFAULT; 282 DRM_ERROR("i915_gem_gtt_pwrite error!!! unwritten %d", unwritten); 283 goto err; 284 } 285 286 err: 287 i915_gem_object_unpin(obj); 288 spin_unlock(&dev->struct_mutex); 289 if (ret) 290 DRM_ERROR("i915_gem_gtt_pwrite error %d", ret); 291 return ret; 292 } 293 294 /*ARGSUSED*/ 295 int 296 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 297 struct drm_i915_gem_pwrite *args, 298 struct drm_file *file_priv) 299 { 300 DRM_ERROR(" i915_gem_shmem_pwrite Not support"); 301 return -1; 302 } 303 304 /** 305 * Writes data to the object referenced by handle. 306 * 307 * On error, the contents of the buffer that were to be modified are undefined. 308 */ 309 /*ARGSUSED*/ 310 int 311 i915_gem_pwrite_ioctl(DRM_IOCTL_ARGS) 312 { 313 DRM_DEVICE; 314 struct drm_i915_gem_pwrite args; 315 struct drm_gem_object *obj; 316 struct drm_i915_gem_object *obj_priv; 317 int ret = 0; 318 319 if (dev->driver->use_gem != 1) 320 return ENODEV; 321 322 ret = DRM_COPY_FROM_USER(&args, 323 (struct drm_i915_gem_pwrite __user *) data, sizeof(args)); 324 if (ret) 325 DRM_ERROR("i915_gem_pwrite_ioctl failed to copy from user"); 326 obj = drm_gem_object_lookup(fpriv, args.handle); 327 if (obj == NULL) 328 return EBADF; 329 obj_priv = obj->driver_private; 330 DRM_DEBUG("i915_gem_pwrite_ioctl, obj->name %d",obj->name); 331 332 /* Bounds check destination. 333 * 334 * XXX: This could use review for overflow issues... 335 */ 336 if (args.offset > obj->size || args.size > obj->size || 337 args.offset + args.size > obj->size) { 338 drm_gem_object_unreference(obj); 339 DRM_ERROR("i915_gem_pwrite_ioctl invalid arg"); 340 return EINVAL; 341 } 342 343 /* We can only do the GTT pwrite on untiled buffers, as otherwise 344 * it would end up going through the fenced access, and we'll get 345 * different detiling behavior between reading and writing. 346 * pread/pwrite currently are reading and writing from the CPU 347 * perspective, requiring manual detiling by the client. 348 */ 349 if (obj_priv->tiling_mode == I915_TILING_NONE && 350 dev->gtt_total != 0) 351 ret = i915_gem_gtt_pwrite(dev, obj, &args, fpriv); 352 else 353 ret = i915_gem_shmem_pwrite(dev, obj, &args, fpriv); 354 355 if (ret) 356 DRM_ERROR("pwrite failed %d\n", ret); 357 358 drm_gem_object_unreference(obj); 359 360 return ret; 361 } 362 363 /** 364 * Called when user space prepares to use an object with the CPU, either 365 * through the mmap ioctl's mapping or a GTT mapping. 366 */ 367 /*ARGSUSED*/ 368 int 369 i915_gem_set_domain_ioctl(DRM_IOCTL_ARGS) 370 { 371 DRM_DEVICE; 372 struct drm_i915_gem_set_domain args; 373 struct drm_gem_object *obj; 374 int ret = 0; 375 376 if (dev->driver->use_gem != 1) 377 return ENODEV; 378 379 DRM_COPYFROM_WITH_RETURN(&args, 380 (struct drm_i915_gem_set_domain __user *) data, sizeof(args)); 381 382 uint32_t read_domains = args.read_domains; 383 uint32_t write_domain = args.write_domain; 384 385 /* Only handle setting domains to types used by the CPU. */ 386 if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 387 ret = EINVAL; 388 389 if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 390 ret = EINVAL; 391 392 /* Having something in the write domain implies it's in the read 393 * domain, and only that read domain. Enforce that in the request. 394 */ 395 if (write_domain != 0 && read_domains != write_domain) 396 ret = EINVAL; 397 if (ret) { 398 DRM_ERROR("set_domain invalid read or write"); 399 return EINVAL; 400 } 401 402 obj = drm_gem_object_lookup(fpriv, args.handle); 403 if (obj == NULL) 404 return EBADF; 405 406 spin_lock(&dev->struct_mutex); 407 DRM_DEBUG("set_domain_ioctl %p(name %d size 0x%x), %08x %08x\n", 408 obj, obj->name, obj->size, args.read_domains, args.write_domain); 409 410 if (read_domains & I915_GEM_DOMAIN_GTT) { 411 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 412 413 /* Silently promote "you're not bound, there was nothing to do" 414 * to success, since the client was just asking us to 415 * make sure everything was done. 416 */ 417 if (ret == EINVAL) 418 ret = 0; 419 } else { 420 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 421 } 422 423 drm_gem_object_unreference(obj); 424 spin_unlock(&dev->struct_mutex); 425 if (ret) 426 DRM_ERROR("i915_set_domain_ioctl ret %d", ret); 427 return ret; 428 } 429 430 /** 431 * Called when user space has done writes to this buffer 432 */ 433 /*ARGSUSED*/ 434 int 435 i915_gem_sw_finish_ioctl(DRM_IOCTL_ARGS) 436 { 437 DRM_DEVICE; 438 struct drm_i915_gem_sw_finish args; 439 struct drm_gem_object *obj; 440 struct drm_i915_gem_object *obj_priv; 441 int ret = 0; 442 443 if (dev->driver->use_gem != 1) 444 return ENODEV; 445 446 DRM_COPYFROM_WITH_RETURN(&args, 447 (struct drm_i915_gem_sw_finish __user *) data, sizeof(args)); 448 449 spin_lock(&dev->struct_mutex); 450 obj = drm_gem_object_lookup(fpriv, args.handle); 451 if (obj == NULL) { 452 spin_unlock(&dev->struct_mutex); 453 return EBADF; 454 } 455 456 DRM_DEBUG("%s: sw_finish %d (%p name %d size 0x%x)\n", 457 __func__, args.handle, obj, obj->name, obj->size); 458 459 obj_priv = obj->driver_private; 460 /* Pinned buffers may be scanout, so flush the cache */ 461 if (obj_priv->pin_count) 462 { 463 i915_gem_object_flush_cpu_write_domain(obj); 464 } 465 466 drm_gem_object_unreference(obj); 467 spin_unlock(&dev->struct_mutex); 468 return ret; 469 } 470 471 /** 472 * Maps the contents of an object, returning the address it is mapped 473 * into. 474 * 475 * While the mapping holds a reference on the contents of the object, it doesn't 476 * imply a ref on the object itself. 477 */ 478 /*ARGSUSED*/ 479 int 480 i915_gem_mmap_ioctl(DRM_IOCTL_ARGS) 481 { 482 DRM_DEVICE; 483 struct drm_i915_gem_mmap args; 484 struct drm_gem_object *obj; 485 caddr_t vvaddr = NULL; 486 int ret; 487 488 if (dev->driver->use_gem != 1) 489 return ENODEV; 490 491 DRM_COPYFROM_WITH_RETURN( 492 &args, (struct drm_i915_gem_mmap __user *)data, 493 sizeof (struct drm_i915_gem_mmap)); 494 495 obj = drm_gem_object_lookup(fpriv, args.handle); 496 if (obj == NULL) 497 return EBADF; 498 499 ret = ddi_devmap_segmap(fpriv->dev, (off_t)obj->map->handle, 500 ttoproc(curthread)->p_as, &vvaddr, obj->map->size, 501 PROT_ALL, PROT_ALL, MAP_SHARED, fpriv->credp); 502 if (ret) 503 return ret; 504 505 spin_lock(&dev->struct_mutex); 506 drm_gem_object_unreference(obj); 507 spin_unlock(&dev->struct_mutex); 508 509 args.addr_ptr = (uint64_t)(uintptr_t)vvaddr; 510 511 DRM_COPYTO_WITH_RETURN( 512 (struct drm_i915_gem_mmap __user *)data, 513 &args, sizeof (struct drm_i915_gem_mmap)); 514 515 return 0; 516 } 517 518 static void 519 i915_gem_object_free_page_list(struct drm_gem_object *obj) 520 { 521 struct drm_i915_gem_object *obj_priv = obj->driver_private; 522 if (obj_priv->page_list == NULL) 523 return; 524 525 kmem_free(obj_priv->page_list, 526 btop(obj->size) * sizeof(caddr_t)); 527 528 obj_priv->page_list = NULL; 529 } 530 531 static void 532 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno) 533 { 534 struct drm_device *dev = obj->dev; 535 drm_i915_private_t *dev_priv = dev->dev_private; 536 struct drm_i915_gem_object *obj_priv = obj->driver_private; 537 538 /* Add a reference if we're newly entering the active list. */ 539 if (!obj_priv->active) { 540 drm_gem_object_reference(obj); 541 obj_priv->active = 1; 542 } 543 /* Move from whatever list we were on to the tail of execution. */ 544 list_move_tail(&obj_priv->list, 545 &dev_priv->mm.active_list, (caddr_t)obj_priv); 546 obj_priv->last_rendering_seqno = seqno; 547 } 548 549 static void 550 i915_gem_object_move_to_flushing(struct drm_gem_object *obj) 551 { 552 struct drm_device *dev = obj->dev; 553 drm_i915_private_t *dev_priv = dev->dev_private; 554 struct drm_i915_gem_object *obj_priv = obj->driver_private; 555 556 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list, (caddr_t)obj_priv); 557 obj_priv->last_rendering_seqno = 0; 558 } 559 560 static void 561 i915_gem_object_move_to_inactive(struct drm_gem_object *obj) 562 { 563 struct drm_device *dev = obj->dev; 564 drm_i915_private_t *dev_priv = dev->dev_private; 565 struct drm_i915_gem_object *obj_priv = obj->driver_private; 566 567 if (obj_priv->pin_count != 0) 568 { 569 list_del_init(&obj_priv->list); 570 } else { 571 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list, (caddr_t)obj_priv); 572 } 573 obj_priv->last_rendering_seqno = 0; 574 if (obj_priv->active) { 575 obj_priv->active = 0; 576 drm_gem_object_unreference(obj); 577 } 578 } 579 580 /** 581 * Creates a new sequence number, emitting a write of it to the status page 582 * plus an interrupt, which will trigger i915_user_interrupt_handler. 583 * 584 * Must be called with struct_lock held. 585 * 586 * Returned sequence numbers are nonzero on success. 587 */ 588 static uint32_t 589 i915_add_request(struct drm_device *dev, uint32_t flush_domains) 590 { 591 drm_i915_private_t *dev_priv = dev->dev_private; 592 struct drm_i915_gem_request *request; 593 uint32_t seqno; 594 int was_empty; 595 RING_LOCALS; 596 597 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); 598 if (request == NULL) { 599 DRM_ERROR("Failed to alloc request"); 600 return 0; 601 } 602 /* Grab the seqno we're going to make this request be, and bump the 603 * next (skipping 0 so it can be the reserved no-seqno value). 604 */ 605 seqno = dev_priv->mm.next_gem_seqno; 606 dev_priv->mm.next_gem_seqno++; 607 if (dev_priv->mm.next_gem_seqno == 0) 608 dev_priv->mm.next_gem_seqno++; 609 610 DRM_DEBUG("add_request seqno = %d dev 0x%lx", seqno, dev); 611 612 BEGIN_LP_RING(4); 613 OUT_RING(MI_STORE_DWORD_INDEX); 614 OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 615 OUT_RING(seqno); 616 OUT_RING(0); 617 ADVANCE_LP_RING(); 618 619 BEGIN_LP_RING(2); 620 OUT_RING(0); 621 OUT_RING(MI_USER_INTERRUPT); 622 ADVANCE_LP_RING(); 623 624 request->seqno = seqno; 625 request->emitted_jiffies = jiffies; 626 was_empty = list_empty(&dev_priv->mm.request_list); 627 list_add_tail(&request->list, &dev_priv->mm.request_list, (caddr_t)request); 628 629 /* Associate any objects on the flushing list matching the write 630 * domain we're flushing with our flush. 631 */ 632 if (flush_domains != 0) { 633 struct drm_i915_gem_object *obj_priv, *next; 634 635 obj_priv = list_entry(dev_priv->mm.flushing_list.next, struct drm_i915_gem_object, list), 636 next = list_entry(obj_priv->list.next, struct drm_i915_gem_object, list); 637 for(; &obj_priv->list != &dev_priv->mm.flushing_list; 638 obj_priv = next, 639 next = list_entry(next->list.next, struct drm_i915_gem_object, list)) { 640 struct drm_gem_object *obj = obj_priv->obj; 641 642 if ((obj->write_domain & flush_domains) == 643 obj->write_domain) { 644 obj->write_domain = 0; 645 i915_gem_object_move_to_active(obj, seqno); 646 } 647 } 648 649 } 650 651 if (was_empty && !dev_priv->mm.suspended) 652 { 653 /* change to delay HZ and then run work (not insert to workqueue of Linux) */ 654 worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ); 655 DRM_DEBUG("i915_gem: schedule_delayed_work"); 656 } 657 return seqno; 658 } 659 660 /** 661 * Command execution barrier 662 * 663 * Ensures that all commands in the ring are finished 664 * before signalling the CPU 665 */ 666 uint32_t 667 i915_retire_commands(struct drm_device *dev) 668 { 669 drm_i915_private_t *dev_priv = dev->dev_private; 670 uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 671 uint32_t flush_domains = 0; 672 RING_LOCALS; 673 674 /* The sampler always gets flushed on i965 (sigh) */ 675 if (IS_I965G(dev)) 676 flush_domains |= I915_GEM_DOMAIN_SAMPLER; 677 BEGIN_LP_RING(3); 678 OUT_RING(cmd); 679 OUT_RING(0); /* noop */ 680 ADVANCE_LP_RING(); 681 682 return flush_domains; 683 } 684 685 /** 686 * Moves buffers associated only with the given active seqno from the active 687 * to inactive list, potentially freeing them. 688 */ 689 static void 690 i915_gem_retire_request(struct drm_device *dev, 691 struct drm_i915_gem_request *request) 692 { 693 drm_i915_private_t *dev_priv = dev->dev_private; 694 /* Move any buffers on the active list that are no longer referenced 695 * by the ringbuffer to the flushing/inactive lists as appropriate. 696 */ 697 while (!list_empty(&dev_priv->mm.active_list)) { 698 struct drm_gem_object *obj; 699 struct drm_i915_gem_object *obj_priv; 700 701 obj_priv = list_entry(dev_priv->mm.active_list.next, 702 struct drm_i915_gem_object, 703 list); 704 obj = obj_priv->obj; 705 706 /* If the seqno being retired doesn't match the oldest in the 707 * list, then the oldest in the list must still be newer than 708 * this seqno. 709 */ 710 if (obj_priv->last_rendering_seqno != request->seqno) 711 return; 712 713 DRM_DEBUG("%s: retire %d moves to inactive list %p\n", 714 __func__, request->seqno, obj); 715 716 if (obj->write_domain != 0) { 717 i915_gem_object_move_to_flushing(obj); 718 } else { 719 i915_gem_object_move_to_inactive(obj); 720 } 721 } 722 } 723 724 /** 725 * Returns true if seq1 is later than seq2. 726 */ 727 static int 728 i915_seqno_passed(uint32_t seq1, uint32_t seq2) 729 { 730 return (int32_t)(seq1 - seq2) >= 0; 731 } 732 733 uint32_t 734 i915_get_gem_seqno(struct drm_device *dev) 735 { 736 drm_i915_private_t *dev_priv = dev->dev_private; 737 738 return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); 739 } 740 741 /** 742 * This function clears the request list as sequence numbers are passed. 743 */ 744 void 745 i915_gem_retire_requests(struct drm_device *dev) 746 { 747 drm_i915_private_t *dev_priv = dev->dev_private; 748 uint32_t seqno; 749 750 seqno = i915_get_gem_seqno(dev); 751 752 while (!list_empty(&dev_priv->mm.request_list)) { 753 struct drm_i915_gem_request *request; 754 uint32_t retiring_seqno; 755 request = (struct drm_i915_gem_request *)(uintptr_t)(dev_priv->mm.request_list.next->contain_ptr); 756 retiring_seqno = request->seqno; 757 758 if (i915_seqno_passed(seqno, retiring_seqno) || 759 dev_priv->mm.wedged) { 760 i915_gem_retire_request(dev, request); 761 762 list_del(&request->list); 763 drm_free(request, sizeof(*request), DRM_MEM_DRIVER); 764 } else 765 break; 766 } 767 } 768 769 void 770 i915_gem_retire_work_handler(void *device) 771 { 772 struct drm_device *dev = (struct drm_device *)device; 773 drm_i915_private_t *dev_priv = dev->dev_private; 774 775 spin_lock(&dev->struct_mutex); 776 777 /* Return if gem idle */ 778 if (worktimer_id == NULL) { 779 spin_unlock(&dev->struct_mutex); 780 return; 781 } 782 783 i915_gem_retire_requests(dev); 784 if (!dev_priv->mm.suspended && !list_empty(&dev_priv->mm.request_list)) 785 { 786 DRM_DEBUG("i915_gem: schedule_delayed_work"); 787 worktimer_id = timeout(i915_gem_retire_work_handler, (void *) dev, DRM_HZ); 788 } 789 spin_unlock(&dev->struct_mutex); 790 } 791 792 /** 793 * i965_reset - reset chip after a hang 794 * @dev: drm device to reset 795 * @flags: reset domains 796 * 797 * Reset the chip. Useful if a hang is detected. 798 * 799 * Procedure is fairly simple: 800 * - reset the chip using the reset reg 801 * - re-init context state 802 * - re-init hardware status page 803 * - re-init ring buffer 804 * - re-init interrupt state 805 * - re-init display 806 */ 807 void i965_reset(struct drm_device *dev, u8 flags) 808 { 809 ddi_acc_handle_t conf_hdl; 810 drm_i915_private_t *dev_priv = dev->dev_private; 811 int timeout = 0; 812 uint8_t gdrst; 813 814 if (flags & GDRST_FULL) 815 i915_save_display(dev); 816 817 if (pci_config_setup(dev->dip, &conf_hdl) != DDI_SUCCESS) { 818 DRM_ERROR(("i915_reset: pci_config_setup fail")); 819 return; 820 } 821 822 /* 823 * Set the reset bit, wait for reset, then clear it. Hardware 824 * will clear the status bit (bit 1) when it's actually ready 825 * for action again. 826 */ 827 gdrst = pci_config_get8(conf_hdl, GDRST); 828 pci_config_put8(conf_hdl, GDRST, gdrst | flags); 829 drv_usecwait(50); 830 pci_config_put8(conf_hdl, GDRST, gdrst | 0xfe); 831 832 /* ...we don't want to loop forever though, 500ms should be plenty */ 833 do { 834 drv_usecwait(100); 835 gdrst = pci_config_get8(conf_hdl, GDRST); 836 } while ((gdrst & 2) && (timeout++ < 5)); 837 838 /* Ok now get things going again... */ 839 840 /* 841 * Everything depends on having the GTT running, so we need to start 842 * there. Fortunately we don't need to do this unless we reset the 843 * chip at a PCI level. 844 * 845 * Next we need to restore the context, but we don't use those 846 * yet either... 847 * 848 * Ring buffer needs to be re-initialized in the KMS case, or if X 849 * was running at the time of the reset (i.e. we weren't VT 850 * switched away). 851 */ 852 if (!dev_priv->mm.suspended) { 853 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 854 struct drm_gem_object *obj = ring->ring_obj; 855 struct drm_i915_gem_object *obj_priv = obj->driver_private; 856 dev_priv->mm.suspended = 0; 857 858 /* Stop the ring if it's running. */ 859 I915_WRITE(PRB0_CTL, 0); 860 I915_WRITE(PRB0_TAIL, 0); 861 I915_WRITE(PRB0_HEAD, 0); 862 863 /* Initialize the ring. */ 864 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 865 I915_WRITE(PRB0_CTL, 866 ((obj->size - 4096) & RING_NR_PAGES) | 867 RING_NO_REPORT | 868 RING_VALID); 869 i915_kernel_lost_context(dev); 870 871 drm_irq_install(dev); 872 } 873 874 /* 875 * Display needs restore too... 876 */ 877 if (flags & GDRST_FULL) 878 i915_restore_display(dev); 879 } 880 881 /** 882 * Waits for a sequence number to be signaled, and cleans up the 883 * request and object lists appropriately for that event. 884 */ 885 int 886 i915_wait_request(struct drm_device *dev, uint32_t seqno) 887 { 888 drm_i915_private_t *dev_priv = dev->dev_private; 889 int ret = 0; 890 891 ASSERT(seqno != 0); 892 893 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { 894 dev_priv->mm.waiting_gem_seqno = seqno; 895 i915_user_irq_on(dev); 896 DRM_WAIT(ret, &dev_priv->irq_queue, 897 (i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || 898 dev_priv->mm.wedged)); 899 i915_user_irq_off(dev); 900 dev_priv->mm.waiting_gem_seqno = 0; 901 } 902 if (dev_priv->mm.wedged) { 903 ret = EIO; 904 } 905 906 /* GPU maybe hang, reset needed*/ 907 if (ret == -2 && (seqno > i915_get_gem_seqno(dev))) { 908 if (IS_I965G(dev)) { 909 DRM_ERROR("GPU hang detected try to reset ... wait for irq_queue seqno %d, now seqno %d", seqno, i915_get_gem_seqno(dev)); 910 dev_priv->mm.wedged = 1; 911 i965_reset(dev, GDRST_RENDER); 912 i915_gem_retire_requests(dev); 913 dev_priv->mm.wedged = 0; 914 } 915 else 916 DRM_ERROR("GPU hang detected.... reboot required"); 917 return 0; 918 } 919 /* Directly dispatch request retiring. While we have the work queue 920 * to handle this, the waiter on a request often wants an associated 921 * buffer to have made it to the inactive list, and we would need 922 * a separate wait queue to handle that. 923 */ 924 if (ret == 0) 925 i915_gem_retire_requests(dev); 926 927 return ret; 928 } 929 930 static void 931 i915_gem_flush(struct drm_device *dev, 932 uint32_t invalidate_domains, 933 uint32_t flush_domains) 934 { 935 drm_i915_private_t *dev_priv = dev->dev_private; 936 uint32_t cmd; 937 RING_LOCALS; 938 939 DRM_DEBUG("%s: invalidate %08x flush %08x\n", __func__, 940 invalidate_domains, flush_domains); 941 942 if (flush_domains & I915_GEM_DOMAIN_CPU) 943 drm_agp_chipset_flush(dev); 944 945 if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU | 946 I915_GEM_DOMAIN_GTT)) { 947 /* 948 * read/write caches: 949 * 950 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 951 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 952 * also flushed at 2d versus 3d pipeline switches. 953 * 954 * read-only caches: 955 * 956 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 957 * MI_READ_FLUSH is set, and is always flushed on 965. 958 * 959 * I915_GEM_DOMAIN_COMMAND may not exist? 960 * 961 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 962 * invalidated when MI_EXE_FLUSH is set. 963 * 964 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 965 * invalidated with every MI_FLUSH. 966 * 967 * TLBs: 968 * 969 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 970 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 971 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 972 * are flushed at any MI_FLUSH. 973 */ 974 975 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 976 if ((invalidate_domains|flush_domains) & 977 I915_GEM_DOMAIN_RENDER) 978 cmd &= ~MI_NO_WRITE_FLUSH; 979 if (!IS_I965G(dev)) { 980 /* 981 * On the 965, the sampler cache always gets flushed 982 * and this bit is reserved. 983 */ 984 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 985 cmd |= MI_READ_FLUSH; 986 } 987 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 988 cmd |= MI_EXE_FLUSH; 989 990 DRM_DEBUG("%s: queue flush %08x to ring\n", __func__, cmd); 991 992 BEGIN_LP_RING(2); 993 OUT_RING(cmd); 994 OUT_RING(0); /* noop */ 995 ADVANCE_LP_RING(); 996 } 997 } 998 999 /** 1000 * Ensures that all rendering to the object has completed and the object is 1001 * safe to unbind from the GTT or access from the CPU. 1002 */ 1003 static int 1004 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1005 { 1006 struct drm_device *dev = obj->dev; 1007 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1008 int ret, seqno; 1009 1010 /* This function only exists to support waiting for existing rendering, 1011 * not for emitting required flushes. 1012 */ 1013 1014 if((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0) { 1015 DRM_ERROR("write domain should not be GPU DOMAIN %d", obj_priv->active); 1016 return 0; 1017 } 1018 1019 /* If there is rendering queued on the buffer being evicted, wait for 1020 * it. 1021 */ 1022 if (obj_priv->active) { 1023 DRM_DEBUG("%s: object %d %p wait for seqno %08x\n", 1024 __func__, obj->name, obj, obj_priv->last_rendering_seqno); 1025 1026 seqno = obj_priv->last_rendering_seqno; 1027 if (seqno == 0) { 1028 DRM_DEBUG("last rendering maybe finished"); 1029 return 0; 1030 } 1031 ret = i915_wait_request(dev, seqno); 1032 if (ret != 0) { 1033 DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev)); 1034 return ret; 1035 } 1036 } 1037 1038 return 0; 1039 } 1040 1041 /** 1042 * Unbinds an object from the GTT aperture. 1043 */ 1044 int 1045 i915_gem_object_unbind(struct drm_gem_object *obj, uint32_t type) 1046 { 1047 struct drm_device *dev = obj->dev; 1048 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1049 int ret = 0; 1050 1051 if (obj_priv->gtt_space == NULL) 1052 return 0; 1053 1054 if (obj_priv->pin_count != 0) { 1055 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1056 return EINVAL; 1057 } 1058 1059 /* Wait for any rendering to complete 1060 */ 1061 ret = i915_gem_object_wait_rendering(obj); 1062 if (ret) { 1063 DRM_ERROR("wait_rendering failed: %d\n", ret); 1064 return ret; 1065 } 1066 1067 /* Move the object to the CPU domain to ensure that 1068 * any possible CPU writes while it's not in the GTT 1069 * are flushed when we go to remap it. This will 1070 * also ensure that all pending GPU writes are finished 1071 * before we unbind. 1072 */ 1073 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1074 if (ret) { 1075 DRM_ERROR("set_domain failed: %d\n", ret); 1076 return ret; 1077 } 1078 1079 if (!obj_priv->agp_mem) { 1080 drm_agp_unbind_pages(dev, obj->size / PAGE_SIZE, obj_priv->gtt_offset, type); 1081 obj_priv->agp_mem = -1; 1082 } 1083 1084 ASSERT(!obj_priv->active); 1085 1086 i915_gem_object_free_page_list(obj); 1087 1088 if (obj_priv->gtt_space) { 1089 atomic_dec(&dev->gtt_count); 1090 atomic_sub(obj->size, &dev->gtt_memory); 1091 drm_mm_put_block(obj_priv->gtt_space); 1092 obj_priv->gtt_space = NULL; 1093 } 1094 1095 /* Remove ourselves from the LRU list if present. */ 1096 if (!list_empty(&obj_priv->list)) 1097 list_del_init(&obj_priv->list); 1098 1099 return 0; 1100 } 1101 1102 static int 1103 i915_gem_evict_something(struct drm_device *dev) 1104 { 1105 drm_i915_private_t *dev_priv = dev->dev_private; 1106 struct drm_gem_object *obj; 1107 struct drm_i915_gem_object *obj_priv; 1108 int ret = 0; 1109 1110 for (;;) { 1111 /* If there's an inactive buffer available now, grab it 1112 * and be done. 1113 */ 1114 if (!list_empty(&dev_priv->mm.inactive_list)) { 1115 obj_priv = list_entry(dev_priv->mm.inactive_list.next, 1116 struct drm_i915_gem_object, 1117 list); 1118 obj = obj_priv->obj; 1119 ASSERT(!(obj_priv->pin_count != 0)); 1120 DRM_DEBUG("%s: evicting %d\n", __func__, obj->name); 1121 ASSERT(!(obj_priv->active)); 1122 /* Wait on the rendering and unbind the buffer. */ 1123 ret = i915_gem_object_unbind(obj, 1); 1124 break; 1125 } 1126 /* If we didn't get anything, but the ring is still processing 1127 * things, wait for one of those things to finish and hopefully 1128 * leave us a buffer to evict. 1129 */ 1130 if (!list_empty(&dev_priv->mm.request_list)) { 1131 struct drm_i915_gem_request *request; 1132 1133 request = list_entry(dev_priv->mm.request_list.next, 1134 struct drm_i915_gem_request, 1135 list); 1136 1137 ret = i915_wait_request(dev, request->seqno); 1138 if (ret) { 1139 break; 1140 } 1141 /* if waiting caused an object to become inactive, 1142 * then loop around and wait for it. Otherwise, we 1143 * assume that waiting freed and unbound something, 1144 * so there should now be some space in the GTT 1145 */ 1146 if (!list_empty(&dev_priv->mm.inactive_list)) 1147 continue; 1148 break; 1149 } 1150 1151 /* If we didn't have anything on the request list but there 1152 * are buffers awaiting a flush, emit one and try again. 1153 * When we wait on it, those buffers waiting for that flush 1154 * will get moved to inactive. 1155 */ 1156 if (!list_empty(&dev_priv->mm.flushing_list)) { 1157 obj_priv = list_entry(dev_priv->mm.flushing_list.next, 1158 struct drm_i915_gem_object, 1159 list); 1160 obj = obj_priv->obj; 1161 1162 i915_gem_flush(dev, 1163 obj->write_domain, 1164 obj->write_domain); 1165 (void) i915_add_request(dev, obj->write_domain); 1166 1167 obj = NULL; 1168 continue; 1169 } 1170 1171 DRM_ERROR("inactive empty %d request empty %d " 1172 "flushing empty %d\n", 1173 list_empty(&dev_priv->mm.inactive_list), 1174 list_empty(&dev_priv->mm.request_list), 1175 list_empty(&dev_priv->mm.flushing_list)); 1176 /* If we didn't do any of the above, there's nothing to be done 1177 * and we just can't fit it in. 1178 */ 1179 return ENOMEM; 1180 } 1181 return ret; 1182 } 1183 1184 static int 1185 i915_gem_evict_everything(struct drm_device *dev) 1186 { 1187 int ret; 1188 1189 for (;;) { 1190 ret = i915_gem_evict_something(dev); 1191 if (ret != 0) 1192 break; 1193 } 1194 if (ret == ENOMEM) 1195 return 0; 1196 else 1197 DRM_ERROR("evict_everything ret %d", ret); 1198 return ret; 1199 } 1200 1201 /** 1202 * Finds free space in the GTT aperture and binds the object there. 1203 */ 1204 static int 1205 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, uint32_t alignment) 1206 { 1207 struct drm_device *dev = obj->dev; 1208 drm_i915_private_t *dev_priv = dev->dev_private; 1209 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1210 struct drm_mm_node *free_space; 1211 int page_count, ret; 1212 1213 if (dev_priv->mm.suspended) 1214 return EBUSY; 1215 if (alignment == 0) 1216 alignment = PAGE_SIZE; 1217 if (alignment & (PAGE_SIZE - 1)) { 1218 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 1219 return EINVAL; 1220 } 1221 1222 if (obj_priv->gtt_space) { 1223 DRM_ERROR("Already bind!!"); 1224 return 0; 1225 } 1226 search_free: 1227 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 1228 (unsigned long) obj->size, alignment, 0); 1229 if (free_space != NULL) { 1230 obj_priv->gtt_space = drm_mm_get_block(free_space, (unsigned long) obj->size, 1231 alignment); 1232 if (obj_priv->gtt_space != NULL) { 1233 obj_priv->gtt_space->private = obj; 1234 obj_priv->gtt_offset = obj_priv->gtt_space->start; 1235 } 1236 } 1237 if (obj_priv->gtt_space == NULL) { 1238 /* If the gtt is empty and we're still having trouble 1239 * fitting our object in, we're out of memory. 1240 */ 1241 if (list_empty(&dev_priv->mm.inactive_list) && 1242 list_empty(&dev_priv->mm.flushing_list) && 1243 list_empty(&dev_priv->mm.active_list)) { 1244 DRM_ERROR("GTT full, but LRU list empty\n"); 1245 return ENOMEM; 1246 } 1247 1248 ret = i915_gem_evict_something(dev); 1249 if (ret != 0) { 1250 DRM_ERROR("Failed to evict a buffer %d\n", ret); 1251 return ret; 1252 } 1253 goto search_free; 1254 } 1255 1256 ret = i915_gem_object_get_page_list(obj); 1257 if (ret) { 1258 drm_mm_put_block(obj_priv->gtt_space); 1259 obj_priv->gtt_space = NULL; 1260 DRM_ERROR("bind to gtt failed to get page list"); 1261 return ret; 1262 } 1263 1264 page_count = obj->size / PAGE_SIZE; 1265 /* Create an AGP memory structure pointing at our pages, and bind it 1266 * into the GTT. 1267 */ 1268 DRM_DEBUG("Binding object %d of page_count %d at gtt_offset 0x%x obj->pfnarray = 0x%lx", 1269 obj->name, page_count, obj_priv->gtt_offset, obj->pfnarray); 1270 1271 obj_priv->agp_mem = drm_agp_bind_pages(dev, 1272 obj->pfnarray, 1273 page_count, 1274 obj_priv->gtt_offset); 1275 if (obj_priv->agp_mem) { 1276 i915_gem_object_free_page_list(obj); 1277 drm_mm_put_block(obj_priv->gtt_space); 1278 obj_priv->gtt_space = NULL; 1279 DRM_ERROR("Failed to bind pages obj %d, obj 0x%lx", obj->name, obj); 1280 return ENOMEM; 1281 } 1282 atomic_inc(&dev->gtt_count); 1283 atomic_add(obj->size, &dev->gtt_memory); 1284 1285 /* Assert that the object is not currently in any GPU domain. As it 1286 * wasn't in the GTT, there shouldn't be any way it could have been in 1287 * a GPU cache 1288 */ 1289 ASSERT(!(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))); 1290 ASSERT(!(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))); 1291 1292 return 0; 1293 } 1294 1295 void 1296 i915_gem_clflush_object(struct drm_gem_object *obj) 1297 { 1298 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1299 1300 /* If we don't have a page list set up, then we're not pinned 1301 * to GPU, and we can ignore the cache flush because it'll happen 1302 * again at bind time. 1303 */ 1304 1305 if (obj_priv->page_list == NULL) 1306 return; 1307 drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE); 1308 } 1309 1310 /** Flushes any GPU write domain for the object if it's dirty. */ 1311 static void 1312 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 1313 { 1314 struct drm_device *dev = obj->dev; 1315 uint32_t seqno; 1316 1317 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 1318 return; 1319 1320 /* Queue the GPU write cache flushing we need. */ 1321 i915_gem_flush(dev, 0, obj->write_domain); 1322 seqno = i915_add_request(dev, obj->write_domain); 1323 DRM_DEBUG("flush_gpu_write_domain seqno = %d", seqno); 1324 obj->write_domain = 0; 1325 i915_gem_object_move_to_active(obj, seqno); 1326 } 1327 1328 /** Flushes the GTT write domain for the object if it's dirty. */ 1329 static void 1330 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 1331 { 1332 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 1333 return; 1334 1335 /* No actual flushing is required for the GTT write domain. Writes 1336 * to it immediately go to main memory as far as we know, so there's 1337 * no chipset flush. It also doesn't land in render cache. 1338 */ 1339 obj->write_domain = 0; 1340 } 1341 1342 /** Flushes the CPU write domain for the object if it's dirty. */ 1343 static void 1344 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 1345 { 1346 struct drm_device *dev = obj->dev; 1347 1348 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 1349 return; 1350 1351 i915_gem_clflush_object(obj); 1352 drm_agp_chipset_flush(dev); 1353 obj->write_domain = 0; 1354 } 1355 1356 /** 1357 * Moves a single object to the GTT read, and possibly write domain. 1358 * 1359 * This function returns when the move is complete, including waiting on 1360 * flushes to occur. 1361 */ 1362 static int 1363 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 1364 { 1365 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1366 int ret; 1367 1368 /* Not valid to be called on unbound objects. */ 1369 if (obj_priv->gtt_space == NULL) 1370 return EINVAL; 1371 1372 i915_gem_object_flush_gpu_write_domain(obj); 1373 /* Wait on any GPU rendering and flushing to occur. */ 1374 ret = i915_gem_object_wait_rendering(obj); 1375 if (ret != 0) { 1376 DRM_ERROR("set_to_gtt_domain wait_rendering ret %d", ret); 1377 return ret; 1378 } 1379 /* If we're writing through the GTT domain, then CPU and GPU caches 1380 * will need to be invalidated at next use. 1381 */ 1382 if (write) 1383 obj->read_domains &= I915_GEM_DOMAIN_GTT; 1384 i915_gem_object_flush_cpu_write_domain(obj); 1385 1386 DRM_DEBUG("i915_gem_object_set_to_gtt_domain obj->read_domains %x ", obj->read_domains); 1387 /* It should now be out of any other write domains, and we can update 1388 * the domain values for our changes. 1389 */ 1390 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0)); 1391 obj->read_domains |= I915_GEM_DOMAIN_GTT; 1392 if (write) { 1393 obj->write_domain = I915_GEM_DOMAIN_GTT; 1394 obj_priv->dirty = 1; 1395 } 1396 1397 return 0; 1398 } 1399 1400 /** 1401 * Moves a single object to the CPU read, and possibly write domain. 1402 * 1403 * This function returns when the move is complete, including waiting on 1404 * flushes to occur. 1405 */ 1406 static int 1407 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 1408 { 1409 struct drm_device *dev = obj->dev; 1410 int ret; 1411 1412 1413 i915_gem_object_flush_gpu_write_domain(obj); 1414 /* Wait on any GPU rendering and flushing to occur. */ 1415 1416 ret = i915_gem_object_wait_rendering(obj); 1417 if (ret != 0) 1418 return ret; 1419 1420 i915_gem_object_flush_gtt_write_domain(obj); 1421 1422 /* If we have a partially-valid cache of the object in the CPU, 1423 * finish invalidating it and free the per-page flags. 1424 */ 1425 i915_gem_object_set_to_full_cpu_read_domain(obj); 1426 1427 /* Flush the CPU cache if it's still invalid. */ 1428 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 1429 i915_gem_clflush_object(obj); 1430 drm_agp_chipset_flush(dev); 1431 obj->read_domains |= I915_GEM_DOMAIN_CPU; 1432 } 1433 1434 /* It should now be out of any other write domains, and we can update 1435 * the domain values for our changes. 1436 */ 1437 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0)); 1438 1439 /* If we're writing through the CPU, then the GPU read domains will 1440 * need to be invalidated at next use. 1441 */ 1442 if (write) { 1443 obj->read_domains &= I915_GEM_DOMAIN_CPU; 1444 obj->write_domain = I915_GEM_DOMAIN_CPU; 1445 } 1446 1447 return 0; 1448 } 1449 1450 /* 1451 * Set the next domain for the specified object. This 1452 * may not actually perform the necessary flushing/invaliding though, 1453 * as that may want to be batched with other set_domain operations 1454 * 1455 * This is (we hope) the only really tricky part of gem. The goal 1456 * is fairly simple -- track which caches hold bits of the object 1457 * and make sure they remain coherent. A few concrete examples may 1458 * help to explain how it works. For shorthand, we use the notation 1459 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 1460 * a pair of read and write domain masks. 1461 * 1462 * Case 1: the batch buffer 1463 * 1464 * 1. Allocated 1465 * 2. Written by CPU 1466 * 3. Mapped to GTT 1467 * 4. Read by GPU 1468 * 5. Unmapped from GTT 1469 * 6. Freed 1470 * 1471 * Let's take these a step at a time 1472 * 1473 * 1. Allocated 1474 * Pages allocated from the kernel may still have 1475 * cache contents, so we set them to (CPU, CPU) always. 1476 * 2. Written by CPU (using pwrite) 1477 * The pwrite function calls set_domain (CPU, CPU) and 1478 * this function does nothing (as nothing changes) 1479 * 3. Mapped by GTT 1480 * This function asserts that the object is not 1481 * currently in any GPU-based read or write domains 1482 * 4. Read by GPU 1483 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 1484 * As write_domain is zero, this function adds in the 1485 * current read domains (CPU+COMMAND, 0). 1486 * flush_domains is set to CPU. 1487 * invalidate_domains is set to COMMAND 1488 * clflush is run to get data out of the CPU caches 1489 * then i915_dev_set_domain calls i915_gem_flush to 1490 * emit an MI_FLUSH and drm_agp_chipset_flush 1491 * 5. Unmapped from GTT 1492 * i915_gem_object_unbind calls set_domain (CPU, CPU) 1493 * flush_domains and invalidate_domains end up both zero 1494 * so no flushing/invalidating happens 1495 * 6. Freed 1496 * yay, done 1497 * 1498 * Case 2: The shared render buffer 1499 * 1500 * 1. Allocated 1501 * 2. Mapped to GTT 1502 * 3. Read/written by GPU 1503 * 4. set_domain to (CPU,CPU) 1504 * 5. Read/written by CPU 1505 * 6. Read/written by GPU 1506 * 1507 * 1. Allocated 1508 * Same as last example, (CPU, CPU) 1509 * 2. Mapped to GTT 1510 * Nothing changes (assertions find that it is not in the GPU) 1511 * 3. Read/written by GPU 1512 * execbuffer calls set_domain (RENDER, RENDER) 1513 * flush_domains gets CPU 1514 * invalidate_domains gets GPU 1515 * clflush (obj) 1516 * MI_FLUSH and drm_agp_chipset_flush 1517 * 4. set_domain (CPU, CPU) 1518 * flush_domains gets GPU 1519 * invalidate_domains gets CPU 1520 * wait_rendering (obj) to make sure all drawing is complete. 1521 * This will include an MI_FLUSH to get the data from GPU 1522 * to memory 1523 * clflush (obj) to invalidate the CPU cache 1524 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 1525 * 5. Read/written by CPU 1526 * cache lines are loaded and dirtied 1527 * 6. Read written by GPU 1528 * Same as last GPU access 1529 * 1530 * Case 3: The constant buffer 1531 * 1532 * 1. Allocated 1533 * 2. Written by CPU 1534 * 3. Read by GPU 1535 * 4. Updated (written) by CPU again 1536 * 5. Read by GPU 1537 * 1538 * 1. Allocated 1539 * (CPU, CPU) 1540 * 2. Written by CPU 1541 * (CPU, CPU) 1542 * 3. Read by GPU 1543 * (CPU+RENDER, 0) 1544 * flush_domains = CPU 1545 * invalidate_domains = RENDER 1546 * clflush (obj) 1547 * MI_FLUSH 1548 * drm_agp_chipset_flush 1549 * 4. Updated (written) by CPU again 1550 * (CPU, CPU) 1551 * flush_domains = 0 (no previous write domain) 1552 * invalidate_domains = 0 (no new read domains) 1553 * 5. Read by GPU 1554 * (CPU+RENDER, 0) 1555 * flush_domains = CPU 1556 * invalidate_domains = RENDER 1557 * clflush (obj) 1558 * MI_FLUSH 1559 * drm_agp_chipset_flush 1560 */ 1561 static void 1562 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj, 1563 uint32_t read_domains, 1564 uint32_t write_domain) 1565 { 1566 struct drm_device *dev = obj->dev; 1567 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1568 uint32_t invalidate_domains = 0; 1569 uint32_t flush_domains = 0; 1570 1571 DRM_DEBUG("%s: object %p read %08x -> %08x write %08x -> %08x\n", 1572 __func__, obj, 1573 obj->read_domains, read_domains, 1574 obj->write_domain, write_domain); 1575 /* 1576 * If the object isn't moving to a new write domain, 1577 * let the object stay in multiple read domains 1578 */ 1579 if (write_domain == 0) 1580 read_domains |= obj->read_domains; 1581 else 1582 obj_priv->dirty = 1; 1583 1584 /* 1585 * Flush the current write domain if 1586 * the new read domains don't match. Invalidate 1587 * any read domains which differ from the old 1588 * write domain 1589 */ 1590 if (obj->write_domain && obj->write_domain != read_domains) { 1591 flush_domains |= obj->write_domain; 1592 invalidate_domains |= read_domains & ~obj->write_domain; 1593 } 1594 /* 1595 * Invalidate any read caches which may have 1596 * stale data. That is, any new read domains. 1597 */ 1598 invalidate_domains |= read_domains & ~obj->read_domains; 1599 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 1600 DRM_DEBUG("%s: CPU domain flush %08x invalidate %08x\n", 1601 __func__, flush_domains, invalidate_domains); 1602 i915_gem_clflush_object(obj); 1603 } 1604 1605 if ((write_domain | flush_domains) != 0) 1606 obj->write_domain = write_domain; 1607 obj->read_domains = read_domains; 1608 1609 dev->invalidate_domains |= invalidate_domains; 1610 dev->flush_domains |= flush_domains; 1611 1612 DRM_DEBUG("%s: read %08x write %08x invalidate %08x flush %08x\n", 1613 __func__, 1614 obj->read_domains, obj->write_domain, 1615 dev->invalidate_domains, dev->flush_domains); 1616 1617 } 1618 1619 /** 1620 * Moves the object from a partially CPU read to a full one. 1621 * 1622 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 1623 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 1624 */ 1625 static void 1626 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 1627 { 1628 struct drm_device *dev = obj->dev; 1629 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1630 1631 if (!obj_priv->page_cpu_valid) 1632 return; 1633 1634 /* If we're partially in the CPU read domain, finish moving it in. 1635 */ 1636 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 1637 int i; 1638 1639 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 1640 if (obj_priv->page_cpu_valid[i]) 1641 continue; 1642 drm_clflush_pages(obj_priv->page_list + i, 1); 1643 } 1644 drm_agp_chipset_flush(dev); 1645 } 1646 1647 /* Free the page_cpu_valid mappings which are now stale, whether 1648 * or not we've got I915_GEM_DOMAIN_CPU. 1649 */ 1650 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, 1651 DRM_MEM_DRIVER); 1652 obj_priv->page_cpu_valid = NULL; 1653 } 1654 1655 /** 1656 * Set the CPU read domain on a range of the object. 1657 * 1658 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 1659 * not entirely valid. The page_cpu_valid member of the object flags which 1660 * pages have been flushed, and will be respected by 1661 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 1662 * of the whole object. 1663 * 1664 * This function returns when the move is complete, including waiting on 1665 * flushes to occur. 1666 */ 1667 static int 1668 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 1669 uint64_t offset, uint64_t size) 1670 { 1671 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1672 int i, ret; 1673 1674 if (offset == 0 && size == obj->size) 1675 return i915_gem_object_set_to_cpu_domain(obj, 0); 1676 1677 i915_gem_object_flush_gpu_write_domain(obj); 1678 /* Wait on any GPU rendering and flushing to occur. */ 1679 ret = i915_gem_object_wait_rendering(obj); 1680 if (ret != 0) 1681 return ret; 1682 i915_gem_object_flush_gtt_write_domain(obj); 1683 1684 /* If we're already fully in the CPU read domain, we're done. */ 1685 if (obj_priv->page_cpu_valid == NULL && 1686 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 1687 return 0; 1688 1689 /* Otherwise, create/clear the per-page CPU read domain flag if we're 1690 * newly adding I915_GEM_DOMAIN_CPU 1691 */ 1692 if (obj_priv->page_cpu_valid == NULL) { 1693 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE, 1694 DRM_MEM_DRIVER); 1695 if (obj_priv->page_cpu_valid == NULL) 1696 return ENOMEM; 1697 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 1698 (void) memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 1699 1700 /* Flush the cache on any pages that are still invalid from the CPU's 1701 * perspective. 1702 */ 1703 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 1704 i++) { 1705 if (obj_priv->page_cpu_valid[i]) 1706 continue; 1707 1708 drm_clflush_pages(obj_priv->page_list + i, 1); 1709 obj_priv->page_cpu_valid[i] = 1; 1710 } 1711 1712 /* It should now be out of any other write domains, and we can update 1713 * the domain values for our changes. 1714 */ 1715 ASSERT(!((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0)); 1716 1717 obj->read_domains |= I915_GEM_DOMAIN_CPU; 1718 1719 return 0; 1720 } 1721 1722 /** 1723 * Pin an object to the GTT and evaluate the relocations landing in it. 1724 */ 1725 static int 1726 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 1727 struct drm_file *file_priv, 1728 struct drm_i915_gem_exec_object *entry) 1729 { 1730 struct drm_i915_gem_relocation_entry reloc; 1731 struct drm_i915_gem_relocation_entry __user *relocs; 1732 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1733 int i, ret; 1734 1735 /* Choose the GTT offset for our buffer and put it there. */ 1736 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 1737 if (ret) { 1738 DRM_ERROR("failed to pin"); 1739 return ret; 1740 } 1741 entry->offset = obj_priv->gtt_offset; 1742 1743 relocs = (struct drm_i915_gem_relocation_entry __user *) 1744 (uintptr_t) entry->relocs_ptr; 1745 /* Apply the relocations, using the GTT aperture to avoid cache 1746 * flushing requirements. 1747 */ 1748 for (i = 0; i < entry->relocation_count; i++) { 1749 struct drm_gem_object *target_obj; 1750 struct drm_i915_gem_object *target_obj_priv; 1751 uint32_t reloc_val, reloc_offset, *reloc_entry; 1752 1753 ret = DRM_COPY_FROM_USER(&reloc, relocs + i, sizeof(reloc)); 1754 if (ret != 0) { 1755 i915_gem_object_unpin(obj); 1756 DRM_ERROR("failed to copy from user"); 1757 return ret; 1758 } 1759 1760 target_obj = drm_gem_object_lookup(file_priv, 1761 reloc.target_handle); 1762 if (target_obj == NULL) { 1763 i915_gem_object_unpin(obj); 1764 return EBADF; 1765 } 1766 target_obj_priv = target_obj->driver_private; 1767 1768 /* The target buffer should have appeared before us in the 1769 * exec_object list, so it should have a GTT space bound by now. 1770 */ 1771 if (target_obj_priv->gtt_space == NULL) { 1772 DRM_ERROR("No GTT space found for object %d\n", 1773 reloc.target_handle); 1774 drm_gem_object_unreference(target_obj); 1775 i915_gem_object_unpin(obj); 1776 return EINVAL; 1777 } 1778 1779 if (reloc.offset > obj->size - 4) { 1780 DRM_ERROR("Relocation beyond object bounds: " 1781 "obj %p target %d offset %d size %d.\n", 1782 obj, reloc.target_handle, 1783 (int) reloc.offset, (int) obj->size); 1784 drm_gem_object_unreference(target_obj); 1785 i915_gem_object_unpin(obj); 1786 return EINVAL; 1787 } 1788 if (reloc.offset & 3) { 1789 DRM_ERROR("Relocation not 4-byte aligned: " 1790 "obj %p target %d offset %d.\n", 1791 obj, reloc.target_handle, 1792 (int) reloc.offset); 1793 drm_gem_object_unreference(target_obj); 1794 i915_gem_object_unpin(obj); 1795 return EINVAL; 1796 } 1797 1798 if (reloc.write_domain & I915_GEM_DOMAIN_CPU || 1799 reloc.read_domains & I915_GEM_DOMAIN_CPU) { 1800 DRM_ERROR("reloc with read/write CPU domains: " 1801 "obj %p target %d offset %d " 1802 "read %08x write %08x", 1803 obj, reloc.target_handle, 1804 (int) reloc.offset, 1805 reloc.read_domains, 1806 reloc.write_domain); 1807 drm_gem_object_unreference(target_obj); 1808 i915_gem_object_unpin(obj); 1809 return EINVAL; 1810 } 1811 1812 if (reloc.write_domain && target_obj->pending_write_domain && 1813 reloc.write_domain != target_obj->pending_write_domain) { 1814 DRM_ERROR("Write domain conflict: " 1815 "obj %p target %d offset %d " 1816 "new %08x old %08x\n", 1817 obj, reloc.target_handle, 1818 (int) reloc.offset, 1819 reloc.write_domain, 1820 target_obj->pending_write_domain); 1821 drm_gem_object_unreference(target_obj); 1822 i915_gem_object_unpin(obj); 1823 return EINVAL; 1824 } 1825 DRM_DEBUG("%s: obj %p offset %08x target %d " 1826 "read %08x write %08x gtt %08x " 1827 "presumed %08x delta %08x\n", 1828 __func__, 1829 obj, 1830 (int) reloc.offset, 1831 (int) reloc.target_handle, 1832 (int) reloc.read_domains, 1833 (int) reloc.write_domain, 1834 (int) target_obj_priv->gtt_offset, 1835 (int) reloc.presumed_offset, 1836 reloc.delta); 1837 1838 target_obj->pending_read_domains |= reloc.read_domains; 1839 target_obj->pending_write_domain |= reloc.write_domain; 1840 1841 /* If the relocation already has the right value in it, no 1842 * more work needs to be done. 1843 */ 1844 if (target_obj_priv->gtt_offset == reloc.presumed_offset) { 1845 drm_gem_object_unreference(target_obj); 1846 continue; 1847 } 1848 1849 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 1850 if (ret != 0) { 1851 drm_gem_object_unreference(target_obj); 1852 i915_gem_object_unpin(obj); 1853 return EINVAL; 1854 } 1855 1856 /* Map the page containing the relocation we're going to 1857 * perform. 1858 */ 1859 1860 int reloc_base = (reloc.offset & ~(PAGE_SIZE-1)); 1861 reloc_offset = reloc.offset & (PAGE_SIZE-1); 1862 reloc_entry = (uint32_t *)(uintptr_t)(obj_priv->page_list[reloc_base/PAGE_SIZE] + reloc_offset); 1863 reloc_val = target_obj_priv->gtt_offset + reloc.delta; 1864 *reloc_entry = reloc_val; 1865 1866 /* Write the updated presumed offset for this entry back out 1867 * to the user. 1868 */ 1869 reloc.presumed_offset = target_obj_priv->gtt_offset; 1870 ret = DRM_COPY_TO_USER(relocs + i, &reloc, sizeof(reloc)); 1871 if (ret != 0) { 1872 drm_gem_object_unreference(target_obj); 1873 i915_gem_object_unpin(obj); 1874 DRM_ERROR("%s: Failed to copy to user ret %d", __func__, ret); 1875 return ret; 1876 } 1877 1878 drm_gem_object_unreference(target_obj); 1879 } 1880 1881 return 0; 1882 } 1883 1884 /** Dispatch a batchbuffer to the ring 1885 */ 1886 static int 1887 i915_dispatch_gem_execbuffer(struct drm_device *dev, 1888 struct drm_i915_gem_execbuffer *exec, 1889 uint64_t exec_offset) 1890 { 1891 drm_i915_private_t *dev_priv = dev->dev_private; 1892 struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) 1893 (uintptr_t) exec->cliprects_ptr; 1894 int nbox = exec->num_cliprects; 1895 int i = 0, count; 1896 uint64_t exec_start, exec_len; 1897 RING_LOCALS; 1898 1899 exec_start = exec_offset + exec->batch_start_offset; 1900 exec_len = exec->batch_len; 1901 1902 if ((exec_start | exec_len) & 0x7) { 1903 DRM_ERROR("alignment\n"); 1904 return EINVAL; 1905 } 1906 1907 if (!exec_start) { 1908 DRM_ERROR("wrong arg"); 1909 return EINVAL; 1910 } 1911 1912 count = nbox ? nbox : 1; 1913 1914 for (i = 0; i < count; i++) { 1915 if (i < nbox) { 1916 int ret = i915_emit_box(dev, boxes, i, 1917 exec->DR1, exec->DR4); 1918 if (ret) { 1919 DRM_ERROR("i915_emit_box %d DR1 0x%lx DRI2 0x%lx", ret, exec->DR1, exec->DR4); 1920 return ret; 1921 } 1922 } 1923 if (IS_I830(dev) || IS_845G(dev)) { 1924 BEGIN_LP_RING(4); 1925 OUT_RING(MI_BATCH_BUFFER); 1926 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1927 OUT_RING(exec_start + exec_len - 4); 1928 OUT_RING(0); 1929 ADVANCE_LP_RING(); 1930 } else { 1931 BEGIN_LP_RING(2); 1932 if (IS_I965G(dev)) { 1933 OUT_RING(MI_BATCH_BUFFER_START | 1934 (2 << 6) | 1935 (3 << 9) | 1936 MI_BATCH_NON_SECURE_I965); 1937 OUT_RING(exec_start); 1938 1939 } else { 1940 OUT_RING(MI_BATCH_BUFFER_START | 1941 (2 << 6)); 1942 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 1943 } 1944 ADVANCE_LP_RING(); 1945 } 1946 } 1947 /* XXX breadcrumb */ 1948 return 0; 1949 } 1950 1951 /* Throttle our rendering by waiting until the ring has completed our requests 1952 * emitted over 20 msec ago. 1953 * 1954 * This should get us reasonable parallelism between CPU and GPU but also 1955 * relatively low latency when blocking on a particular request to finish. 1956 */ 1957 static int 1958 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 1959 { 1960 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 1961 int ret = 0; 1962 uint32_t seqno; 1963 1964 spin_lock(&dev->struct_mutex); 1965 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 1966 i915_file_priv->mm.last_gem_throttle_seqno = 1967 i915_file_priv->mm.last_gem_seqno; 1968 if (seqno) { 1969 ret = i915_wait_request(dev, seqno); 1970 if (ret != 0) 1971 DRM_ERROR("%s: i915_wait_request request->seqno %d now %d\n", __func__, seqno, i915_get_gem_seqno(dev)); 1972 } 1973 spin_unlock(&dev->struct_mutex); 1974 return ret; 1975 } 1976 1977 /*ARGSUSED*/ 1978 int 1979 i915_gem_execbuffer(DRM_IOCTL_ARGS) 1980 { 1981 DRM_DEVICE; 1982 drm_i915_private_t *dev_priv = dev->dev_private; 1983 struct drm_i915_file_private *i915_file_priv = fpriv->driver_priv; 1984 struct drm_i915_gem_execbuffer args; 1985 struct drm_i915_gem_exec_object *exec_list = NULL; 1986 struct drm_gem_object **object_list = NULL; 1987 struct drm_gem_object *batch_obj; 1988 struct drm_i915_gem_object *obj_priv; 1989 int ret = 0, i, pinned = 0; 1990 uint64_t exec_offset; 1991 uint32_t seqno, flush_domains; 1992 int pin_tries; 1993 1994 if (dev->driver->use_gem != 1) 1995 return ENODEV; 1996 1997 DRM_COPYFROM_WITH_RETURN(&args, 1998 (struct drm_i915_gem_execbuffer __user *) data, sizeof(args)); 1999 2000 DRM_DEBUG("buffer_count %d len %x\n", args.buffer_count, args.batch_len); 2001 2002 if (args.buffer_count < 1) { 2003 DRM_ERROR("execbuf with %d buffers\n", args.buffer_count); 2004 return EINVAL; 2005 } 2006 /* Copy in the exec list from userland */ 2007 exec_list = drm_calloc(sizeof(*exec_list), args.buffer_count, 2008 DRM_MEM_DRIVER); 2009 object_list = drm_calloc(sizeof(*object_list), args.buffer_count, 2010 DRM_MEM_DRIVER); 2011 if (exec_list == NULL || object_list == NULL) { 2012 DRM_ERROR("Failed to allocate exec or object list " 2013 "for %d buffers\n", 2014 args.buffer_count); 2015 ret = ENOMEM; 2016 goto pre_mutex_err; 2017 } 2018 2019 ret = DRM_COPY_FROM_USER(exec_list, 2020 (struct drm_i915_gem_exec_object __user *) 2021 (uintptr_t) args.buffers_ptr, 2022 sizeof(*exec_list) * args.buffer_count); 2023 if (ret != 0) { 2024 DRM_ERROR("copy %d exec entries failed %d\n", 2025 args.buffer_count, ret); 2026 goto pre_mutex_err; 2027 } 2028 spin_lock(&dev->struct_mutex); 2029 2030 if (dev_priv->mm.wedged) { 2031 DRM_ERROR("Execbuf while wedged\n"); 2032 spin_unlock(&dev->struct_mutex); 2033 return EIO; 2034 } 2035 2036 if (dev_priv->mm.suspended) { 2037 DRM_ERROR("Execbuf while VT-switched.\n"); 2038 spin_unlock(&dev->struct_mutex); 2039 return EBUSY; 2040 } 2041 2042 /* Look up object handles */ 2043 for (i = 0; i < args.buffer_count; i++) { 2044 object_list[i] = drm_gem_object_lookup(fpriv, 2045 exec_list[i].handle); 2046 if (object_list[i] == NULL) { 2047 DRM_ERROR("Invalid object handle %d at index %d\n", 2048 exec_list[i].handle, i); 2049 ret = EBADF; 2050 goto err; 2051 } 2052 obj_priv = object_list[i]->driver_private; 2053 if (obj_priv->in_execbuffer) { 2054 DRM_ERROR("Object[%d] (%d) %p appears more than once in object list in args.buffer_count %d \n", 2055 i, object_list[i]->name, object_list[i], args.buffer_count); 2056 2057 ret = EBADF; 2058 goto err; 2059 } 2060 2061 obj_priv->in_execbuffer = 1; 2062 } 2063 2064 /* Pin and relocate */ 2065 for (pin_tries = 0; ; pin_tries++) { 2066 ret = 0; 2067 for (i = 0; i < args.buffer_count; i++) { 2068 object_list[i]->pending_read_domains = 0; 2069 object_list[i]->pending_write_domain = 0; 2070 ret = i915_gem_object_pin_and_relocate(object_list[i], 2071 fpriv, 2072 &exec_list[i]); 2073 if (ret) { 2074 DRM_ERROR("Not all object pinned"); 2075 break; 2076 } 2077 pinned = i + 1; 2078 } 2079 /* success */ 2080 if (ret == 0) 2081 { 2082 DRM_DEBUG("gem_execbuffer pin_relocate success"); 2083 break; 2084 } 2085 /* error other than GTT full, or we've already tried again */ 2086 if (ret != ENOMEM || pin_tries >= 1) { 2087 if (ret != ERESTART) 2088 DRM_ERROR("Failed to pin buffers %d\n", ret); 2089 goto err; 2090 } 2091 2092 /* unpin all of our buffers */ 2093 for (i = 0; i < pinned; i++) 2094 i915_gem_object_unpin(object_list[i]); 2095 pinned = 0; 2096 2097 /* evict everyone we can from the aperture */ 2098 ret = i915_gem_evict_everything(dev); 2099 if (ret) 2100 goto err; 2101 } 2102 2103 /* Set the pending read domains for the batch buffer to COMMAND */ 2104 batch_obj = object_list[args.buffer_count-1]; 2105 batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; 2106 batch_obj->pending_write_domain = 0; 2107 2108 /* Zero the gloabl flush/invalidate flags. These 2109 * will be modified as each object is bound to the 2110 * gtt 2111 */ 2112 dev->invalidate_domains = 0; 2113 dev->flush_domains = 0; 2114 2115 for (i = 0; i < args.buffer_count; i++) { 2116 struct drm_gem_object *obj = object_list[i]; 2117 2118 /* Compute new gpu domains and update invalidate/flush */ 2119 i915_gem_object_set_to_gpu_domain(obj, 2120 obj->pending_read_domains, 2121 obj->pending_write_domain); 2122 } 2123 2124 if (dev->invalidate_domains | dev->flush_domains) { 2125 2126 DRM_DEBUG("%s: invalidate_domains %08x flush_domains %08x Then flush\n", 2127 __func__, 2128 dev->invalidate_domains, 2129 dev->flush_domains); 2130 i915_gem_flush(dev, 2131 dev->invalidate_domains, 2132 dev->flush_domains); 2133 if (dev->flush_domains) { 2134 (void) i915_add_request(dev, dev->flush_domains); 2135 2136 } 2137 } 2138 2139 for (i = 0; i < args.buffer_count; i++) { 2140 struct drm_gem_object *obj = object_list[i]; 2141 2142 obj->write_domain = obj->pending_write_domain; 2143 } 2144 2145 exec_offset = exec_list[args.buffer_count - 1].offset; 2146 2147 /* Exec the batchbuffer */ 2148 ret = i915_dispatch_gem_execbuffer(dev, &args, exec_offset); 2149 if (ret) { 2150 DRM_ERROR("dispatch failed %d\n", ret); 2151 goto err; 2152 } 2153 2154 /* 2155 * Ensure that the commands in the batch buffer are 2156 * finished before the interrupt fires 2157 */ 2158 flush_domains = i915_retire_commands(dev); 2159 2160 /* 2161 * Get a seqno representing the execution of the current buffer, 2162 * which we can wait on. We would like to mitigate these interrupts, 2163 * likely by only creating seqnos occasionally (so that we have 2164 * *some* interrupts representing completion of buffers that we can 2165 * wait on when trying to clear up gtt space). 2166 */ 2167 seqno = i915_add_request(dev, flush_domains); 2168 ASSERT(!(seqno == 0)); 2169 i915_file_priv->mm.last_gem_seqno = seqno; 2170 for (i = 0; i < args.buffer_count; i++) { 2171 struct drm_gem_object *obj = object_list[i]; 2172 i915_gem_object_move_to_active(obj, seqno); 2173 DRM_DEBUG("%s: move to exec list %p\n", __func__, obj); 2174 } 2175 2176 err: 2177 if (object_list != NULL) { 2178 for (i = 0; i < pinned; i++) 2179 i915_gem_object_unpin(object_list[i]); 2180 2181 for (i = 0; i < args.buffer_count; i++) { 2182 if (object_list[i]) { 2183 obj_priv = object_list[i]->driver_private; 2184 obj_priv->in_execbuffer = 0; 2185 } 2186 drm_gem_object_unreference(object_list[i]); 2187 } 2188 } 2189 spin_unlock(&dev->struct_mutex); 2190 2191 if (!ret) { 2192 /* Copy the new buffer offsets back to the user's exec list. */ 2193 ret = DRM_COPY_TO_USER((struct drm_i915_relocation_entry __user *) 2194 (uintptr_t) args.buffers_ptr, 2195 exec_list, 2196 sizeof(*exec_list) * args.buffer_count); 2197 if (ret) 2198 DRM_ERROR("failed to copy %d exec entries " 2199 "back to user (%d)\n", 2200 args.buffer_count, ret); 2201 } 2202 2203 pre_mutex_err: 2204 drm_free(object_list, sizeof(*object_list) * args.buffer_count, 2205 DRM_MEM_DRIVER); 2206 drm_free(exec_list, sizeof(*exec_list) * args.buffer_count, 2207 DRM_MEM_DRIVER); 2208 2209 return ret; 2210 } 2211 2212 int 2213 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 2214 { 2215 struct drm_device *dev = obj->dev; 2216 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2217 int ret; 2218 2219 if (obj_priv->gtt_space == NULL) { 2220 ret = i915_gem_object_bind_to_gtt(obj, alignment); 2221 if (ret != 0) { 2222 DRM_ERROR("Failure to bind: %d", ret); 2223 return ret; 2224 } 2225 } 2226 obj_priv->pin_count++; 2227 2228 /* If the object is not active and not pending a flush, 2229 * remove it from the inactive list 2230 */ 2231 if (obj_priv->pin_count == 1) { 2232 atomic_inc(&dev->pin_count); 2233 atomic_add(obj->size, &dev->pin_memory); 2234 if (!obj_priv->active && 2235 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2236 I915_GEM_DOMAIN_GTT)) == 0 && 2237 !list_empty(&obj_priv->list)) 2238 list_del_init(&obj_priv->list); 2239 } 2240 return 0; 2241 } 2242 2243 void 2244 i915_gem_object_unpin(struct drm_gem_object *obj) 2245 { 2246 struct drm_device *dev = obj->dev; 2247 drm_i915_private_t *dev_priv = dev->dev_private; 2248 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2249 obj_priv->pin_count--; 2250 ASSERT(!(obj_priv->pin_count < 0)); 2251 ASSERT(!(obj_priv->gtt_space == NULL)); 2252 2253 /* If the object is no longer pinned, and is 2254 * neither active nor being flushed, then stick it on 2255 * the inactive list 2256 */ 2257 if (obj_priv->pin_count == 0) { 2258 if (!obj_priv->active && 2259 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 2260 I915_GEM_DOMAIN_GTT)) == 0) 2261 list_move_tail(&obj_priv->list, 2262 &dev_priv->mm.inactive_list, (caddr_t)obj_priv); 2263 atomic_dec(&dev->pin_count); 2264 atomic_sub(obj->size, &dev->pin_memory); 2265 } 2266 } 2267 2268 /*ARGSUSED*/ 2269 int 2270 i915_gem_pin_ioctl(DRM_IOCTL_ARGS) 2271 { 2272 DRM_DEVICE; 2273 struct drm_i915_gem_pin args; 2274 struct drm_gem_object *obj; 2275 struct drm_i915_gem_object *obj_priv; 2276 int ret; 2277 2278 if (dev->driver->use_gem != 1) 2279 return ENODEV; 2280 2281 DRM_COPYFROM_WITH_RETURN(&args, 2282 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2283 2284 spin_lock(&dev->struct_mutex); 2285 2286 obj = drm_gem_object_lookup(fpriv, args.handle); 2287 if (obj == NULL) { 2288 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 2289 args.handle); 2290 spin_unlock(&dev->struct_mutex); 2291 return EBADF; 2292 } 2293 DRM_DEBUG("i915_gem_pin_ioctl obj->name %d", obj->name); 2294 obj_priv = obj->driver_private; 2295 2296 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != fpriv) { 2297 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 2298 args.handle); 2299 drm_gem_object_unreference(obj); 2300 spin_unlock(&dev->struct_mutex); 2301 return EINVAL; 2302 } 2303 2304 obj_priv->user_pin_count++; 2305 obj_priv->pin_filp = fpriv; 2306 if (obj_priv->user_pin_count == 1) { 2307 ret = i915_gem_object_pin(obj, args.alignment); 2308 if (ret != 0) { 2309 drm_gem_object_unreference(obj); 2310 spin_unlock(&dev->struct_mutex); 2311 return ret; 2312 } 2313 } 2314 2315 /* XXX - flush the CPU caches for pinned objects 2316 * as the X server doesn't manage domains yet 2317 */ 2318 i915_gem_object_flush_cpu_write_domain(obj); 2319 args.offset = obj_priv->gtt_offset; 2320 2321 ret = DRM_COPY_TO_USER((struct drm_i915_gem_pin __user *) data, &args, sizeof(args)); 2322 if ( ret != 0) 2323 DRM_ERROR(" gem pin ioctl error! %d", ret); 2324 2325 drm_gem_object_unreference(obj); 2326 spin_unlock(&dev->struct_mutex); 2327 2328 return 0; 2329 } 2330 2331 /*ARGSUSED*/ 2332 int 2333 i915_gem_unpin_ioctl(DRM_IOCTL_ARGS) 2334 { 2335 DRM_DEVICE; 2336 struct drm_i915_gem_pin args; 2337 struct drm_gem_object *obj; 2338 struct drm_i915_gem_object *obj_priv; 2339 2340 if (dev->driver->use_gem != 1) 2341 return ENODEV; 2342 2343 DRM_COPYFROM_WITH_RETURN(&args, 2344 (struct drm_i915_gem_pin __user *) data, sizeof(args)); 2345 2346 spin_lock(&dev->struct_mutex); 2347 2348 obj = drm_gem_object_lookup(fpriv, args.handle); 2349 if (obj == NULL) { 2350 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 2351 args.handle); 2352 spin_unlock(&dev->struct_mutex); 2353 return EBADF; 2354 } 2355 obj_priv = obj->driver_private; 2356 DRM_DEBUG("i915_gem_unpin_ioctl, obj->name %d", obj->name); 2357 if (obj_priv->pin_filp != fpriv) { 2358 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 2359 args.handle); 2360 drm_gem_object_unreference(obj); 2361 spin_unlock(&dev->struct_mutex); 2362 return EINVAL; 2363 } 2364 obj_priv->user_pin_count--; 2365 if (obj_priv->user_pin_count == 0) { 2366 obj_priv->pin_filp = NULL; 2367 i915_gem_object_unpin(obj); 2368 } 2369 drm_gem_object_unreference(obj); 2370 spin_unlock(&dev->struct_mutex); 2371 return 0; 2372 } 2373 2374 /*ARGSUSED*/ 2375 int 2376 i915_gem_busy_ioctl(DRM_IOCTL_ARGS) 2377 { 2378 DRM_DEVICE; 2379 struct drm_i915_gem_busy args; 2380 struct drm_gem_object *obj; 2381 struct drm_i915_gem_object *obj_priv; 2382 int ret; 2383 2384 if (dev->driver->use_gem != 1) 2385 return ENODEV; 2386 2387 DRM_COPYFROM_WITH_RETURN(&args, 2388 (struct drm_i915_gem_busy __user *) data, sizeof(args)); 2389 2390 spin_lock(&dev->struct_mutex); 2391 obj = drm_gem_object_lookup(fpriv, args.handle); 2392 if (obj == NULL) { 2393 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 2394 args.handle); 2395 spin_unlock(&dev->struct_mutex); 2396 return EBADF; 2397 } 2398 2399 obj_priv = obj->driver_private; 2400 /* Don't count being on the flushing list against the object being 2401 * done. Otherwise, a buffer left on the flushing list but not getting 2402 * flushed (because nobody's flushing that domain) won't ever return 2403 * unbusy and get reused by libdrm's bo cache. The other expected 2404 * consumer of this interface, OpenGL's occlusion queries, also specs 2405 * that the objects get unbusy "eventually" without any interference. 2406 */ 2407 args.busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 2408 DRM_DEBUG("i915_gem_busy_ioctl call obj->name %d busy %d", obj->name, args.busy); 2409 2410 ret = DRM_COPY_TO_USER((struct drm_i915_gem_busy __user *) data, &args, sizeof(args)); 2411 if ( ret != 0) 2412 DRM_ERROR(" gem busy error! %d", ret); 2413 2414 drm_gem_object_unreference(obj); 2415 spin_unlock(&dev->struct_mutex); 2416 return 0; 2417 } 2418 2419 /*ARGSUSED*/ 2420 int 2421 i915_gem_throttle_ioctl(DRM_IOCTL_ARGS) 2422 { 2423 DRM_DEVICE; 2424 2425 if (dev->driver->use_gem != 1) 2426 return ENODEV; 2427 2428 return i915_gem_ring_throttle(dev, fpriv); 2429 } 2430 2431 static int 2432 i915_gem_object_get_page_list(struct drm_gem_object *obj) 2433 { 2434 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2435 caddr_t va; 2436 long i; 2437 2438 if (obj_priv->page_list) 2439 return 0; 2440 pgcnt_t np = btop(obj->size); 2441 2442 obj_priv->page_list = kmem_zalloc(np * sizeof(caddr_t), KM_SLEEP); 2443 if (obj_priv->page_list == NULL) { 2444 DRM_ERROR("Faled to allocate page list\n"); 2445 return ENOMEM; 2446 } 2447 2448 for (i = 0, va = obj->kaddr; i < np; i++, va += PAGESIZE) { 2449 obj_priv->page_list[i] = va; 2450 } 2451 return 0; 2452 } 2453 2454 2455 int i915_gem_init_object(struct drm_gem_object *obj) 2456 { 2457 struct drm_i915_gem_object *obj_priv; 2458 2459 obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); 2460 if (obj_priv == NULL) 2461 return ENOMEM; 2462 2463 /* 2464 * We've just allocated pages from the kernel, 2465 * so they've just been written by the CPU with 2466 * zeros. They'll need to be clflushed before we 2467 * use them with the GPU. 2468 */ 2469 obj->write_domain = I915_GEM_DOMAIN_CPU; 2470 obj->read_domains = I915_GEM_DOMAIN_CPU; 2471 2472 obj->driver_private = obj_priv; 2473 obj_priv->obj = obj; 2474 INIT_LIST_HEAD(&obj_priv->list); 2475 return 0; 2476 } 2477 2478 void i915_gem_free_object(struct drm_gem_object *obj) 2479 { 2480 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2481 2482 while (obj_priv->pin_count > 0) 2483 i915_gem_object_unpin(obj); 2484 2485 DRM_DEBUG("%s: obj %d",__func__, obj->name); 2486 2487 (void) i915_gem_object_unbind(obj, 1); 2488 if (obj_priv->page_cpu_valid != NULL) 2489 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, DRM_MEM_DRIVER); 2490 drm_free(obj->driver_private, sizeof(*obj_priv), DRM_MEM_DRIVER); 2491 } 2492 2493 /** Unbinds all objects that are on the given buffer list. */ 2494 static int 2495 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head, uint32_t type) 2496 { 2497 struct drm_gem_object *obj; 2498 struct drm_i915_gem_object *obj_priv; 2499 int ret; 2500 2501 while (!list_empty(head)) { 2502 obj_priv = list_entry(head->next, 2503 struct drm_i915_gem_object, 2504 list); 2505 obj = obj_priv->obj; 2506 2507 if (obj_priv->pin_count != 0) { 2508 DRM_ERROR("Pinned object in unbind list\n"); 2509 spin_unlock(&dev->struct_mutex); 2510 return EINVAL; 2511 } 2512 DRM_DEBUG("%s: obj %d type %d",__func__, obj->name, type); 2513 ret = i915_gem_object_unbind(obj, type); 2514 if (ret != 0) { 2515 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 2516 ret); 2517 spin_unlock(&dev->struct_mutex); 2518 return ret; 2519 } 2520 } 2521 2522 2523 return 0; 2524 } 2525 2526 static int 2527 i915_gem_idle(struct drm_device *dev, uint32_t type) 2528 { 2529 drm_i915_private_t *dev_priv = dev->dev_private; 2530 uint32_t seqno, cur_seqno, last_seqno; 2531 int stuck, ret; 2532 2533 spin_lock(&dev->struct_mutex); 2534 2535 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 2536 spin_unlock(&dev->struct_mutex); 2537 return 0; 2538 } 2539 2540 /* Hack! Don't let anybody do execbuf while we don't control the chip. 2541 * We need to replace this with a semaphore, or something. 2542 */ 2543 dev_priv->mm.suspended = 1; 2544 2545 /* Cancel the retire work handler, wait for it to finish if running 2546 */ 2547 if (worktimer_id != NULL) { 2548 (void) untimeout(worktimer_id); 2549 worktimer_id = NULL; 2550 } 2551 2552 i915_kernel_lost_context(dev); 2553 2554 /* Flush the GPU along with all non-CPU write domains 2555 */ 2556 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 2557 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2558 seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU | 2559 I915_GEM_DOMAIN_GTT)); 2560 if (seqno == 0) { 2561 spin_unlock(&dev->struct_mutex); 2562 return ENOMEM; 2563 } 2564 2565 dev_priv->mm.waiting_gem_seqno = seqno; 2566 last_seqno = 0; 2567 stuck = 0; 2568 for (;;) { 2569 cur_seqno = i915_get_gem_seqno(dev); 2570 if (i915_seqno_passed(cur_seqno, seqno)) 2571 break; 2572 if (last_seqno == cur_seqno) { 2573 if (stuck++ > 100) { 2574 DRM_ERROR("hardware wedged\n"); 2575 dev_priv->mm.wedged = 1; 2576 DRM_WAKEUP(&dev_priv->irq_queue); 2577 break; 2578 } 2579 } 2580 DRM_UDELAY(10); 2581 last_seqno = cur_seqno; 2582 } 2583 dev_priv->mm.waiting_gem_seqno = 0; 2584 2585 i915_gem_retire_requests(dev); 2586 2587 /* Empty the active and flushing lists to inactive. If there's 2588 * anything left at this point, it means that we're wedged and 2589 * nothing good's going to happen by leaving them there. So strip 2590 * the GPU domains and just stuff them onto inactive. 2591 */ 2592 while (!list_empty(&dev_priv->mm.active_list)) { 2593 struct drm_i915_gem_object *obj_priv; 2594 2595 obj_priv = list_entry(dev_priv->mm.active_list.next, 2596 struct drm_i915_gem_object, 2597 list); 2598 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2599 i915_gem_object_move_to_inactive(obj_priv->obj); 2600 } 2601 2602 while (!list_empty(&dev_priv->mm.flushing_list)) { 2603 struct drm_i915_gem_object *obj_priv; 2604 2605 obj_priv = list_entry(dev_priv->mm.flushing_list.next, 2606 struct drm_i915_gem_object, 2607 list); 2608 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 2609 i915_gem_object_move_to_inactive(obj_priv->obj); 2610 } 2611 2612 /* Move all inactive buffers out of the GTT. */ 2613 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list, type); 2614 ASSERT(list_empty(&dev_priv->mm.inactive_list)); 2615 if (ret) { 2616 spin_unlock(&dev->struct_mutex); 2617 return ret; 2618 } 2619 2620 i915_gem_cleanup_ringbuffer(dev); 2621 spin_unlock(&dev->struct_mutex); 2622 2623 return 0; 2624 } 2625 2626 static int 2627 i915_gem_init_hws(struct drm_device *dev) 2628 { 2629 drm_i915_private_t *dev_priv = dev->dev_private; 2630 struct drm_gem_object *obj; 2631 struct drm_i915_gem_object *obj_priv; 2632 int ret; 2633 2634 /* If we need a physical address for the status page, it's already 2635 * initialized at driver load time. 2636 */ 2637 if (!I915_NEED_GFX_HWS(dev)) 2638 return 0; 2639 2640 2641 obj = drm_gem_object_alloc(dev, 4096); 2642 if (obj == NULL) { 2643 DRM_ERROR("Failed to allocate status page\n"); 2644 return ENOMEM; 2645 } 2646 2647 obj_priv = obj->driver_private; 2648 2649 ret = i915_gem_object_pin(obj, 4096); 2650 if (ret != 0) { 2651 drm_gem_object_unreference(obj); 2652 return ret; 2653 } 2654 2655 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 2656 dev_priv->hws_map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2657 dev_priv->hws_map.size = 4096; 2658 dev_priv->hws_map.type = 0; 2659 dev_priv->hws_map.flags = 0; 2660 dev_priv->hws_map.mtrr = 0; 2661 2662 drm_core_ioremap(&dev_priv->hws_map, dev); 2663 if (dev_priv->hws_map.handle == NULL) { 2664 DRM_ERROR("Failed to map status page.\n"); 2665 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2666 drm_gem_object_unreference(obj); 2667 return EINVAL; 2668 } 2669 2670 dev_priv->hws_obj = obj; 2671 2672 dev_priv->hw_status_page = dev_priv->hws_map.handle; 2673 2674 (void) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 2675 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 2676 (void) I915_READ(HWS_PGA); /* posting read */ 2677 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 2678 2679 return 0; 2680 } 2681 2682 static void 2683 i915_gem_cleanup_hws(struct drm_device *dev) 2684 { 2685 drm_i915_private_t *dev_priv = dev->dev_private; 2686 struct drm_gem_object *obj; 2687 2688 if (dev_priv->hws_obj == NULL) 2689 return; 2690 2691 obj = dev_priv->hws_obj; 2692 2693 drm_core_ioremapfree(&dev_priv->hws_map, dev); 2694 i915_gem_object_unpin(obj); 2695 drm_gem_object_unreference(obj); 2696 dev_priv->hws_obj = NULL; 2697 2698 (void) memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 2699 dev_priv->hw_status_page = NULL; 2700 2701 /* Write high address into HWS_PGA when disabling. */ 2702 I915_WRITE(HWS_PGA, 0x1ffff000); 2703 } 2704 2705 int 2706 i915_gem_init_ringbuffer(struct drm_device *dev) 2707 { 2708 drm_i915_private_t *dev_priv = dev->dev_private; 2709 struct drm_gem_object *obj; 2710 struct drm_i915_gem_object *obj_priv; 2711 int ret; 2712 u32 head; 2713 2714 ret = i915_gem_init_hws(dev); 2715 if (ret != 0) 2716 return ret; 2717 obj = drm_gem_object_alloc(dev, 128 * 1024); 2718 if (obj == NULL) { 2719 DRM_ERROR("Failed to allocate ringbuffer\n"); 2720 i915_gem_cleanup_hws(dev); 2721 return ENOMEM; 2722 } 2723 2724 obj_priv = obj->driver_private; 2725 ret = i915_gem_object_pin(obj, 4096); 2726 if (ret != 0) { 2727 drm_gem_object_unreference(obj); 2728 i915_gem_cleanup_hws(dev); 2729 return ret; 2730 } 2731 2732 /* Set up the kernel mapping for the ring. */ 2733 dev_priv->ring.Size = obj->size; 2734 dev_priv->ring.tail_mask = obj->size - 1; 2735 2736 dev_priv->ring.map.offset = dev->agp->agp_info.agpi_aperbase + obj_priv->gtt_offset; 2737 dev_priv->ring.map.size = obj->size; 2738 dev_priv->ring.map.type = 0; 2739 dev_priv->ring.map.flags = 0; 2740 dev_priv->ring.map.mtrr = 0; 2741 2742 drm_core_ioremap(&dev_priv->ring.map, dev); 2743 if (dev_priv->ring.map.handle == NULL) { 2744 DRM_ERROR("Failed to map ringbuffer.\n"); 2745 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2746 drm_gem_object_unreference(obj); 2747 i915_gem_cleanup_hws(dev); 2748 return EINVAL; 2749 } 2750 2751 dev_priv->ring.ring_obj = obj; 2752 2753 dev_priv->ring.virtual_start = (u8 *) dev_priv->ring.map.handle; 2754 2755 /* Stop the ring if it's running. */ 2756 I915_WRITE(PRB0_CTL, 0); 2757 I915_WRITE(PRB0_HEAD, 0); 2758 I915_WRITE(PRB0_TAIL, 0); 2759 2760 2761 /* Initialize the ring. */ 2762 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 2763 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2764 2765 /* G45 ring initialization fails to reset head to zero */ 2766 if (head != 0) { 2767 DRM_ERROR("Ring head not reset to zero " 2768 "ctl %08x head %08x tail %08x start %08x\n", 2769 I915_READ(PRB0_CTL), 2770 I915_READ(PRB0_HEAD), 2771 I915_READ(PRB0_TAIL), 2772 I915_READ(PRB0_START)); 2773 I915_WRITE(PRB0_HEAD, 0); 2774 2775 DRM_ERROR("Ring head forced to zero " 2776 "ctl %08x head %08x tail %08x start %08x\n", 2777 I915_READ(PRB0_CTL), 2778 I915_READ(PRB0_HEAD), 2779 I915_READ(PRB0_TAIL), 2780 I915_READ(PRB0_START)); 2781 } 2782 2783 I915_WRITE(PRB0_CTL, 2784 ((obj->size - 4096) & RING_NR_PAGES) | 2785 RING_NO_REPORT | 2786 RING_VALID); 2787 2788 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 2789 2790 /* If the head is still not zero, the ring is dead */ 2791 if (head != 0) { 2792 DRM_ERROR("Ring initialization failed " 2793 "ctl %08x head %08x tail %08x start %08x\n", 2794 I915_READ(PRB0_CTL), 2795 I915_READ(PRB0_HEAD), 2796 I915_READ(PRB0_TAIL), 2797 I915_READ(PRB0_START)); 2798 return EIO; 2799 } 2800 2801 /* Update our cache of the ring state */ 2802 i915_kernel_lost_context(dev); 2803 2804 return 0; 2805 } 2806 2807 static void 2808 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 2809 { 2810 drm_i915_private_t *dev_priv = dev->dev_private; 2811 2812 if (dev_priv->ring.ring_obj == NULL) 2813 return; 2814 2815 drm_core_ioremapfree(&dev_priv->ring.map, dev); 2816 2817 i915_gem_object_unpin(dev_priv->ring.ring_obj); 2818 drm_gem_object_unreference(dev_priv->ring.ring_obj); 2819 dev_priv->ring.ring_obj = NULL; 2820 (void) memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 2821 i915_gem_cleanup_hws(dev); 2822 } 2823 2824 /*ARGSUSED*/ 2825 int 2826 i915_gem_entervt_ioctl(DRM_IOCTL_ARGS) 2827 { 2828 DRM_DEVICE; 2829 drm_i915_private_t *dev_priv = dev->dev_private; 2830 int ret; 2831 2832 if (dev->driver->use_gem != 1) 2833 return ENODEV; 2834 2835 if (dev_priv->mm.wedged) { 2836 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 2837 dev_priv->mm.wedged = 0; 2838 } 2839 /* Set up the kernel mapping for the ring. */ 2840 dev_priv->mm.gtt_mapping.offset = dev->agp->agp_info.agpi_aperbase; 2841 dev_priv->mm.gtt_mapping.size = dev->agp->agp_info.agpi_apersize; 2842 dev_priv->mm.gtt_mapping.type = 0; 2843 dev_priv->mm.gtt_mapping.flags = 0; 2844 dev_priv->mm.gtt_mapping.mtrr = 0; 2845 2846 drm_core_ioremap(&dev_priv->mm.gtt_mapping, dev); 2847 2848 spin_lock(&dev->struct_mutex); 2849 dev_priv->mm.suspended = 0; 2850 ret = i915_gem_init_ringbuffer(dev); 2851 if (ret != 0) 2852 return ret; 2853 2854 spin_unlock(&dev->struct_mutex); 2855 2856 drm_irq_install(dev); 2857 2858 return 0; 2859 } 2860 2861 /*ARGSUSED*/ 2862 int 2863 i915_gem_leavevt_ioctl(DRM_IOCTL_ARGS) 2864 { 2865 DRM_DEVICE; 2866 drm_i915_private_t *dev_priv = dev->dev_private; 2867 int ret; 2868 2869 if (dev->driver->use_gem != 1) 2870 return ENODEV; 2871 2872 ret = i915_gem_idle(dev, 0); 2873 drm_irq_uninstall(dev); 2874 2875 drm_core_ioremapfree(&dev_priv->mm.gtt_mapping, dev); 2876 return ret; 2877 } 2878 2879 void 2880 i915_gem_lastclose(struct drm_device *dev) 2881 { 2882 drm_i915_private_t *dev_priv = dev->dev_private; 2883 int ret; 2884 2885 ret = i915_gem_idle(dev, 1); 2886 if (ret) 2887 DRM_ERROR("failed to idle hardware: %d\n", ret); 2888 2889 drm_mm_clean_ml(&dev_priv->mm.gtt_space); 2890 } 2891 2892 void 2893 i915_gem_load(struct drm_device *dev) 2894 { 2895 drm_i915_private_t *dev_priv = dev->dev_private; 2896 2897 INIT_LIST_HEAD(&dev_priv->mm.active_list); 2898 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 2899 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 2900 INIT_LIST_HEAD(&dev_priv->mm.request_list); 2901 dev_priv->mm.next_gem_seqno = 1; 2902 2903 i915_gem_detect_bit_6_swizzle(dev); 2904 2905 } 2906 2907