/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

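/*
 * Flush the object's pending write domain, if it is one of @flush_domains:
 * GGTT writes are flushed via i915_vma_flush_writes() on each GGTT vma,
 * followed by a frontbuffer flush; WC writes with a write memory barrier;
 * CPU writes with a clflush; GPU (render) writes by marking the object's
 * cachelines dirty when a clflush will be needed. The object's write
 * domain is then cleared.
 */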
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			i915_vma_flush_writes(vma);
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

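/*
 * Flush any pending writes for a framebuffer object so that the display
 * engine reads coherent data. i915_gem_object_flush_if_display() acquires
 * the object lock itself; the _locked variant below expects the caller to
 * already hold it.
 */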
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

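/*
 * DRM_IOCTL_I915_GEM_GET_CACHING: report the current caching mode of an
 * object to userspace (none, cached, or display/write-through). Not
 * available on discrete GPUs, nor for objects whose PAT index was set by
 * userspace.
 */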
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

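/*
 * DRM_IOCTL_I915_GEM_SET_CACHING: change the caching mode of an object.
 * Rejected on discrete GPUs, on graphics IP version 12.70 and newer, and
 * for objects whose PAT index was set by userspace; for proxy objects only
 * the I915_CACHING_CACHED no-op on userptr is tolerated.
 */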
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment, unsigned int guard,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (guard)
		flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

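/*
 * A sketch of how the set-to-domain helpers above are typically used,
 * mirroring what i915_gem_set_domain_ioctl() below does: take the object
 * lock, pin the backing pages, move the object into the desired domain,
 * then release everything in reverse order:
 *
 *	err = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (err)
 *		return err;
 *	err = i915_gem_object_pin_pages(obj);
 *	if (!err) {
 *		err = i915_gem_object_set_to_cpu_domain(obj, true);
 *		i915_gem_object_unpin_pages(obj);
 *	}
 *	i915_gem_object_unlock(obj);
 */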
/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

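/*
 * Pins the specified object's pages and synchronizes the object with
 * pending GPU accesses, including writes. Sets needs_clflush to
 * CLFLUSH_BEFORE and/or CLFLUSH_AFTER when the caller must flush the CPU
 * cache around its write, and marks the object dirty.
 */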
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}