/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_display.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such an object is
	 * managed by userspace. Otherwise the call here falls back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			i915_vma_flush_writes(vma);
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

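/*
 * Illustrative use of the two flush helpers above (a minimal sketch, not
 * taken from a real call site): a caller that already holds the object
 * lock uses the _locked variant, everyone else lets the plain helper take
 * and drop the lock itself.
 *
 *	i915_gem_object_flush_if_display(obj);
 *
 *	i915_gem_object_lock(obj, NULL);
 *	... other work under the object lock ...
 *	i915_gem_object_flush_if_display_locked(obj);
 *	i915_gem_object_unlock(obj);
 */
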
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

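/*
 * Sketch of a kernel caller moving an object into the WC domain
 * (illustrative only; the same shape applies to the GTT and CPU variants
 * below). The helper asserts that the object lock is held, so the caller
 * brackets it with the dma-resv lock:
 *
 *	err = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (err)
 *		return err;
 *
 *	err = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */
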
/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by the set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

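/*
 * Example in-kernel user (sketch): the display path below forces scanout
 * buffers into a cache mode the display engine can snoop, e.g.
 *
 *	ret = i915_gem_object_set_cache_level(obj,
 *					      HAS_WT(i915) ? I915_CACHE_WT :
 *							     I915_CACHE_NONE);
 *
 * which is what i915_gem_object_pin_to_display_plane() does before pinning
 * the vma.
 */
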
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

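/*
 * Userspace view of the two caching ioctls above (illustrative snippet
 * using the uapi structures via libdrm's drmIoctl(); error handling is
 * omitted, and "fd"/"handle" are assumed to be a valid device fd and GEM
 * handle):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
 *
 * after which arg.caching reports I915_CACHING_NONE, _CACHED or _DISPLAY.
 */
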
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display, the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (intel_scanout_needs_vtd_wa(i915)) {
		unsigned int guard = VTD_GUARD;

		if (i915_gem_object_is_tiled(obj))
			guard = max(guard,
				    i915_gem_object_get_tile_row_size(obj));

		flags |= PIN_OFFSET_GUARD | guard;
	}

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

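/*
 * Sketch of the ww locking dance around the pin helper above (illustrative
 * only, modelled on the usual i915 ww retry pattern; "alignment", "view"
 * and the PIN_GLOBAL flag are placeholder arguments chosen for the
 * example):
 *
 *	struct i915_gem_ww_ctx ww;
 *	struct i915_vma *vma;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *	err = i915_gem_object_lock(obj, &ww);
 *	if (!err) {
 *		vma = i915_gem_object_pin_to_display_plane(obj, &ww,
 *							   alignment, view,
 *							   PIN_GLOBAL);
 *		if (IS_ERR(vma))
 *			err = PTR_ERR(vma);
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */
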
/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

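/*
 * Userspace view of the set-domain ioctl above (illustrative; error
 * handling omitted, "fd" and "handle" assumed valid). For example,
 * preparing a buffer for CPU writes through an mmap:
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */
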
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

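/*
 * Illustrative caller pattern for prepare_read (a sketch modelled on a
 * shmem pread-style copy, with the object lock already held; "n", "offset",
 * "len" and "user_data" are made-up locals for the example):
 *
 *	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 *	if (!err) {
 *		void *vaddr = kmap_local_page(i915_gem_object_get_page(obj, n));
 *
 *		if (needs_clflush & CLFLUSH_BEFORE)
 *			drm_clflush_virt_range(vaddr + offset, len);
 *		if (copy_to_user(user_data, vaddr + offset, len))
 *			err = -EFAULT;
 *
 *		kunmap_local(vaddr);
 *		i915_gem_object_finish_access(obj);
 *	}
 *
 * i915_gem_object_finish_access() drops the page pin taken by prepare_read.
 */
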
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

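/*
 * Counterpart sketch for prepare_write (illustrative only, again with the
 * object lock held and made-up locals): writes may need a flush on both
 * sides of the copy, as signalled by CLFLUSH_BEFORE and CLFLUSH_AFTER.
 *
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (!err) {
 *		void *vaddr = kmap_local_page(i915_gem_object_get_page(obj, n));
 *
 *		if (needs_clflush & CLFLUSH_BEFORE)
 *			drm_clflush_virt_range(vaddr + offset, len);
 *		if (copy_from_user(vaddr + offset, user_data, len))
 *			err = -EFAULT;
 *		if (needs_clflush & CLFLUSH_AFTER)
 *			drm_clflush_virt_range(vaddr + offset, len);
 *
 *		kunmap_local(vaddr);
 *		i915_gem_object_finish_access(obj);
 *	}
 */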