// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2016 Intel Corporation
 */

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_gem_object_frontbuffer.h"
#include "i915_vma.h"

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by set_pat extension, i915_gem_object_has_cache_level() will
	 * always return true, because the coherency of such object is managed
	 * by userspace. Otherwise the call here would fall back to checking
	 * whether the object is un-cached or write-through.
	 */
	return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
		 i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
}

bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			i915_vma_flush_writes(vma);
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}
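/*
 * Example (illustrative sketch only): flushing a framebuffer object for
 * display. The unlocked variant takes the object lock itself; use the
 * _locked variant when the caller already holds the lock:
 *
 *	i915_gem_object_flush_if_display(obj);
 *
 *	i915_gem_object_lock(obj, NULL);
 *	i915_gem_object_flush_if_display_locked(obj);
 *	i915_gem_object_unlock(obj);
 */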
/**
 * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
 * possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
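/*
 * Example (illustrative sketch only): callers are expected to perform the
 * domain change under the object lock, e.g.
 *
 *	ret = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_object_set_to_wc_domain(obj, true);
 *	i915_gem_object_unlock(obj);
 */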
/**
 * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	/*
	 * For objects created by userspace through GEM_CREATE with pat_index
	 * set by set_pat extension, simply return 0 here without touching
	 * the cache setting, because such objects should have an immutable
	 * cache setting by design and are always managed by userspace.
	 */
	if (i915_gem_object_has_cache_level(obj, cache_level))
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	i915_gem_object_set_cache_coherency(obj, cache_level);
	obj->cache_dirty = true;

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}
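/*
 * Example (illustrative sketch only): the set_caching ioctl below changes
 * the cache level in exactly this way, under the object lock:
 *
 *	ret = i915_gem_object_lock_interruptible(obj, NULL);
 *	if (!ret) {
 *		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *		i915_gem_object_unlock(obj);
 *	}
 */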
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
	    i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
		args->caching = I915_CACHING_CACHED;
	else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
		args->caching = I915_CACHING_DISPLAY;
	else
		args->caching = I915_CACHING_NONE;
out:
	rcu_read_unlock();
	return err;
}

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		return -EOPNOTSUPP;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * This ioctl should be disabled for the objects with pat_index
	 * set by user space.
	 */
	if (obj->pat_set_by_user) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/*
	 * The caching mode of proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the vulkan driver
		 * sets all objects to cached
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}
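/*
 * Example (illustrative userspace sketch only, not part of this file):
 * requesting LLC caching for a GEM object through the set_caching ioctl.
 * fd and handle are assumed to be a valid DRM fd and GEM handle; error
 * handling is omitted.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 */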
/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment, unsigned int guard,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/* VT-d may overfetch before/after the vma, so pad with scratch */
	if (guard)
		flags |= PIN_OFFSET_GUARD | (guard * I915_GTT_PAGE_SIZE);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max(vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}
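/*
 * Example (illustrative sketch only, not part of this file): pinning a
 * framebuffer object for scanout inside a ww transaction. The retry loop
 * shown here is one common pattern; alignment, guard, view and flags are
 * assumed to come from the caller.
 *
 *	struct i915_gem_ww_ctx ww;
 *	struct i915_vma *vma;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *	err = i915_gem_object_lock(obj, &ww);
 *	if (!err) {
 *		vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment,
 *							   guard, view, flags);
 *		if (IS_ERR(vma))
 *			err = PTR_ERR(vma);
 *	}
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */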
/**
 * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
 * and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * i915_gem_set_domain_ioctl - Called when user space prepares to use an
 * object with the CPU, either through the mmap ioctl's mapping or a GTT
 * mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages; iris uses set_domain to check
		 * userptr validity.
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}
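/*
 * Example (illustrative userspace sketch only, not part of this file):
 * preparing an object for CPU writes through an mmap before filling it.
 * fd and handle are assumed to be a valid DRM fd and GEM handle.
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 */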
/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}
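/*
 * Example (illustrative sketch only): a pwrite-style caller is expected to
 * honour the returned clflush hints and drop the page pin when done. The
 * helpers shown (kmap_local_page(), drm_clflush_virt_range()) are one
 * possible way to act on those hints.
 *
 *	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 *	if (err)
 *		return err;
 *
 *	vaddr = kmap_local_page(page);
 *	if (needs_clflush & CLFLUSH_BEFORE)
 *		drm_clflush_virt_range(vaddr + offset, len);
 *	memcpy(vaddr + offset, user_data, len);
 *	if (needs_clflush & CLFLUSH_AFTER)
 *		drm_clflush_virt_range(vaddr + offset, len);
 *	kunmap_local(vaddr);
 *
 *	i915_gem_object_unpin_pages(obj);
 */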