/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					  size, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

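/*
 * Release every GGTT/ppGTT binding of @obj. The device is woken first
 * since unbinding may touch the GGTT, then each vma on the object's list
 * is unbound; a still-pinned vma yields -EBUSY, an address space that is
 * being closed yields -EAGAIN (retried once under
 * I915_GEM_OBJECT_UNBIND_BARRIER after an rcu_barrier()).
 */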
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
	LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (!atomic_read(&obj->bind_count))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	return 0;
}

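/*
 * Common backend for the GEM_CREATE and dumb-buffer ioctls: round the
 * requested size up to the memory region's minimum page size, allocate
 * a new object in that region and return a handle to it. The rounded-up
 * size is written back through @size_p so userspace sees the actual
 * allocation size.
 */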
static int
i915_gem_create(struct drm_file *file,
		struct intel_memory_region *mr,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g. mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

	if (args->pitch < args->width)
		return -EINVAL;

	args->size = mul_u32_u32(args->pitch, args->height);

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
			       intel_memory_region_by_type(to_i915(dev),
							   mem_type),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
			       intel_memory_region_by_type(i915,
							   INTEL_MEMORY_SYSTEM),
			       &args->size, &args->handle);
}

static int
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

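/*
 * Read back a shmem-backed object page by page through a kernel mapping
 * (kmap), flushing stale cachelines first when the pages are not coherent
 * with the CPU cache (as reported by i915_gem_object_prepare_read()).
 */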
static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}

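/*
 * Fallback read through the mappable GGTT aperture, used when the shmem
 * path cannot service the request (e.g. the object has no struct pages).
 * The object is either pinned into the aperture or, failing that, copied
 * one page at a time through a temporary GGTT PTE.
 */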
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & PAGE_MASK;
		}

		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

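/*
 * Illustrative userspace call sequence (not part of this file; handle,
 * length, buf and fd are placeholders): a pread of a GEM object is issued
 * through the DRM_IOCTL_I915_GEM_PREAD ioctl, roughly as
 *
 *	struct drm_i915_gem_pread pread = {
 *		.handle   = handle,
 *		.offset   = 0,
 *		.size     = length,
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 *
 * which lands in i915_gem_pread_ioctl() below.
 */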
/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush_after is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire write.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

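/*
 * pwrite path selection (see i915_gem_pwrite_ioctl() below): objects
 * without struct pages, and objects whose CPU writes would require a
 * clflush, are first attempted through the uncached GGTT fast path;
 * on -EFAULT/-ENOSPC we fall back to the shmem path for page-backed
 * objects, or to the phys-object path otherwise.
 */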
/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
		else
			ret = i915_gem_phys_pwrite(obj, args, file);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

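/*
 * Runtime-PM suspend hook: revoke all userspace GTT mmaps (the aperture
 * disappears with the device) and mark the fence registers dirty so that
 * they are rewritten on the next use after resume.
 */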
void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

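/*
 * Pin an object into the global GTT, instantiating the requested view and
 * unbinding a misplaced mapping (and revoking a stale fence) first if
 * needed. With PIN_MAPPABLE/PIN_NONBLOCK the function bails out early
 * rather than thrashing the limited mappable aperture.
 */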
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		ret = i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	ret = i915_vma_wait_for_bind(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ERR_PTR(ret);
	}

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

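/*
 * One-time GEM initialisation at driver load: set up userptr support,
 * fetch uC firmware, initialise the global GTT and then bring up the GT.
 * On -EIO the GPU is marked as wedged but enough state is kept alive for
 * KMS to keep working.
 */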
int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if the host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name, intel_init_clock_gating applies display clock
	 * gating workarounds, GT mmio workarounds and the occasional GT power
	 * context workaround. Worse, sometimes it includes a context register
	 * workaround which we need to apply before we record the default HW
	 * state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that
	 * the driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the
		 * GPU as wedged. But we only want to do this when the GPU is
		 * angry; for all other failures, such as an allocation
		 * failure, we bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_ggtt_resume(&dev_priv->ggtt);
		i915_gem_restore_fences(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

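/*
 * Driver teardown happens in two stages: i915_gem_driver_remove() stops
 * the hardware and drains pending work while the device is still present,
 * and i915_gem_driver_release() then frees the remaining software state.
 */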
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.obj_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	spin_lock_init(&dev_priv->fb_tracking.lock);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		drm_WARN_ON(&i915->drm,
			    i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

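/*
 * Per-client (struct drm_file) open/close hooks: i915_gem_open() sets up
 * the file private state and opens a GEM context, while i915_gem_release()
 * detaches in-flight requests from the dying client so that later request
 * retirement does not dereference freed file_priv state.
 */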
1293 */ 1294 spin_lock(&file_priv->mm.lock); 1295 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 1296 request->file_priv = NULL; 1297 spin_unlock(&file_priv->mm.lock); 1298 } 1299 1300 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1301 { 1302 struct drm_i915_file_private *file_priv; 1303 int ret; 1304 1305 DRM_DEBUG("\n"); 1306 1307 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1308 if (!file_priv) 1309 return -ENOMEM; 1310 1311 file->driver_priv = file_priv; 1312 file_priv->dev_priv = i915; 1313 file_priv->file = file; 1314 1315 spin_lock_init(&file_priv->mm.lock); 1316 INIT_LIST_HEAD(&file_priv->mm.request_list); 1317 1318 file_priv->bsd_engine = -1; 1319 file_priv->hang_timestamp = jiffies; 1320 1321 ret = i915_gem_context_open(i915, file); 1322 if (ret) 1323 kfree(file_priv); 1324 1325 return ret; 1326 } 1327 1328 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1329 #include "selftests/mock_gem_device.c" 1330 #include "selftests/i915_gem.c" 1331 #endif 1332