// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_display.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec_queue.h"
#include "xe_exec.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_irq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
#include "xe_hwmon.h"

#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
	.name = "xe_device_mem_access_lockdep_map"
};
#endif

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	spin_lock(&xe->clients.lock);
	xe->clients.count++;
	spin_unlock(&xe->clients.lock);

	file->driver_priv = xef;
	return 0;
}

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	mutex_lock(&xef->exec_queue.lock);
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	mutex_unlock(&xef->exec_queue.lock);
	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	mutex_lock(&xef->vm.lock);
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);
	mutex_unlock(&xef->vm.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	spin_lock(&xe->clients.lock);
	xe->clients.count--;
	spin_unlock(&xe->clients.lock);

	xe_drm_client_put(xef->client);
	kfree(xef);
}

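/*
 * IOCTLs exposed by the xe driver. All of them are flagged
 * DRM_RENDER_ALLOW, so they are also available on render nodes.
 */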
static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
};

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
};

static void xe_driver_release(struct drm_device *dev)
{
	struct xe_device *xe = to_xe_device(dev);

	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
}

static struct drm_driver driver = {
	/* Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.release = &xe_driver_release,

	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.date = DRIVER_DATE,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	if (xe->unordered_wq)
		destroy_workqueue(xe->unordered_wq);

	ttm_device_fini(&xe->ttm);
}

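/**
 * xe_device_create - allocate and do early init of an xe_device
 * @pdev: the PCI device backing this xe instance
 * @ent: the matching PCI ID table entry
 *
 * Allocates the DRM/xe device, initializes TTM and the locks, xarrays
 * and workqueues used by the rest of the driver, and hooks up the
 * display side of the driver.
 *
 * Return: the new xe_device on success, an ERR_PTR() on failure.
 */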
struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	xe_display_driver_set_hooks(&driver);

	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = xe_modparam.force_execlist;

	spin_lock_init(&xe->irq.lock);
	spin_lock_init(&xe->clients.lock);

	init_waitqueue_head(&xe->ufence_wq);

	drmm_mutex_init(&xe->drm, &xe->usm.lock);
	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		/* Trigger a large asid and an early asid wrap. */
		u32 asid;

		BUILD_BUG_ON(XE_MAX_ASID < 2);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		drm_WARN_ON(&xe->drm, err);
		if (err >= 0)
			xa_erase(&xe->usm.asid_to_vm, asid);
	}

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq) {
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
	}

	err = xe_display_create(xe);
	if (WARN_ON(err))
		goto err;

	return xe;

err:
	return ERR_PTR(err);
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	int ret;

	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
		return;
	}

	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

	/*
	 * Make sure any pending FLR requests have cleared by waiting for the
	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
	 * to make sure it's not still set from a prior attempt (it's a write to
	 * clear bit).
	 * Note that we should never be in a situation where a previous attempt
	 * is still pending (unless the HW is totally dead), but better to be
	 * safe in case something unexpected happens.
	 */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
		return;
	}
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);

	/* Trigger the actual Driver-FLR */
	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

	/* Wait for hardware teardown to complete */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
		return;
	}

	/* Wait for hardware/firmware re-init to complete */
	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			     flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
		return;
	}

	/* Clear sticky completion status */
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	if (xe->needs_flr_on_fini)
		xe_driver_flr(xe);
}

static void xe_device_sanitize(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		xe_gt_sanitize(gt);
}

static int xe_set_dma_info(struct xe_device *xe)
{
	unsigned int mask_size = xe->info.dma_mask_size;
	int err;

	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	return 0;

mask_err:
	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
	return err;
}

/*
 * Initialize MMIO resources that don't require any knowledge about tile count.
 */
int xe_device_probe_early(struct xe_device *xe)
{
	int err;

	err = xe_mmio_init(xe);
	if (err)
		return err;

	err = xe_mmio_root_tile_init(xe);
	if (err)
		return err;

	return 0;
}

static int xe_device_set_has_flat_ccs(struct xe_device *xe)
{
	u32 reg;
	int err;

	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
		return 0;

	struct xe_gt *gt = xe_root_mmio_gt(xe);

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

	if (!xe->info.has_flat_ccs)
		drm_dbg(&xe->drm,
			"Flat CCS has been disabled in BIOS, may lead to performance impact");

	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

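/**
 * xe_device_probe - fully initialize an xe_device and register it with DRM
 * @xe: xe device instance
 *
 * Brings up MMIO, GGTT, interrupts, the GTs, VRAM, stolen memory and
 * display, then registers the DRM device along with its debugfs and
 * hwmon interfaces.
 *
 * Return: 0 on success, negative error code on failure.
 */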
int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 id;

	xe_pat_init_early(xe);

	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
		return err;

	err = xe_set_dma_info(xe);
	if (err)
		return err;

	xe_mmio_probe_tiles(xe);

	xe_ttm_sys_mgr_init(xe);

	for_each_gt(gt, xe, id)
		xe_force_wake_init_gt(gt, gt_to_fw(gt));

	for_each_tile(tile, xe, id) {
		err = xe_ggtt_init_early(tile->mem.ggtt);
		if (err)
			return err;
	}

	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_pcode_probe(gt);
		if (err)
			return err;
	}

	err = xe_display_init_noirq(xe);
	if (err)
		return err;

	err = xe_irq_install(xe);
	if (err)
		goto err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			goto err_irq_shutdown;
	}

	err = xe_device_set_has_flat_ccs(xe);
	if (err)
		goto err_irq_shutdown;

	err = xe_mmio_probe_vram(xe);
	if (err)
		goto err_irq_shutdown;

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err_irq_shutdown;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	/*
	 * Now that GT is initialized (TTM in particular),
	 * we can try to init display, and inherit the initial fb.
	 * This is the reason the first allocation needs to be done
	 * inside display.
	 */
	err = xe_display_init_noaccel(xe);
	if (err)
		goto err_irq_shutdown;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init(gt);
		if (err)
			goto err_irq_shutdown;
	}

	xe_heci_gsc_init(xe);

	err = xe_display_init(xe);
	if (err)
		goto err_irq_shutdown;

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_fini_display;

	xe_display_register(xe);

	xe_debugfs_register(xe);

	xe_hwmon_register(xe);

	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
	if (err)
		return err;

	return 0;

err_fini_display:
	xe_display_driver_remove(xe);

err_irq_shutdown:
	xe_irq_shutdown(xe);
err:
	xe_display_fini(xe);
	return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
	xe_display_unregister(xe);

	drm_dev_unplug(&xe->drm);
	xe_display_driver_remove(xe);
}

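/**
 * xe_device_remove - tear down an xe_device on driver removal
 * @xe: xe device instance
 *
 * Unregisters and unplugs the DRM device, then shuts down display,
 * the HECI/GSC interface and interrupts.
 */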
void xe_device_remove(struct xe_device *xe)
{
	xe_device_remove_display(xe);

	xe_display_fini(xe);

	xe_heci_gsc_fini(xe);

	xe_irq_shutdown(xe);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

void xe_device_wmb(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}

bool xe_device_mem_access_ongoing(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) != NULL)
		return true;

	return atomic_read(&xe->mem_access.ref);
}

void xe_device_assert_mem_access(struct xe_device *xe)
{
	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
}

bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
{
	bool active;

	if (xe_pm_read_callback_task(xe) == current)
		return true;

	active = xe_pm_runtime_get_if_active(xe);
	if (active) {
		int ref = atomic_inc_return(&xe->mem_access.ref);

		xe_assert(xe, ref != S32_MAX);
	}

	return active;
}

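/**
 * xe_device_mem_access_get - keep the device awake while the HW is accessed
 * @xe: xe device instance
 *
 * Takes a runtime PM reference (resuming the device synchronously if it
 * was suspended) and increments the mem_access refcount. Calls made from
 * within the runtime PM callbacks themselves are a no-op. Each call must
 * be balanced by xe_device_mem_access_put(), e.g.:
 *
 *	xe_device_mem_access_get(xe);
 *	... access registers or memory ...
 *	xe_device_mem_access_put(xe);
 */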
void xe_device_mem_access_get(struct xe_device *xe)
{
	int ref;

	/*
	 * This looks racy, but should be fine since the pm_callback_task only
	 * transitions from NULL -> current (and back to NULL again) during the
	 * runtime_resume() or runtime_suspend() callbacks, for which there can
	 * only be a single one running for our device. We only need to prevent
	 * recursively calling the runtime_get or runtime_put from those
	 * callbacks, as well as preventing triggering any access_ongoing
	 * asserts.
	 */
	if (xe_pm_read_callback_task(xe) == current)
		return;

	/*
	 * Since the resume here is synchronous it can be quite easy to deadlock
	 * if we are not careful. Also in practice it might be quite timing
	 * sensitive to ever see the 0 -> 1 transition with the caller's locks
	 * held, so deadlocks might exist but are hard for lockdep to ever see.
	 * With this in mind, help lockdep learn about the potentially scary
	 * stuff that can happen inside the runtime_resume callback by acquiring
	 * a dummy lock (it doesn't protect anything and gets compiled out on
	 * non-debug builds). Lockdep then only needs to see the
	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
	 * For example if the (callers_locks) are ever grabbed in the
	 * runtime_resume callback, lockdep should give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);
	lock_map_release(&xe_device_mem_access_lockdep_map);

	xe_pm_runtime_get(xe);
	ref = atomic_inc_return(&xe->mem_access.ref);

	xe_assert(xe, ref != S32_MAX);
}

void xe_device_mem_access_put(struct xe_device *xe)
{
	int ref;

	if (xe_pm_read_callback_task(xe) == current)
		return;

	ref = atomic_dec_return(&xe->mem_access.ref);
	xe_pm_runtime_put(xe);

	xe_assert(xe, ref >= 0);
}