// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/delay.h>
#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "display/xe_display.h"
#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	spin_lock(&xe->clients.lock);
	xe->clients.count++;
	spin_unlock(&xe->clients.lock);

	file->driver_priv = xef;
	kref_init(&xef->refcount);

	return 0;
}

static void xe_file_destroy(struct kref *ref)
{
	struct xe_file *xef = container_of(ref, struct xe_file, refcount);
	struct xe_device *xe = xef->xe;

	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	spin_lock(&xe->clients.lock);
	xe->clients.count--;
	spin_unlock(&xe->clients.lock);

	xe_drm_client_put(xef->client);
	kfree(xef);
}

/**
 * xe_file_get() - Take a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Anyone with a pointer to xef must take a reference to the xe file
 * object using this call.
 *
 * Return: xe file pointer
 */
struct xe_file *xe_file_get(struct xe_file *xef)
{
	kref_get(&xef->refcount);
	return xef;
}

/**
 * xe_file_put() - Drop a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Used to drop a reference to the xef object.
 */
void xe_file_put(struct xe_file *xef)
{
	kref_put(&xef->refcount, xe_file_destroy);
}
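
/*
 * Usage sketch (illustrative only, not driver code): any path that stores an
 * xef pointer beyond the scope of the current call is expected to pin it with
 * xe_file_get() and release it with xe_file_put() when done, e.g. for a
 * hypothetical owner object:
 *
 *	owner->xef = xe_file_get(xef);
 *	...
 *	xe_file_put(owner->xef);
 *
 * The final xe_file_put() releases the object through xe_file_destroy() via
 * the kref.
 */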

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	xe_pm_runtime_get(xe);

	/*
	 * No need for exec_queue.lock here as there is no contention for it
	 * when FD is closing as IOCTLs presumably can't be modifying the
	 * xarray. Taking exec_queue.lock here causes undue dependency on
	 * vm->lock taken during xe_exec_queue_kill().
	 */
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	mutex_lock(&xef->vm.lock);
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);
	mutex_unlock(&xef->vm.lock);

	xe_file_put(xef);

	xe_pm_runtime_put(xe);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
};

static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct drm_file *file_priv = file->private_data;
	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
	long ret;

	if (xe_device_wedged(xe))
		return -ECANCELED;

	ret = xe_pm_runtime_get_ioctl(xe);
	if (ret >= 0)
		ret = drm_ioctl(file, cmd, arg);
	xe_pm_runtime_put(xe);

	return ret;
}

#ifdef CONFIG_COMPAT
static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct drm_file *file_priv = file->private_data;
	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
	long ret;

	if (xe_device_wedged(xe))
		return -ECANCELED;

	ret = xe_pm_runtime_get_ioctl(xe);
	if (ret >= 0)
		ret = drm_compat_ioctl(file, cmd, arg);
	xe_pm_runtime_put(xe);

	return ret;
}
#else
/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
#define xe_drm_compat_ioctl NULL
#endif
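
/*
 * Note on the wrappers above: a wedged device short-circuits the path with
 * -ECANCELED before any runtime-PM reference is taken. Otherwise the generic
 * DRM dispatch is bracketed by a runtime-PM reference, and xe_pm_runtime_put()
 * is issued unconditionally to balance the get (assuming the usual
 * pm_runtime_get_sync() semantics, where the usage counter is incremented even
 * when the get reports an error).
 */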

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = xe_drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = xe_drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
};

static struct drm_driver driver = {
	/* Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.date = DRIVER_DATE,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->preempt_fence_wq)
		destroy_workqueue(xe->preempt_fence_wq);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	if (xe->unordered_wq)
		destroy_workqueue(xe->unordered_wq);

	ttm_device_fini(&xe->ttm);
}
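
/*
 * xe_device_destroy() runs as a drm-managed release action: it is registered
 * with drmm_add_action_or_reset() in xe_device_create() below, so it is
 * invoked automatically once the last reference to the drm_device is dropped.
 * The NULL checks on the workqueues exist because the action can also fire
 * after a partially failed xe_device_create(). A minimal sketch of the
 * registration pattern:
 *
 *	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
 *	if (err)
 *		return err;	// the action has already run on failure
 */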

struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	xe_display_driver_set_hooks(&driver);

	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = xe_modparam.force_execlist;

	spin_lock_init(&xe->irq.lock);
	spin_lock_init(&xe->clients.lock);

	init_waitqueue_head(&xe->ufence_wq);

	err = drmm_mutex_init(&xe->drm, &xe->usm.lock);
	if (err)
		goto err;

	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		/* Trigger a large asid and an early asid wrap. */
		u32 asid;

		BUILD_BUG_ON(XE_MAX_ASID < 2);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		drm_WARN_ON(&xe->drm, err);
		if (err >= 0)
			xa_erase(&xe->usm.asid_to_vm, asid);
	}

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq ||
	    !xe->preempt_fence_wq) {
		/*
		 * Cleanup done in xe_device_destroy via
		 * drmm_add_action_or_reset register above
		 */
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
	}

	err = xe_display_create(xe);
	if (WARN_ON(err))
		goto err;

	return xe;

err:
	return ERR_PTR(err);
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	int ret;

	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
		return;
	}

	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

	/*
	 * Make sure any pending FLR requests have cleared by waiting for the
	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
	 * to make sure it's not still set from a prior attempt (it's a write to
	 * clear bit).
	 * Note that we should never be in a situation where a previous attempt
	 * is still pending (unless the HW is totally dead), but better to be
	 * safe in case something unexpected happens.
	 */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
		return;
	}
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);

	/* Trigger the actual Driver-FLR */
	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

	/* Wait for hardware teardown to complete */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
		return;
	}

	/* Wait for hardware/firmware re-init to complete */
	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			     flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
		return;
	}

	/* Clear sticky completion status */
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}
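
/*
 * Timeout arithmetic above: MICRO comes from <linux/units.h> and equals
 * 1000000, and the timeout argument of xe_mmio_wait32() is expressed in
 * microseconds, so flr_timeout = 3 * MICRO = 3,000,000 us = 3 seconds per
 * wait, matching the 3s recommended by the specs.
 */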
%d\n", ret); 420 return; 421 } 422 423 /* Wait for hardware/firmware re-init to complete */ 424 ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, 425 flr_timeout, NULL, false); 426 if (ret) { 427 drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret); 428 return; 429 } 430 431 /* Clear sticky completion status */ 432 xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); 433 } 434 435 static void xe_driver_flr_fini(void *arg) 436 { 437 struct xe_device *xe = arg; 438 439 if (xe->needs_flr_on_fini) 440 xe_driver_flr(xe); 441 } 442 443 static void xe_device_sanitize(void *arg) 444 { 445 struct xe_device *xe = arg; 446 struct xe_gt *gt; 447 u8 id; 448 449 for_each_gt(gt, xe, id) 450 xe_gt_sanitize(gt); 451 } 452 453 static int xe_set_dma_info(struct xe_device *xe) 454 { 455 unsigned int mask_size = xe->info.dma_mask_size; 456 int err; 457 458 dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev)); 459 460 err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); 461 if (err) 462 goto mask_err; 463 464 err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); 465 if (err) 466 goto mask_err; 467 468 return 0; 469 470 mask_err: 471 drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); 472 return err; 473 } 474 475 static bool verify_lmem_ready(struct xe_gt *gt) 476 { 477 u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; 478 479 return !!val; 480 } 481 482 static int wait_for_lmem_ready(struct xe_device *xe) 483 { 484 struct xe_gt *gt = xe_root_mmio_gt(xe); 485 unsigned long timeout, start; 486 487 if (!IS_DGFX(xe)) 488 return 0; 489 490 if (IS_SRIOV_VF(xe)) 491 return 0; 492 493 if (verify_lmem_ready(gt)) 494 return 0; 495 496 drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); 497 498 start = jiffies; 499 timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ 500 501 do { 502 if (signal_pending(current)) 503 return -EINTR; 504 505 /* 506 * The boot firmware initializes local memory and 507 * assesses its health. If memory training fails, 508 * the punit will have been instructed to keep the GT powered 509 * down.we won't be able to communicate with it 510 * 511 * If the status check is done before punit updates the register, 512 * it can lead to the system being unusable. 513 * use a timeout and defer the probe to prevent this. 514 */ 515 if (time_after(jiffies, timeout)) { 516 drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); 517 return -EPROBE_DEFER; 518 } 519 520 msleep(20); 521 522 } while (!verify_lmem_ready(gt)); 523 524 drm_dbg(&xe->drm, "lmem ready after %ums", 525 jiffies_to_msecs(jiffies - start)); 526 527 return 0; 528 } 529 530 static void update_device_info(struct xe_device *xe) 531 { 532 /* disable features that are not available/applicable to VFs */ 533 if (IS_SRIOV_VF(xe)) { 534 xe->info.enable_display = 0; 535 xe->info.has_heci_gscfi = 0; 536 xe->info.skip_guc_pc = 1; 537 xe->info.skip_pcode = 1; 538 } 539 } 540 541 /** 542 * xe_device_probe_early: Device early probe 543 * @xe: xe device instance 544 * 545 * Initialize MMIO resources that don't require any 546 * knowledge about tile count. Also initialize pcode and 547 * check vram initialization on root tile. 

/**
 * xe_device_probe_early() - Device early probe
 * @xe: xe device instance
 *
 * Initialize MMIO resources that don't require any
 * knowledge about tile count. Also initialize pcode and
 * check vram initialization on root tile.
 *
 * Return: 0 on success, error code on failure
 */
int xe_device_probe_early(struct xe_device *xe)
{
	int err;

	err = xe_mmio_init(xe);
	if (err)
		return err;

	xe_sriov_probe_early(xe);

	update_device_info(xe);

	err = xe_pcode_probe_early(xe);
	if (err)
		return err;

	err = wait_for_lmem_ready(xe);
	if (err)
		return err;

	xe->wedged.mode = xe_modparam.wedged_mode;

	return 0;
}

static int xe_device_set_has_flat_ccs(struct xe_device *xe)
{
	u32 reg;
	int err;

	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
		return 0;

	struct xe_gt *gt = xe_root_mmio_gt(xe);

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

	if (!xe->info.has_flat_ccs)
		drm_dbg(&xe->drm,
			"Flat CCS has been disabled in BIOS, may lead to performance impact");

	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
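
/*
 * Register accesses such as the MCR read in xe_device_set_has_flat_ccs()
 * are bracketed by a force-wake get/put on the GT domain so the hardware is
 * kept out of low-power states for the duration of the access; the return
 * value of xe_force_wake_put() is propagated so a failure to release the
 * domain is not silently ignored.
 */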

int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 last_gt;
	u8 id;

	xe_pat_init_early(xe);

	err = xe_sriov_init(xe);
	if (err)
		return err;

	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
		return err;

	err = xe_set_dma_info(xe);
	if (err)
		return err;

	err = xe_mmio_probe_tiles(xe);
	if (err)
		return err;

	xe_ttm_sys_mgr_init(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			return err;
	}

	for_each_tile(tile, xe, id) {
		if (IS_SRIOV_VF(xe)) {
			xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
			err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
			if (err)
				return err;
			err = xe_gt_sriov_vf_query_config(tile->primary_gt);
			if (err)
				return err;
		}
		err = xe_ggtt_init_early(tile->mem.ggtt);
		if (err)
			return err;
		if (IS_SRIOV_VF(xe)) {
			err = xe_memirq_init(&tile->sriov.vf.memirq);
			if (err)
				return err;
		}
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_hwconfig(gt);
		if (err)
			return err;
	}

	err = xe_devcoredump_init(xe);
	if (err)
		return err;
	err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
	if (err)
		return err;

	err = xe_display_init_noirq(xe);
	if (err)
		return err;

	err = xe_irq_install(xe);
	if (err)
		goto err;

	err = xe_device_set_has_flat_ccs(xe);
	if (err)
		goto err;

	err = xe_vram_probe(xe);
	if (err)
		goto err;

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	/*
	 * Now that GT is initialized (TTM in particular),
	 * we can try to init display, and inherit the initial fb.
	 * This is the reason the first allocation needs to be done
	 * inside display.
	 */
	err = xe_display_init_noaccel(xe);
	if (err)
		goto err;

	for_each_gt(gt, xe, id) {
		last_gt = id;

		err = xe_gt_init(gt);
		if (err)
			goto err_fini_gt;
	}

	xe_heci_gsc_init(xe);

	err = xe_oa_init(xe);
	if (err)
		goto err_fini_gt;

	err = xe_display_init(xe);
	if (err)
		goto err_fini_oa;

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_fini_display;

	xe_display_register(xe);

	xe_oa_register(xe);

	xe_debugfs_register(xe);

	xe_hwmon_register(xe);

	for_each_gt(gt, xe, id)
		xe_gt_sanitize_freq(gt);

	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);

err_fini_display:
	xe_display_driver_remove(xe);

err_fini_oa:
	xe_oa_fini(xe);

err_fini_gt:
	for_each_gt(gt, xe, id) {
		if (id < last_gt)
			xe_gt_remove(gt);
		else
			break;
	}

err:
	xe_display_fini(xe);
	return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
	xe_display_unregister(xe);

	drm_dev_unplug(&xe->drm);
	xe_display_driver_remove(xe);
}

void xe_device_remove(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	xe_oa_unregister(xe);

	xe_device_remove_display(xe);

	xe_display_fini(xe);

	xe_oa_fini(xe);

	xe_heci_gsc_fini(xe);

	for_each_gt(gt, xe, id)
		xe_gt_remove(gt);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

void xe_device_wmb(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
}

/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * Display engine has direct access to memory and is never coherent with L3/L4
 * caches (or CPU caches), however KMD is responsible for specifically flushing
 * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
 * can happen from such a surface without seeing corruption.
 *
 * Display surfaces can be tagged as transient by mapping them using one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 * Media is not coherent with L3 and we want to support render-vs-media
 * usecases. For other engines like copy/blt the HW internally forces uncached
 * behaviour, hence why we can skip the TDF on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
		return;

	for_each_gt(gt, xe, id) {
		if (xe_gt_is_media_type(gt))
			continue;

		if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
			return;

		xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
		/*
		 * FIXME: We can likely do better here with our choice of
		 * timeout. Currently we just assume the worst case, i.e. 150us,
		 * which is believed to be sufficient to cover the worst case
		 * scenario on current platforms if all cache entries are
		 * transient and need to be flushed.
		 */
		if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
				   150, NULL, false))
			xe_gt_err_once(gt, "TD flush timeout\n");

		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
	}
}
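
/*
 * Illustrative usage (hypothetical call site, not part of this file): display
 * code that is about to scan out from a surface mapped with one of the
 * transient L3:XD PAT indexes would issue the flush before arming the flip,
 * e.g.:
 *
 *	xe_device_td_flush(xe);
 *	// ... then write the flip registers / commit the plane update ...
 *
 * On platforms hitting the early return above (integrated or pre-Xe2), the
 * call is a no-op.
 */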

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}

/**
 * xe_device_assert_mem_access - Inspect the current runtime_pm state.
 * @xe: xe device instance
 *
 * To be used before any kind of memory access. It will splat a debug warning
 * if the device is currently sleeping. But it doesn't guarantee in any way
 * that the device is going to remain awake. Xe PM runtime get and put
 * functions might be added to the outer bound of the memory access, while
 * this check is intended for inner usage to splat some warning if the worst
 * case has just happened.
 */
void xe_device_assert_mem_access(struct xe_device *xe)
{
	xe_assert(xe, !xe_pm_runtime_suspended(xe));
}

void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
{
	struct xe_gt *gt;
	u8 id;

	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);

	for_each_gt(gt, xe, id) {
		drm_printf(p, "GT id: %u\n", id);
		drm_printf(p, "\tType: %s\n",
			   gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
		drm_printf(p, "\tIP ver: %u.%u.%u\n",
			   REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
			   REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
		drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
	}
}

u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
{
	return sign_extend64(address, xe->info.va_bits - 1);
}

u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{
	return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}
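
/*
 * Worked example (assuming a platform with xe->info.va_bits == 48): an
 * address with bit 47 set, say 0x0000800000000000, canonicalizes to
 * 0xffff800000000000 because sign_extend64() replicates bit (va_bits - 1)
 * into the upper bits; xe_device_uncanonicalize_addr() masks with
 * GENMASK_ULL(47, 0) and yields 0x0000800000000000 again, so the two
 * operations round-trip.
 */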

static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	xe_pm_runtime_put(xe);
}

/**
 * xe_device_declare_wedged - Declare device wedged
 * @xe: xe device instance
 *
 * This is a final state that can only be cleared with a module
 * re-probe (unbind + bind).
 * In this state every IOCTL will be blocked so the GT cannot be used.
 * In general it will be called upon any critical error such as gt reset
 * failure or guc loading failure.
 * If xe.wedged module parameter is set to 2, this function will be called
 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
 * snapshot capture. In this mode, GT reset won't be attempted so the state of
 * the issue is preserved for further debugging.
 */
void xe_device_declare_wedged(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	if (xe->wedged.mode == 0) {
		drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
		return;
	}

	if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
		return;
	}

	xe_pm_runtime_get_noresume(xe);

	if (!atomic_xchg(&xe->wedged.flag, 1)) {
		xe->needs_flr_on_fini = true;
		drm_err(&xe->drm,
			"CRITICAL: Xe has declared device %s as wedged.\n"
			"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
			"Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
			dev_name(xe->drm.dev));
	}

	for_each_gt(gt, xe, id)
		xe_gt_declare_wedged(gt);
}