// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/aperture.h>
#include <linux/delay.h>
#include <linux/fault-inject.h>
#include <linux/units.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>

#include "display/xe_display.h"
#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"

#include <generated/xe_wa_oob.h>

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;
	struct task_struct *task = NULL;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	file->driver_priv = xef;
	kref_init(&xef->refcount);

	task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
	if (task) {
		xef->process_name = kstrdup(task->comm, GFP_KERNEL);
		xef->pid = task->pid;
		put_task_struct(task);
	}

	return 0;
}

static void xe_file_destroy(struct kref *ref)
{
	struct xe_file *xef = container_of(ref, struct xe_file, refcount);

	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	xe_drm_client_put(xef->client);
	kfree(xef->process_name);
	kfree(xef);
}

/**
 * xe_file_get() - Take a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Anyone with a pointer to xef must take a reference to the xe file
 * object using this call.
 *
 * Return: xe file pointer
 */
struct xe_file *xe_file_get(struct xe_file *xef)
{
	kref_get(&xef->refcount);
	return xef;
}

/**
 * xe_file_put() - Drop a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Used to drop reference to the xef object
 */
void xe_file_put(struct xe_file *xef)
{
	kref_put(&xef->refcount, xe_file_destroy);
}
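
/*
 * Illustrative usage (sketch only, not part of the driver): code that keeps
 * an xef pointer alive beyond the scope that handed it out is expected to
 * bracket the access with a get/put pair, e.g.:
 *
 *	struct xe_file *ref = xe_file_get(xef);
 *
 *	... use ref->client, ref->vm.xa, etc. ...
 *
 *	xe_file_put(ref);
 */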

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	xe_pm_runtime_get(xe);

	/*
	 * No need for exec_queue.lock here as there is no contention for it
	 * when FD is closing as IOCTLs presumably can't be modifying the
	 * xarray. Taking exec_queue.lock here causes undue dependency on
	 * vm->lock taken during xe_exec_queue_kill().
	 */
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		if (q->vm && q->hwe->hw_engine_group)
			xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);

	xe_file_put(xef);

	xe_pm_runtime_put(xe);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
};
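
/*
 * Common ioctl entry point: every xe ioctl is rejected with -ECANCELED once
 * the device has been declared wedged, and runs with a runtime PM reference
 * held for the duration of the DRM ioctl dispatch.
 */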

static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct drm_file *file_priv = file->private_data;
	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
	long ret;

	if (xe_device_wedged(xe))
		return -ECANCELED;

	ret = xe_pm_runtime_get_ioctl(xe);
	if (ret >= 0)
		ret = drm_ioctl(file, cmd, arg);
	xe_pm_runtime_put(xe);

	return ret;
}

#ifdef CONFIG_COMPAT
static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct drm_file *file_priv = file->private_data;
	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
	long ret;

	if (xe_device_wedged(xe))
		return -ECANCELED;

	ret = xe_pm_runtime_get_ioctl(xe);
	if (ret >= 0)
		ret = drm_compat_ioctl(file, cmd, arg);
	xe_pm_runtime_put(xe);

	return ret;
}
#else
/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
#define xe_drm_compat_ioctl NULL
#endif

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = xe_drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = xe_drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
	.fop_flags = FOP_UNSIGNED_OFFSET,
};

static struct drm_driver driver = {
	/* Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};
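
/*
 * drmm release action registered in xe_device_create(): destroys whichever
 * driver workqueues were allocated and finalizes the TTM device.
 */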

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->preempt_fence_wq)
		destroy_workqueue(xe->preempt_fence_wq);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	if (xe->unordered_wq)
		destroy_workqueue(xe->unordered_wq);

	if (xe->destroy_wq)
		destroy_workqueue(xe->destroy_wq);

	ttm_device_fini(&xe->ttm);
}

struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	xe_display_driver_set_hooks(&driver);

	err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = xe_modparam.force_execlist;

	err = xe_irq_init(xe);
	if (err)
		goto err;

	init_waitqueue_head(&xe->ufence_wq);

	init_rwsem(&xe->usm.lock);

	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		/* Trigger a large asid and an early asid wrap. */
		u32 asid;

		BUILD_BUG_ON(XE_MAX_ASID < 2);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		drm_WARN_ON(&xe->drm, err);
		if (err >= 0)
			xa_erase(&xe->usm.asid_to_vm, asid);
	}

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
						       WQ_MEM_RECLAIM);
	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq ||
	    !xe->preempt_fence_wq || !xe->destroy_wq) {
		/*
		 * Cleanup done in xe_device_destroy via
		 * drmm_add_action_or_reset register above
		 */
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
	}

	err = drmm_mutex_init(&xe->drm, &xe->pmt.lock);
	if (err)
		goto err;

	err = xe_display_create(xe);
	if (WARN_ON(err))
		goto err;

	return xe;

err:
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */

static bool xe_driver_flr_disabled(struct xe_device *xe)
{
	return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void __xe_driver_flr(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	int ret;

	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

	/*
	 * Make sure any pending FLR requests have cleared by waiting for the
	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
	 * to make sure it's not still set from a prior attempt (it's a write to
	 * clear bit).
	 * Note that we should never be in a situation where a previous attempt
	 * is still pending (unless the HW is totally dead), but better to be
	 * safe in case something unexpected happens.
	 */
	ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
		return;
	}
	xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);

	/* Trigger the actual Driver-FLR */
	xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);

	/* Wait for hardware teardown to complete */
	ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
		return;
	}

	/* Wait for hardware/firmware re-init to complete */
	ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			     flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
		return;
	}

	/* Clear sticky completion status */
	xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr(struct xe_device *xe)
{
	if (xe_driver_flr_disabled(xe)) {
		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
		return;
	}

	__xe_driver_flr(xe);
}

static void xe_driver_flr_fini(void *arg)
{
	struct xe_device *xe = arg;

	if (xe->needs_flr_on_fini)
		xe_driver_flr(xe);
}
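
/*
 * devm action registered at the end of xe_device_probe(): returns every GT
 * to a sanitized state when the device is removed or the probe unwinds.
 */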

static void xe_device_sanitize(void *arg)
{
	struct xe_device *xe = arg;
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		xe_gt_sanitize(gt);
}

static int xe_set_dma_info(struct xe_device *xe)
{
	unsigned int mask_size = xe->info.dma_mask_size;
	int err;

	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	return 0;

mask_err:
	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
	return err;
}

static bool verify_lmem_ready(struct xe_device *xe)
{
	u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;

	return !!val;
}

static int wait_for_lmem_ready(struct xe_device *xe)
{
	unsigned long timeout, start;

	if (!IS_DGFX(xe))
		return 0;

	if (IS_SRIOV_VF(xe))
		return 0;

	if (verify_lmem_ready(xe))
		return 0;

	drm_dbg(&xe->drm, "Waiting for lmem initialization\n");

	start = jiffies;
	timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */

	do {
		if (signal_pending(current))
			return -EINTR;

		/*
		 * The boot firmware initializes local memory and assesses its
		 * health. If memory training fails, the punit will have been
		 * instructed to keep the GT powered down; we won't be able to
		 * communicate with it.
		 *
		 * If the status check is done before the punit updates the
		 * register, it can lead to the system being unusable.
		 * Use a timeout and defer the probe to prevent this.
		 */
		if (time_after(jiffies, timeout)) {
			drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
			return -EPROBE_DEFER;
		}

		msleep(20);

	} while (!verify_lmem_ready(xe));

	drm_dbg(&xe->drm, "lmem ready after %ums",
		jiffies_to_msecs(jiffies - start));

	return 0;
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */

static void update_device_info(struct xe_device *xe)
{
	/* disable features that are not available/applicable to VFs */
	if (IS_SRIOV_VF(xe)) {
		xe->info.probe_display = 0;
		xe->info.has_heci_gscfi = 0;
		xe->info.skip_guc_pc = 1;
		xe->info.skip_pcode = 1;
	}
}

/**
 * xe_device_probe_early: Device early probe
 * @xe: xe device instance
 *
 * Initialize MMIO resources that don't require any
 * knowledge about tile count. Also initialize pcode and
 * check vram initialization on root tile.
 *
 * Return: 0 on success, error code on failure
 */
int xe_device_probe_early(struct xe_device *xe)
{
	int err;

	err = xe_mmio_init(xe);
	if (err)
		return err;

	xe_sriov_probe_early(xe);

	update_device_info(xe);

	err = xe_pcode_probe_early(xe);
	if (err)
		return err;

	err = wait_for_lmem_ready(xe);
	if (err)
		return err;

	xe->wedged.mode = xe_modparam.wedged_mode;

	return 0;
}
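
/*
 * On Xe2+ platforms that support flat CCS (and when not running as a VF),
 * read the hardware state back to check whether the BIOS left flat CCS
 * enabled, and update xe->info.has_flat_ccs to match.
 */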

static int probe_has_flat_ccs(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int fw_ref;
	u32 reg;

	/* Always enabled/disabled, no runtime check to do */
	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe))
		return 0;

	gt = xe_root_mmio_gt(xe);

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref)
		return -ETIMEDOUT;

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

	if (!xe->info.has_flat_ccs)
		drm_dbg(&xe->drm,
			"Flat CCS has been disabled in BIOS, may lead to performance impact");

	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	return 0;
}
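
/*
 * Main probe entry point: brings up tiles, GTs, VRAM, stolen memory and
 * display in order, then registers the DRM device. Error paths unwind
 * through the labels at the bottom of the function.
 */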

int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 last_gt;
	u8 id;

	xe_pat_init_early(xe);

	err = xe_sriov_init(xe);
	if (err)
		return err;

	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
		return err;

	err = xe_set_dma_info(xe);
	if (err)
		return err;

	err = xe_mmio_probe_tiles(xe);
	if (err)
		return err;

	xe_ttm_sys_mgr_init(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			return err;

		/*
		 * Only after this point can GT-specific MMIO operations
		 * (including things like communication with the GuC)
		 * be performed.
		 */
		xe_gt_mmio_init(gt);
	}

	for_each_tile(tile, xe, id) {
		if (IS_SRIOV_VF(xe)) {
			xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
			err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
			if (err)
				return err;
			err = xe_gt_sriov_vf_query_config(tile->primary_gt);
			if (err)
				return err;
		}
		err = xe_ggtt_init_early(tile->mem.ggtt);
		if (err)
			return err;
		err = xe_memirq_init(&tile->memirq);
		if (err)
			return err;
	}

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_hwconfig(gt);
		if (err)
			return err;
	}

	err = xe_devcoredump_init(xe);
	if (err)
		return err;
	err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
	if (err)
		return err;

	err = xe_display_init_noirq(xe);
	if (err)
		return err;

	err = xe_irq_install(xe);
	if (err)
		goto err;

	err = probe_has_flat_ccs(xe);
	if (err)
		goto err;

	err = xe_vram_probe(xe);
	if (err)
		goto err;

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	/*
	 * Now that GT is initialized (TTM in particular),
	 * we can try to init display, and inherit the initial fb.
	 * This is the reason the first allocation needs to be done
	 * inside display.
	 */
	err = xe_display_init_noaccel(xe);
	if (err)
		goto err;

	for_each_gt(gt, xe, id) {
		last_gt = id;

		err = xe_gt_init(gt);
		if (err)
			goto err_fini_gt;
	}

	xe_heci_gsc_init(xe);

	err = xe_oa_init(xe);
	if (err)
		goto err_fini_gt;

	err = xe_display_init(xe);
	if (err)
		goto err_fini_oa;

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_fini_display;

	xe_display_register(xe);

	xe_oa_register(xe);

	xe_debugfs_register(xe);

	xe_hwmon_register(xe);

	for_each_gt(gt, xe, id)
		xe_gt_sanitize_freq(gt);

	xe_vsec_init(xe);

	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);

err_fini_display:
	xe_display_driver_remove(xe);

err_fini_oa:
	xe_oa_fini(xe);

err_fini_gt:
	for_each_gt(gt, xe, id) {
		if (id < last_gt)
			xe_gt_remove(gt);
		else
			break;
	}

err:
	xe_display_fini(xe);
	return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
	xe_display_unregister(xe);

	drm_dev_unplug(&xe->drm);
	xe_display_driver_remove(xe);
}

void xe_device_remove(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	xe_oa_unregister(xe);

	xe_device_remove_display(xe);

	xe_display_fini(xe);

	xe_oa_fini(xe);

	xe_heci_gsc_fini(xe);

	for_each_gt(gt, xe, id)
		xe_gt_remove(gt);
}
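
/*
 * Shutdown path: if the BIOS has disabled the driver-initiated FLR, quiesce
 * display, interrupts and the GTs in order; otherwise simply trigger the FLR
 * and let the hardware reset itself.
 */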

void xe_device_shutdown(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	drm_dbg(&xe->drm, "Shutting down device\n");

	if (xe_driver_flr_disabled(xe)) {
		xe_display_pm_shutdown(xe);

		xe_irq_suspend(xe);

		for_each_gt(gt, xe, id)
			xe_gt_shutdown(gt);

		xe_display_pm_shutdown_late(xe);
	} else {
		/* BOOM! */
		__xe_driver_flr(xe);
	}
}

/**
 * xe_device_wmb() - Device specific write memory barrier
 * @xe: the &xe_device
 *
 * While wmb() is sufficient for a barrier if we use system memory, on discrete
 * platforms with device memory we additionally need to issue a register write.
 * Since it doesn't matter which register we write to, use the read-only VF_CAP
 * register that is also marked as accessible by the VFs.
 */
void xe_device_wmb(struct xe_device *xe)
{
	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}

/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * Display engine has direct access to memory and is never coherent with L3/L4
 * caches (or CPU caches), however KMD is responsible for specifically flushing
 * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
 * can happen from such a surface without seeing corruption.
 *
 * Display surfaces can be tagged as transient by mapping it using one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 * Media is not coherent with L3 and we want to support render-vs-media
 * use cases. For other engines like copy/blt the HW internally forces uncached
 * behaviour, hence why we can skip the TDF on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int fw_ref;
	u8 id;

	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
		return;

	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
		xe_device_l2_flush(xe);
		return;
	}

	for_each_gt(gt, xe, id) {
		if (xe_gt_is_media_type(gt))
			continue;

		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (!fw_ref)
			return;

		xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
		/*
		 * FIXME: We can likely do better here with our choice of
		 * timeout. Currently we just assume the worst case, i.e. 150us,
		 * which is believed to be sufficient to cover the worst case
		 * scenario on current platforms if all cache entries are
		 * transient and need to be flushed.
		 */
		if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
				   150, NULL, false))
			xe_gt_err_once(gt, "TD flush timeout\n");

		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}
}
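
/**
 * xe_device_l2_flush() - Flush the GT L2 cache
 * @xe: The device
 *
 * Issue a global invalidation on the root GT and wait for it to complete,
 * serialized against concurrent invalidations via the GT's global_invl_lock.
 * Only takes effect on platforms where WA 16023588340 applies; otherwise it
 * is a no-op.
 */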

void xe_device_l2_flush(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int fw_ref;

	gt = xe_root_mmio_gt(xe);

	if (!XE_WA(gt, 16023588340))
		return;

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref)
		return;

	spin_lock(&gt->global_invl_lock);
	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);

	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
		xe_gt_err_once(gt, "Global invalidation timeout\n");
	spin_unlock(&gt->global_invl_lock);

	xe_force_wake_put(gt_to_fw(gt), fw_ref);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}

/**
 * xe_device_assert_mem_access - Inspect the current runtime_pm state.
 * @xe: xe device instance
 *
 * To be used before any kind of memory access. It will splat a debug warning
 * if the device is currently sleeping. But it doesn't guarantee in any way
 * that the device is going to remain awake. Xe PM runtime get and put
 * functions might be added to the outer bound of the memory access, while
 * this check is intended for inner usage to splat some warning if the worst
 * case has just happened.
 */
void xe_device_assert_mem_access(struct xe_device *xe)
{
	xe_assert(xe, !xe_pm_runtime_suspended(xe));
}

void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
{
	struct xe_gt *gt;
	u8 id;

	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);

	for_each_gt(gt, xe, id) {
		drm_printf(p, "GT id: %u\n", id);
		drm_printf(p, "\tTile: %u\n", gt->tile->id);
		drm_printf(p, "\tType: %s\n",
			   gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
		drm_printf(p, "\tIP ver: %u.%u.%u\n",
			   REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
			   REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
		drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
	}
}

u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
{
	return sign_extend64(address, xe->info.va_bits - 1);
}

u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{
	return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}

static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	xe_pm_runtime_put(xe);
}

/**
 * xe_device_declare_wedged - Declare device wedged
 * @xe: xe device instance
 *
 * This is a final state that can only be cleared with a module
 * re-probe (unbind + bind).
 * In this state every IOCTL will be blocked so the GT cannot be used.
 * In general it will be called upon any critical error such as gt reset
 * failure or guc loading failure.
 * If xe.wedged module parameter is set to 2, this function will be called
 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
 * snapshot capture. In this mode, GT reset won't be attempted so the state of
 * the issue is preserved for further debugging.
 */
void xe_device_declare_wedged(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;

	if (xe->wedged.mode == 0) {
		drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
		return;
	}

	xe_pm_runtime_get_noresume(xe);

	if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
		return;
	}

	if (!atomic_xchg(&xe->wedged.flag, 1)) {
		xe->needs_flr_on_fini = true;
		drm_err(&xe->drm,
			"CRITICAL: Xe has declared device %s as wedged.\n"
			"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
			"Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
			dev_name(xe->drm.dev));
	}

	for_each_gt(gt, xe, id)
		xe_gt_declare_wedged(gt);
}