1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_device.h" 7 8 #include <linux/units.h> 9 10 #include <drm/drm_aperture.h> 11 #include <drm/drm_atomic_helper.h> 12 #include <drm/drm_gem_ttm_helper.h> 13 #include <drm/drm_ioctl.h> 14 #include <drm/drm_managed.h> 15 #include <drm/drm_print.h> 16 #include <drm/xe_drm.h> 17 18 #include "regs/xe_regs.h" 19 #include "xe_bo.h" 20 #include "xe_debugfs.h" 21 #include "xe_display.h" 22 #include "xe_dma_buf.h" 23 #include "xe_drm_client.h" 24 #include "xe_drv.h" 25 #include "xe_exec_queue.h" 26 #include "xe_exec.h" 27 #include "xe_gt.h" 28 #include "xe_irq.h" 29 #include "xe_mmio.h" 30 #include "xe_module.h" 31 #include "xe_pat.h" 32 #include "xe_pcode.h" 33 #include "xe_pm.h" 34 #include "xe_query.h" 35 #include "xe_tile.h" 36 #include "xe_ttm_stolen_mgr.h" 37 #include "xe_ttm_sys_mgr.h" 38 #include "xe_vm.h" 39 #include "xe_wait_user_fence.h" 40 #include "xe_hwmon.h" 41 42 #ifdef CONFIG_LOCKDEP 43 struct lockdep_map xe_device_mem_access_lockdep_map = { 44 .name = "xe_device_mem_access_lockdep_map" 45 }; 46 #endif 47 48 static int xe_file_open(struct drm_device *dev, struct drm_file *file) 49 { 50 struct xe_device *xe = to_xe_device(dev); 51 struct xe_drm_client *client; 52 struct xe_file *xef; 53 int ret = -ENOMEM; 54 55 xef = kzalloc(sizeof(*xef), GFP_KERNEL); 56 if (!xef) 57 return ret; 58 59 client = xe_drm_client_alloc(); 60 if (!client) { 61 kfree(xef); 62 return ret; 63 } 64 65 xef->drm = file; 66 xef->client = client; 67 xef->xe = xe; 68 69 mutex_init(&xef->vm.lock); 70 xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); 71 72 mutex_init(&xef->exec_queue.lock); 73 xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); 74 75 file->driver_priv = xef; 76 return 0; 77 } 78 79 static void device_kill_persistent_exec_queues(struct xe_device *xe, 80 struct xe_file *xef); 81 82 static void xe_file_close(struct drm_device *dev, struct drm_file *file) 83 { 84 struct xe_device *xe = to_xe_device(dev); 85 struct xe_file *xef = file->driver_priv; 86 struct xe_vm *vm; 87 struct xe_exec_queue *q; 88 unsigned long idx; 89 90 mutex_lock(&xef->exec_queue.lock); 91 xa_for_each(&xef->exec_queue.xa, idx, q) { 92 xe_exec_queue_kill(q); 93 xe_exec_queue_put(q); 94 } 95 mutex_unlock(&xef->exec_queue.lock); 96 xa_destroy(&xef->exec_queue.xa); 97 mutex_destroy(&xef->exec_queue.lock); 98 device_kill_persistent_exec_queues(xe, xef); 99 100 mutex_lock(&xef->vm.lock); 101 xa_for_each(&xef->vm.xa, idx, vm) 102 xe_vm_close_and_put(vm); 103 mutex_unlock(&xef->vm.lock); 104 xa_destroy(&xef->vm.xa); 105 mutex_destroy(&xef->vm.lock); 106 107 xe_drm_client_put(xef->client); 108 kfree(xef); 109 } 110 111 static const struct drm_ioctl_desc xe_ioctls[] = { 112 DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW), 113 DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW), 114 DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl, 115 DRM_RENDER_ALLOW), 116 DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), 117 DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), 118 DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), 119 DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), 120 DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl, 121 DRM_RENDER_ALLOW), 122 DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, 123 DRM_RENDER_ALLOW), 124 DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, 125 DRM_RENDER_ALLOW), 126 DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, 127 DRM_RENDER_ALLOW), 128 DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, 129 DRM_RENDER_ALLOW), 130 }; 131 132 static const struct file_operations xe_driver_fops = { 133 .owner = THIS_MODULE, 134 .open = drm_open, 135 .release = drm_release_noglobal, 136 .unlocked_ioctl = drm_ioctl, 137 .mmap = drm_gem_mmap, 138 .poll = drm_poll, 139 .read = drm_read, 140 .compat_ioctl = drm_compat_ioctl, 141 .llseek = noop_llseek, 142 #ifdef CONFIG_PROC_FS 143 .show_fdinfo = drm_show_fdinfo, 144 #endif 145 }; 146 147 static void xe_driver_release(struct drm_device *dev) 148 { 149 struct xe_device *xe = to_xe_device(dev); 150 151 pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL); 152 } 153 154 static struct drm_driver driver = { 155 /* Don't use MTRRs here; the Xserver or userspace app should 156 * deal with them for Intel hardware. 157 */ 158 .driver_features = 159 DRIVER_GEM | 160 DRIVER_RENDER | DRIVER_SYNCOBJ | 161 DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, 162 .open = xe_file_open, 163 .postclose = xe_file_close, 164 165 .gem_prime_import = xe_gem_prime_import, 166 167 .dumb_create = xe_bo_dumb_create, 168 .dumb_map_offset = drm_gem_ttm_dumb_map_offset, 169 #ifdef CONFIG_PROC_FS 170 .show_fdinfo = xe_drm_client_fdinfo, 171 #endif 172 .release = &xe_driver_release, 173 174 .ioctls = xe_ioctls, 175 .num_ioctls = ARRAY_SIZE(xe_ioctls), 176 .fops = &xe_driver_fops, 177 .name = DRIVER_NAME, 178 .desc = DRIVER_DESC, 179 .date = DRIVER_DATE, 180 .major = DRIVER_MAJOR, 181 .minor = DRIVER_MINOR, 182 .patchlevel = DRIVER_PATCHLEVEL, 183 }; 184 185 static void xe_device_destroy(struct drm_device *dev, void *dummy) 186 { 187 struct xe_device *xe = to_xe_device(dev); 188 189 if (xe->ordered_wq) 190 destroy_workqueue(xe->ordered_wq); 191 192 if (xe->unordered_wq) 193 destroy_workqueue(xe->unordered_wq); 194 195 ttm_device_fini(&xe->ttm); 196 } 197 198 struct xe_device *xe_device_create(struct pci_dev *pdev, 199 const struct pci_device_id *ent) 200 { 201 struct xe_device *xe; 202 int err; 203 204 xe_display_driver_set_hooks(&driver); 205 206 err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); 207 if (err) 208 return ERR_PTR(err); 209 210 xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm); 211 if (IS_ERR(xe)) 212 return xe; 213 214 err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, 215 xe->drm.anon_inode->i_mapping, 216 xe->drm.vma_offset_manager, false, false); 217 if (WARN_ON(err)) 218 goto err_put; 219 220 err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); 221 if (err) 222 goto err_put; 223 224 xe->info.devid = pdev->device; 225 xe->info.revid = pdev->revision; 226 xe->info.force_execlist = xe_modparam.force_execlist; 227 228 spin_lock_init(&xe->irq.lock); 229 230 init_waitqueue_head(&xe->ufence_wq); 231 232 drmm_mutex_init(&xe->drm, &xe->usm.lock); 233 xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); 234 235 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { 236 /* Trigger a large asid and an early asid wrap. */ 237 u32 asid; 238 239 BUILD_BUG_ON(XE_MAX_ASID < 2); 240 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL, 241 XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1), 242 &xe->usm.next_asid, GFP_KERNEL); 243 drm_WARN_ON(&xe->drm, err); 244 if (err >= 0) 245 xa_erase(&xe->usm.asid_to_vm, asid); 246 } 247 248 drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); 249 INIT_LIST_HEAD(&xe->persistent_engines.list); 250 251 spin_lock_init(&xe->pinned.lock); 252 INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); 253 INIT_LIST_HEAD(&xe->pinned.external_vram); 254 INIT_LIST_HEAD(&xe->pinned.evicted); 255 256 xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); 257 xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); 258 if (!xe->ordered_wq || !xe->unordered_wq) { 259 drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); 260 err = -ENOMEM; 261 goto err_put; 262 } 263 264 err = xe_display_create(xe); 265 if (WARN_ON(err)) 266 goto err_put; 267 268 return xe; 269 270 err_put: 271 drm_dev_put(&xe->drm); 272 273 return ERR_PTR(err); 274 } 275 276 /* 277 * The driver-initiated FLR is the highest level of reset that we can trigger 278 * from within the driver. It is different from the PCI FLR in that it doesn't 279 * fully reset the SGUnit and doesn't modify the PCI config space and therefore 280 * it doesn't require a re-enumeration of the PCI BARs. However, the 281 * driver-initiated FLR does still cause a reset of both GT and display and a 282 * memory wipe of local and stolen memory, so recovery would require a full HW 283 * re-init and saving/restoring (or re-populating) the wiped memory. Since we 284 * perform the FLR as the very last action before releasing access to the HW 285 * during the driver release flow, we don't attempt recovery at all, because 286 * if/when a new instance of i915 is bound to the device it will do a full 287 * re-init anyway. 288 */ 289 static void xe_driver_flr(struct xe_device *xe) 290 { 291 const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ 292 struct xe_gt *gt = xe_root_mmio_gt(xe); 293 int ret; 294 295 if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { 296 drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); 297 return; 298 } 299 300 drm_dbg(&xe->drm, "Triggering Driver-FLR\n"); 301 302 /* 303 * Make sure any pending FLR requests have cleared by waiting for the 304 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS 305 * to make sure it's not still set from a prior attempt (it's a write to 306 * clear bit). 307 * Note that we should never be in a situation where a previous attempt 308 * is still pending (unless the HW is totally dead), but better to be 309 * safe in case something unexpected happens 310 */ 311 ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); 312 if (ret) { 313 drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret); 314 return; 315 } 316 xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); 317 318 /* Trigger the actual Driver-FLR */ 319 xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR); 320 321 /* Wait for hardware teardown to complete */ 322 ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); 323 if (ret) { 324 drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret); 325 return; 326 } 327 328 /* Wait for hardware/firmware re-init to complete */ 329 ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, 330 flr_timeout, NULL, false); 331 if (ret) { 332 drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret); 333 return; 334 } 335 336 /* Clear sticky completion status */ 337 xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); 338 } 339 340 static void xe_driver_flr_fini(struct drm_device *drm, void *arg) 341 { 342 struct xe_device *xe = arg; 343 344 if (xe->needs_flr_on_fini) 345 xe_driver_flr(xe); 346 } 347 348 static void xe_device_sanitize(struct drm_device *drm, void *arg) 349 { 350 struct xe_device *xe = arg; 351 struct xe_gt *gt; 352 u8 id; 353 354 for_each_gt(gt, xe, id) 355 xe_gt_sanitize(gt); 356 } 357 358 int xe_device_probe(struct xe_device *xe) 359 { 360 struct xe_tile *tile; 361 struct xe_gt *gt; 362 int err; 363 u8 id; 364 365 xe_pat_init_early(xe); 366 367 xe->info.mem_region_mask = 1; 368 err = xe_display_init_nommio(xe); 369 if (err) 370 return err; 371 372 for_each_tile(tile, xe, id) { 373 err = xe_tile_alloc(tile); 374 if (err) 375 return err; 376 } 377 378 err = xe_mmio_init(xe); 379 if (err) 380 return err; 381 382 err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); 383 if (err) 384 return err; 385 386 for_each_gt(gt, xe, id) { 387 err = xe_pcode_probe(gt); 388 if (err) 389 return err; 390 } 391 392 err = xe_display_init_noirq(xe); 393 if (err) 394 return err; 395 396 err = xe_irq_install(xe); 397 if (err) 398 goto err; 399 400 for_each_gt(gt, xe, id) { 401 err = xe_gt_init_early(gt); 402 if (err) 403 goto err_irq_shutdown; 404 } 405 406 err = xe_mmio_probe_vram(xe); 407 if (err) 408 goto err_irq_shutdown; 409 410 xe_ttm_sys_mgr_init(xe); 411 412 for_each_tile(tile, xe, id) { 413 err = xe_tile_init_noalloc(tile); 414 if (err) 415 goto err_irq_shutdown; 416 } 417 418 /* Allocate and map stolen after potential VRAM resize */ 419 xe_ttm_stolen_mgr_init(xe); 420 421 /* 422 * Now that GT is initialized (TTM in particular), 423 * we can try to init display, and inherit the initial fb. 424 * This is the reason the first allocation needs to be done 425 * inside display. 426 */ 427 err = xe_display_init_noaccel(xe); 428 if (err) 429 goto err_irq_shutdown; 430 431 for_each_gt(gt, xe, id) { 432 err = xe_gt_init(gt); 433 if (err) 434 goto err_irq_shutdown; 435 } 436 437 xe_heci_gsc_init(xe); 438 439 err = xe_display_init(xe); 440 if (err) 441 goto err_irq_shutdown; 442 443 err = drm_dev_register(&xe->drm, 0); 444 if (err) 445 goto err_fini_display; 446 447 xe_display_register(xe); 448 449 xe_debugfs_register(xe); 450 451 xe_pmu_register(&xe->pmu); 452 453 xe_hwmon_register(xe); 454 455 err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); 456 if (err) 457 return err; 458 459 return 0; 460 461 err_fini_display: 462 xe_display_driver_remove(xe); 463 464 err_irq_shutdown: 465 xe_irq_shutdown(xe); 466 err: 467 xe_display_fini(xe); 468 return err; 469 } 470 471 static void xe_device_remove_display(struct xe_device *xe) 472 { 473 xe_display_unregister(xe); 474 475 drm_dev_unplug(&xe->drm); 476 xe_display_driver_remove(xe); 477 } 478 479 void xe_device_remove(struct xe_device *xe) 480 { 481 xe_device_remove_display(xe); 482 483 xe_display_fini(xe); 484 485 xe_heci_gsc_fini(xe); 486 487 xe_irq_shutdown(xe); 488 } 489 490 void xe_device_shutdown(struct xe_device *xe) 491 { 492 } 493 494 void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) 495 { 496 mutex_lock(&xe->persistent_engines.lock); 497 list_add_tail(&q->persistent.link, &xe->persistent_engines.list); 498 mutex_unlock(&xe->persistent_engines.lock); 499 } 500 501 void xe_device_remove_persistent_exec_queues(struct xe_device *xe, 502 struct xe_exec_queue *q) 503 { 504 mutex_lock(&xe->persistent_engines.lock); 505 if (!list_empty(&q->persistent.link)) 506 list_del(&q->persistent.link); 507 mutex_unlock(&xe->persistent_engines.lock); 508 } 509 510 static void device_kill_persistent_exec_queues(struct xe_device *xe, 511 struct xe_file *xef) 512 { 513 struct xe_exec_queue *q, *next; 514 515 mutex_lock(&xe->persistent_engines.lock); 516 list_for_each_entry_safe(q, next, &xe->persistent_engines.list, 517 persistent.link) 518 if (q->persistent.xef == xef) { 519 xe_exec_queue_kill(q); 520 list_del_init(&q->persistent.link); 521 } 522 mutex_unlock(&xe->persistent_engines.lock); 523 } 524 525 void xe_device_wmb(struct xe_device *xe) 526 { 527 struct xe_gt *gt = xe_root_mmio_gt(xe); 528 529 wmb(); 530 if (IS_DGFX(xe)) 531 xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0); 532 } 533 534 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) 535 { 536 return xe_device_has_flat_ccs(xe) ? 537 DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; 538 } 539 540 bool xe_device_mem_access_ongoing(struct xe_device *xe) 541 { 542 if (xe_pm_read_callback_task(xe) != NULL) 543 return true; 544 545 return atomic_read(&xe->mem_access.ref); 546 } 547 548 void xe_device_assert_mem_access(struct xe_device *xe) 549 { 550 XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); 551 } 552 553 bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) 554 { 555 bool active; 556 557 if (xe_pm_read_callback_task(xe) == current) 558 return true; 559 560 active = xe_pm_runtime_get_if_active(xe); 561 if (active) { 562 int ref = atomic_inc_return(&xe->mem_access.ref); 563 564 xe_assert(xe, ref != S32_MAX); 565 } 566 567 return active; 568 } 569 570 void xe_device_mem_access_get(struct xe_device *xe) 571 { 572 int ref; 573 574 /* 575 * This looks racy, but should be fine since the pm_callback_task only 576 * transitions from NULL -> current (and back to NULL again), during the 577 * runtime_resume() or runtime_suspend() callbacks, for which there can 578 * only be a single one running for our device. We only need to prevent 579 * recursively calling the runtime_get or runtime_put from those 580 * callbacks, as well as preventing triggering any access_ongoing 581 * asserts. 582 */ 583 if (xe_pm_read_callback_task(xe) == current) 584 return; 585 586 /* 587 * Since the resume here is synchronous it can be quite easy to deadlock 588 * if we are not careful. Also in practice it might be quite timing 589 * sensitive to ever see the 0 -> 1 transition with the callers locks 590 * held, so deadlocks might exist but are hard for lockdep to ever see. 591 * With this in mind, help lockdep learn about the potentially scary 592 * stuff that can happen inside the runtime_resume callback by acquiring 593 * a dummy lock (it doesn't protect anything and gets compiled out on 594 * non-debug builds). Lockdep then only needs to see the 595 * mem_access_lockdep_map -> runtime_resume callback once, and then can 596 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map. 597 * For example if the (callers_locks) are ever grabbed in the 598 * runtime_resume callback, lockdep should give us a nice splat. 599 */ 600 lock_map_acquire(&xe_device_mem_access_lockdep_map); 601 lock_map_release(&xe_device_mem_access_lockdep_map); 602 603 xe_pm_runtime_get(xe); 604 ref = atomic_inc_return(&xe->mem_access.ref); 605 606 xe_assert(xe, ref != S32_MAX); 607 608 } 609 610 void xe_device_mem_access_put(struct xe_device *xe) 611 { 612 int ref; 613 614 if (xe_pm_read_callback_task(xe) == current) 615 return; 616 617 ref = atomic_dec_return(&xe->mem_access.ref); 618 xe_pm_runtime_put(xe); 619 620 xe_assert(xe, ref >= 0); 621 } 622