// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec_queue.h"
#include "xe_exec.h"
#include "xe_gt.h"
#include "xe_irq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vm_madvise.h"
#include "xe_wait_user_fence.h"
#include "xe_hwmon.h"

#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
	.name = "xe_device_mem_access_lockdep_map"
};
#endif

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	file->driver_priv = xef;
	return 0;
}

static void device_kill_persistent_exec_queues(struct xe_device *xe,
					       struct xe_file *xef);

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	mutex_lock(&xef->exec_queue.lock);
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	mutex_unlock(&xef->exec_queue.lock);
	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	device_kill_persistent_exec_queues(xe, xef);

	mutex_lock(&xef->vm.lock);
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);
	mutex_unlock(&xef->vm.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	xe_drm_client_put(xef->client);
	kfree(xef);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
};

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
};

static void xe_driver_release(struct drm_device *dev)
{
	struct xe_device *xe = to_xe_device(dev);

	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
}

static struct drm_driver driver = {
	/* Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.release = &xe_driver_release,

	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.date = DRIVER_DATE,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	ttm_device_fini(&xe->ttm);
}

struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err_put;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err_put;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = force_execlist;

	spin_lock_init(&xe->irq.lock);

	init_waitqueue_head(&xe->ufence_wq);

	drmm_mutex_init(&xe->drm, &xe->usm.lock);
	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1);

	drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
	INIT_LIST_HEAD(&xe->persistent_engines.list);

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	if (!xe->ordered_wq) {
		drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n");
		err = -ENOMEM;
		goto err_put;
	}

	drmm_mutex_init(&xe->drm, &xe->sb_lock);
	xe->enabled_irq_mask = ~0;

	return xe;

err_put:
	drm_dev_put(&xe->drm);

	return ERR_PTR(err);
}

static void xe_device_sanitize(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		xe_gt_sanitize(gt);
}

int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 id;

	xe_pat_init_early(xe);

	xe->info.mem_region_mask = 1;

	for_each_tile(tile, xe, id) {
		err = xe_tile_alloc(tile);
		if (err)
			return err;
	}

	err = xe_mmio_init(xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_pcode_probe(gt);
		if (err)
			return err;
	}

	err = xe_irq_install(xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			goto err_irq_shutdown;
	}

	err = xe_mmio_probe_vram(xe);
	if (err)
		goto err_irq_shutdown;

	xe_ttm_sys_mgr_init(xe);

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err_irq_shutdown;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	for_each_gt(gt, xe, id) {
		err = xe_gt_init(gt);
		if (err)
			goto err_irq_shutdown;
	}

	xe_heci_gsc_init(xe);

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_irq_shutdown;

	xe_debugfs_register(xe);

	xe_pmu_register(&xe->pmu);

	xe_hwmon_register(xe);

	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
	if (err)
		return err;

	return 0;

err_irq_shutdown:
	xe_irq_shutdown(xe);
	return err;
}

void xe_device_remove(struct xe_device *xe)
{
	xe_heci_gsc_fini(xe);

	xe_irq_shutdown(xe);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
{
	mutex_lock(&xe->persistent_engines.lock);
	list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
	mutex_unlock(&xe->persistent_engines.lock);
}

void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
					     struct xe_exec_queue *q)
{
	mutex_lock(&xe->persistent_engines.lock);
	if (!list_empty(&q->persistent.link))
		list_del(&q->persistent.link);
	mutex_unlock(&xe->persistent_engines.lock);
}

static void device_kill_persistent_exec_queues(struct xe_device *xe,
					       struct xe_file *xef)
{
	struct xe_exec_queue *q, *next;

	mutex_lock(&xe->persistent_engines.lock);
	list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
				 persistent.link)
		if (q->persistent.xef == xef) {
			xe_exec_queue_kill(q);
			list_del_init(&q->persistent.link);
		}
	mutex_unlock(&xe->persistent_engines.lock);
}

void xe_device_wmb(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
}

bool xe_device_mem_access_ongoing(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) != NULL)
		return true;

	return atomic_read(&xe->mem_access.ref);
}

void xe_device_assert_mem_access(struct xe_device *xe)
{
	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
}

bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
{
	bool active;

	if (xe_pm_read_callback_task(xe) == current)
		return true;

	active = xe_pm_runtime_get_if_active(xe);
	if (active) {
		int ref = atomic_inc_return(&xe->mem_access.ref);

		xe_assert(xe, ref != S32_MAX);
	}

	return active;
}

void xe_device_mem_access_get(struct xe_device *xe)
{
	int ref;

	/*
	 * This looks racy, but should be fine since the pm_callback_task only
	 * transitions from NULL -> current (and back to NULL again) during the
	 * runtime_resume() or runtime_suspend() callbacks, for which there can
	 * only be a single one running for our device. We only need to prevent
	 * recursively calling the runtime_get or runtime_put from those
	 * callbacks, as well as preventing triggering any access_ongoing
	 * asserts.
	 */
	if (xe_pm_read_callback_task(xe) == current)
		return;

	/*
	 * Since the resume here is synchronous it can be quite easy to deadlock
	 * if we are not careful. Also in practice it might be quite timing
	 * sensitive to ever see the 0 -> 1 transition with the caller's locks
	 * held, so deadlocks might exist but are hard for lockdep to ever see.
	 * With this in mind, help lockdep learn about the potentially scary
	 * stuff that can happen inside the runtime_resume callback by acquiring
	 * a dummy lock (it doesn't protect anything and gets compiled out on
	 * non-debug builds). Lockdep then only needs to see the
	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
	 * For example if the (callers_locks) are ever grabbed in the
	 * runtime_resume callback, lockdep should give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);
	lock_map_release(&xe_device_mem_access_lockdep_map);

	xe_pm_runtime_get(xe);
	ref = atomic_inc_return(&xe->mem_access.ref);

	xe_assert(xe, ref != S32_MAX);
}

void xe_device_mem_access_put(struct xe_device *xe)
{
	int ref;

	if (xe_pm_read_callback_task(xe) == current)
		return;

	ref = atomic_dec_return(&xe->mem_access.ref);
	xe_pm_runtime_put(xe);

	xe_assert(xe, ref >= 0);
}
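
/*
 * Illustrative sketch only (compiled out, not part of the driver proper): a
 * typical caller brackets hardware access with xe_device_mem_access_get()/
 * xe_device_mem_access_put() so the device is runtime-resumed and
 * mem_access.ref stays elevated for the duration of the access. The helper
 * name example_touch_hw() is hypothetical.
 */
#if 0
static void example_touch_hw(struct xe_device *xe)
{
	xe_device_mem_access_get(xe);

	/* ... read/write registers or VRAM here ... */

	xe_device_mem_access_put(xe);
}
#endif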