// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_display.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_pmu.h"
#include "xe_query.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_wait_user_fence.h"

#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
	.name = "xe_device_mem_access_lockdep_map"
};
#endif

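/*
 * Per-client state is allocated on DRM file open: it tracks the client's VMs
 * and exec queues in xarrays and ties the file to an xe_drm_client for fdinfo
 * accounting.
 */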
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	file->driver_priv = xef;
	return 0;
}

static void device_kill_persistent_exec_queues(struct xe_device *xe,
					       struct xe_file *xef);

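/*
 * DRM postclose() hook: kill and drop the client's exec queues (including any
 * persistent ones it still owns), close its VMs and free the per-client state.
 */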
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	mutex_lock(&xef->exec_queue.lock);
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	mutex_unlock(&xef->exec_queue.lock);
	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	device_kill_persistent_exec_queues(xe, xef);

	mutex_lock(&xef->vm.lock);
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);
	mutex_unlock(&xef->vm.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	xe_drm_client_put(xef->client);
	kfree(xef);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
};

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
};

static void xe_driver_release(struct drm_device *dev)
{
	struct xe_device *xe = to_xe_device(dev);

	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
}

static struct drm_driver driver = {
	/*
	 * Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.release = &xe_driver_release,

	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.date = DRIVER_DATE,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	if (xe->unordered_wq)
		destroy_workqueue(xe->unordered_wq);

	ttm_device_fini(&xe->ttm);
}

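/**
 * xe_device_create() - Allocate and minimally initialize an xe_device
 * @pdev: PCI device backing this xe instance
 * @ent: matching PCI ID table entry
 *
 * Allocates the &struct xe_device (embedding the &struct drm_device) and sets
 * up the software-only state that is needed before any hardware access: the
 * TTM device, workqueues, locks, lists and the USM ASID xarray. Hardware
 * bring-up happens later, in xe_device_probe().
 *
 * Return: pointer to the new device, or an ERR_PTR() on failure.
 */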
struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	xe_display_driver_set_hooks(&driver);

	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err_put;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err_put;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = xe_modparam.force_execlist;

	spin_lock_init(&xe->irq.lock);

	init_waitqueue_head(&xe->ufence_wq);

	drmm_mutex_init(&xe->drm, &xe->usm.lock);
	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		/* Trigger a large asid and an early asid wrap. */
		u32 asid;

		BUILD_BUG_ON(XE_MAX_ASID < 2);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		drm_WARN_ON(&xe->drm, err);
		if (err >= 0)
			xa_erase(&xe->usm.asid_to_vm, asid);
	}

	drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock);
	INIT_LIST_HEAD(&xe->persistent_engines.list);

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq) {
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err_put;
	}

	err = xe_display_create(xe);
	if (WARN_ON(err))
		goto err_put;

	return xe;

err_put:
	drm_dev_put(&xe->drm);

	return ERR_PTR(err);
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	int ret;

	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
		return;
	}

	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

	/*
	 * Make sure any pending FLR requests have cleared by waiting for the
	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
	 * to make sure it's not still set from a prior attempt (it's a
	 * write-to-clear bit).
	 * Note that we should never be in a situation where a previous attempt
	 * is still pending (unless the HW is totally dead), but better to be
	 * safe in case something unexpected happens.
	 */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
		return;
	}
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);

	/* Trigger the actual Driver-FLR */
	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

	/* Wait for hardware teardown to complete */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
		return;
	}

	/* Wait for hardware/firmware re-init to complete */
	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			     flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
		return;
	}

	/* Clear sticky completion status */
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	if (xe->needs_flr_on_fini)
		xe_driver_flr(xe);
}

static void xe_device_sanitize(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		xe_gt_sanitize(gt);
}

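/**
 * xe_device_probe() - Bring up an xe device
 * @xe: device to probe
 *
 * Runs the full initialization sequence: per-tile allocation, MMIO and VRAM
 * probing, TTM managers, per-GT init, display, interrupts and finally
 * drm_dev_register(). Called from the PCI probe path once xe_device_create()
 * has returned.
 *
 * Return: 0 on success, negative error code on failure.
 */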
int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 id;

	xe_pat_init_early(xe);

	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
		return err;

	for_each_tile(tile, xe, id) {
		err = xe_tile_alloc(tile);
		if (err)
			return err;
	}

	err = xe_mmio_init(xe);
	if (err)
		return err;

	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_pcode_probe(gt);
		if (err)
			return err;
	}

	err = xe_display_init_noirq(xe);
	if (err)
		return err;

	err = xe_irq_install(xe);
	if (err)
		goto err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			goto err_irq_shutdown;
	}

	err = xe_mmio_probe_vram(xe);
	if (err)
		goto err_irq_shutdown;

	xe_ttm_sys_mgr_init(xe);

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err_irq_shutdown;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	/*
	 * Now that the GT is initialized (TTM in particular), we can try to
	 * init display and inherit the initial fb. This is why the first
	 * allocation needs to be done inside display.
	 */
	err = xe_display_init_noaccel(xe);
	if (err)
		goto err_irq_shutdown;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init(gt);
		if (err)
			goto err_irq_shutdown;
	}

	xe_heci_gsc_init(xe);

	err = xe_display_init(xe);
	if (err)
		goto err_irq_shutdown;

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_fini_display;

	xe_display_register(xe);

	xe_debugfs_register(xe);

	xe_pmu_register(&xe->pmu);

	xe_hwmon_register(xe);

	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
	if (err)
		return err;

	return 0;

err_fini_display:
	xe_display_driver_remove(xe);

err_irq_shutdown:
	xe_irq_shutdown(xe);
err:
	xe_display_fini(xe);
	return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
	xe_display_unregister(xe);

	drm_dev_unplug(&xe->drm);
	xe_display_driver_remove(xe);
}

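/**
 * xe_device_remove() - Tear down an xe device
 * @xe: device being removed
 *
 * Unwinds xe_device_probe() in roughly reverse order: unregister and remove
 * the display, finish the GSC/HECI interface and shut down interrupts.
 */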
void xe_device_remove(struct xe_device *xe)
{
	xe_device_remove_display(xe);

	xe_display_fini(xe);

	xe_heci_gsc_fini(xe);

	xe_irq_shutdown(xe);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

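/*
 * Exec queues on the device-wide persistent list are not tied to the per-file
 * xarray; they stay around until the file that created them is closed, at
 * which point device_kill_persistent_exec_queues() kills them.
 */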
void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q)
{
	mutex_lock(&xe->persistent_engines.lock);
	list_add_tail(&q->persistent.link, &xe->persistent_engines.list);
	mutex_unlock(&xe->persistent_engines.lock);
}

void xe_device_remove_persistent_exec_queues(struct xe_device *xe,
					     struct xe_exec_queue *q)
{
	mutex_lock(&xe->persistent_engines.lock);
	if (!list_empty(&q->persistent.link))
		list_del(&q->persistent.link);
	mutex_unlock(&xe->persistent_engines.lock);
}

static void device_kill_persistent_exec_queues(struct xe_device *xe,
					       struct xe_file *xef)
{
	struct xe_exec_queue *q, *next;

	mutex_lock(&xe->persistent_engines.lock);
	list_for_each_entry_safe(q, next, &xe->persistent_engines.list,
				 persistent.link)
		if (q->persistent.xef == xef) {
			xe_exec_queue_kill(q);
			list_del_init(&q->persistent.link);
		}
	mutex_unlock(&xe->persistent_engines.lock);
}

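/**
 * xe_device_wmb() - Device-visible write memory barrier
 * @xe: the device
 *
 * A plain wmb() only orders CPU writes; on discrete parts we additionally post
 * a dummy MMIO write (to the SOFTWARE_FLAGS_SPR33 spare register) so prior
 * writes are flushed out to the device before subsequent accesses.
 */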
void xe_device_wmb(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
}

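/**
 * xe_device_ccs_bytes() - Compression metadata size for a buffer
 * @xe: the device
 * @size: size of the main surface in bytes
 *
 * Return: number of CCS (compression control surface) bytes needed to cover
 * @size bytes of main memory on flat-CCS platforms, i.e. one CCS byte per
 * NUM_BYTES_PER_CCS_BYTE main bytes, or 0 if the device has no flat CCS.
 */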
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0;
}

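/*
 * True if a mem_access reference is currently held, or if a runtime PM
 * callback is in flight (during which the asserts must not fire).
 */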
bool xe_device_mem_access_ongoing(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) != NULL)
		return true;

	return atomic_read(&xe->mem_access.ref);
}

void xe_device_assert_mem_access(struct xe_device *xe)
{
	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
}

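/*
 * Non-blocking variant of xe_device_mem_access_get(): only takes a reference
 * if the device is already runtime-active, so it never triggers a resume.
 * Returns true if a reference was taken (or if called from within a runtime
 * PM callback, where no reference is needed).
 */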
bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
{
	bool active;

	if (xe_pm_read_callback_task(xe) == current)
		return true;

	active = xe_pm_runtime_get_if_active(xe);
	if (active) {
		int ref = atomic_inc_return(&xe->mem_access.ref);

		xe_assert(xe, ref != S32_MAX);
	}

	return active;
}

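/*
 * Take a mem_access reference, synchronously runtime-resuming the device if
 * needed. Calls from within the runtime PM callbacks themselves are detected
 * via pm_callback_task and turned into a no-op.
 *
 * Typical usage (illustrative sketch only, not lifted from a specific caller):
 *
 *	xe_device_mem_access_get(xe);
 *	... access VRAM / GGTT / registers that need the device awake ...
 *	xe_device_mem_access_put(xe);
 */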
void xe_device_mem_access_get(struct xe_device *xe)
{
	int ref;

	/*
	 * This looks racy, but should be fine since the pm_callback_task only
	 * transitions from NULL -> current (and back to NULL again), during the
	 * runtime_resume() or runtime_suspend() callbacks, for which there can
	 * only be a single one running for our device. We only need to prevent
	 * recursively calling the runtime_get or runtime_put from those
	 * callbacks, as well as preventing triggering any access_ongoing
	 * asserts.
	 */
	if (xe_pm_read_callback_task(xe) == current)
		return;

	/*
	 * Since the resume here is synchronous it can be quite easy to deadlock
	 * if we are not careful. Also in practice it might be quite timing
	 * sensitive to ever see the 0 -> 1 transition with the caller's locks
	 * held, so deadlocks might exist but are hard for lockdep to ever see.
	 * With this in mind, help lockdep learn about the potentially scary
	 * stuff that can happen inside the runtime_resume callback by acquiring
	 * a dummy lock (it doesn't protect anything and gets compiled out on
	 * non-debug builds). Lockdep then only needs to see the
	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map
	 * chains. For example, if the (callers_locks) are ever grabbed in the
	 * runtime_resume callback, lockdep should give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);
	lock_map_release(&xe_device_mem_access_lockdep_map);

	xe_pm_runtime_get(xe);
	ref = atomic_inc_return(&xe->mem_access.ref);

	xe_assert(xe, ref != S32_MAX);
}

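/*
 * Drop a reference taken by xe_device_mem_access_get() along with the runtime
 * PM reference that goes with it.
 */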
void xe_device_mem_access_put(struct xe_device *xe)
{
	int ref;

	if (xe_pm_read_callback_task(xe) == current)
		return;

	ref = atomic_dec_return(&xe->mem_access.ref);
	xe_pm_runtime_put(xe);

	xe_assert(xe, ref >= 0);
}
622