// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/aperture.h>
#include <linux/delay.h>
#include <linux/fault-inject.h>
#include <linux/units.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>

#include "display/xe_display.h"
#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"

#include <generated/xe_wa_oob.h>

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_drm_client *client;
        struct xe_file *xef;
        int ret = -ENOMEM;
        struct task_struct *task = NULL;

        xef = kzalloc(sizeof(*xef), GFP_KERNEL);
        if (!xef)
                return ret;

        client = xe_drm_client_alloc();
        if (!client) {
                kfree(xef);
                return ret;
        }

        xef->drm = file;
        xef->client = client;
        xef->xe = xe;

        mutex_init(&xef->vm.lock);
        xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

        mutex_init(&xef->exec_queue.lock);
        xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

        file->driver_priv = xef;
        kref_init(&xef->refcount);

        task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
        if (task) {
                xef->process_name = kstrdup(task->comm, GFP_KERNEL);
                xef->pid = task->pid;
                put_task_struct(task);
        }

        return 0;
}

static void xe_file_destroy(struct kref *ref)
{
        struct xe_file *xef = container_of(ref, struct xe_file, refcount);

        xa_destroy(&xef->exec_queue.xa);
        mutex_destroy(&xef->exec_queue.lock);
        xa_destroy(&xef->vm.xa);
        mutex_destroy(&xef->vm.lock);

        xe_drm_client_put(xef->client);
        kfree(xef->process_name);
        kfree(xef);
}

/**
 * xe_file_get() - Take a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Anyone with a pointer to xef must take a reference to the xe file
 * object using this call.
 *
 * Return: xe file pointer
 */
struct xe_file *xe_file_get(struct xe_file *xef)
{
        kref_get(&xef->refcount);
        return xef;
}

/**
 * xe_file_put() - Drop a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Used to drop a reference to the xef object.
 */
void xe_file_put(struct xe_file *xef)
{
        kref_put(&xef->refcount, xe_file_destroy);
}
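
/*
 * Minimal usage sketch (illustrative only, not part of this file): any code
 * path that stashes an xef pointer beyond the lifetime of the IOCTL that
 * produced it is expected to balance xe_file_get() with xe_file_put().
 * "holder->xef" below is a hypothetical long-lived holder of the pointer.
 *
 *      holder->xef = xe_file_get(xef);
 *      ... later, when the holder is torn down ...
 *      xe_file_put(holder->xef);
 */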

static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_file *xef = file->driver_priv;
        struct xe_vm *vm;
        struct xe_exec_queue *q;
        unsigned long idx;

        xe_pm_runtime_get(xe);

        /*
         * No need for exec_queue.lock here as there is no contention for it
         * when the FD is closing, since IOCTLs presumably can't be modifying
         * the xarray. Taking exec_queue.lock here causes an undue dependency
         * on vm->lock taken during xe_exec_queue_kill().
         */
        xa_for_each(&xef->exec_queue.xa, idx, q) {
                if (q->vm && q->hwe->hw_engine_group)
                        xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
                xe_exec_queue_kill(q);
                xe_exec_queue_put(q);
        }
        xa_for_each(&xef->vm.xa, idx, vm)
                xe_vm_close_and_put(vm);

        xe_file_put(xef);

        xe_pm_runtime_put(xe);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
        DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
};

static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct drm_file *file_priv = file->private_data;
        struct xe_device *xe = to_xe_device(file_priv->minor->dev);
        long ret;

        if (xe_device_wedged(xe))
                return -ECANCELED;

        ret = xe_pm_runtime_get_ioctl(xe);
        if (ret >= 0)
                ret = drm_ioctl(file, cmd, arg);
        xe_pm_runtime_put(xe);

        return ret;
}

#ifdef CONFIG_COMPAT
static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct drm_file *file_priv = file->private_data;
        struct xe_device *xe = to_xe_device(file_priv->minor->dev);
        long ret;

        if (xe_device_wedged(xe))
                return -ECANCELED;

        ret = xe_pm_runtime_get_ioctl(xe);
        if (ret >= 0)
                ret = drm_compat_ioctl(file, cmd, arg);
        xe_pm_runtime_put(xe);

        return ret;
}
#else
/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
#define xe_drm_compat_ioctl NULL
#endif

static const struct file_operations xe_driver_fops = {
        .owner = THIS_MODULE,
        .open = drm_open,
        .release = drm_release_noglobal,
        .unlocked_ioctl = xe_drm_ioctl,
        .mmap = drm_gem_mmap,
        .poll = drm_poll,
        .read = drm_read,
        .compat_ioctl = xe_drm_compat_ioctl,
        .llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
        .show_fdinfo = drm_show_fdinfo,
#endif
        .fop_flags = FOP_UNSIGNED_OFFSET,
};

static struct drm_driver driver = {
        /* Don't use MTRRs here; the Xserver or userspace app should
         * deal with them for Intel hardware.
         */
        .driver_features =
            DRIVER_GEM |
            DRIVER_RENDER | DRIVER_SYNCOBJ |
            DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
        .open = xe_file_open,
        .postclose = xe_file_close,

        .gem_prime_import = xe_gem_prime_import,

        .dumb_create = xe_bo_dumb_create,
        .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
        .show_fdinfo = xe_drm_client_fdinfo,
#endif
        .ioctls = xe_ioctls,
        .num_ioctls = ARRAY_SIZE(xe_ioctls),
        .fops = &xe_driver_fops,
        .name = DRIVER_NAME,
        .desc = DRIVER_DESC,
        .major = DRIVER_MAJOR,
        .minor = DRIVER_MINOR,
        .patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
        struct xe_device *xe = to_xe_device(dev);

        if (xe->preempt_fence_wq)
                destroy_workqueue(xe->preempt_fence_wq);

        if (xe->ordered_wq)
                destroy_workqueue(xe->ordered_wq);

        if (xe->unordered_wq)
                destroy_workqueue(xe->unordered_wq);

        if (xe->destroy_wq)
                destroy_workqueue(xe->destroy_wq);

        ttm_device_fini(&xe->ttm);
}

struct xe_device *xe_device_create(struct pci_dev *pdev,
                                   const struct pci_device_id *ent)
{
        struct xe_device *xe;
        int err;

        xe_display_driver_set_hooks(&driver);

        err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
        if (err)
                return ERR_PTR(err);

        xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
        if (IS_ERR(xe))
                return xe;

        err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
                              xe->drm.anon_inode->i_mapping,
                              xe->drm.vma_offset_manager, false, false);
        if (WARN_ON(err))
                goto err;

        err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
        if (err)
                goto err;

        xe->info.devid = pdev->device;
        xe->info.revid = pdev->revision;
        xe->info.force_execlist = xe_modparam.force_execlist;

        err = xe_irq_init(xe);
        if (err)
                goto err;

        init_waitqueue_head(&xe->ufence_wq);

        init_rwsem(&xe->usm.lock);

        xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

        if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
                /* Trigger a large asid and an early asid wrap. */
                u32 asid;

                BUILD_BUG_ON(XE_MAX_ASID < 2);
                err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
                                      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
                                      &xe->usm.next_asid, GFP_KERNEL);
                drm_WARN_ON(&xe->drm, err);
                if (err >= 0)
                        xa_erase(&xe->usm.asid_to_vm, asid);
        }

        spin_lock_init(&xe->pinned.lock);
        INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
        INIT_LIST_HEAD(&xe->pinned.external_vram);
        INIT_LIST_HEAD(&xe->pinned.evicted);

        xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
                                                       WQ_MEM_RECLAIM);
        xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
        xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
        xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
        if (!xe->ordered_wq || !xe->unordered_wq ||
            !xe->preempt_fence_wq || !xe->destroy_wq) {
                /*
                 * Cleanup done in xe_device_destroy via the
                 * drmm_add_action_or_reset registered above.
                 */
                drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
                err = -ENOMEM;
                goto err;
        }

        err = drmm_mutex_init(&xe->drm, &xe->pmt.lock);
        if (err)
                goto err;

        err = xe_display_create(xe);
        if (WARN_ON(err))
                goto err;

        return xe;

err:
        return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */

static bool xe_driver_flr_disabled(struct xe_device *xe)
{
        return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void __xe_driver_flr(struct xe_device *xe)
{
        const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
        struct xe_mmio *mmio = xe_root_tile_mmio(xe);
        int ret;

        drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

        /*
         * Make sure any pending FLR requests have cleared by waiting for the
         * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
         * to make sure it's not still set from a prior attempt (it's a write to
         * clear bit).
         * Note that we should never be in a situation where a previous attempt
         * is still pending (unless the HW is totally dead), but better to be
         * safe in case something unexpected happens.
         */
        ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
                return;
        }
        xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);

        /* Trigger the actual Driver-FLR */
        xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);

        /* Wait for hardware teardown to complete */
        ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
                return;
        }

        /* Wait for hardware/firmware re-init to complete */
        ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
                             flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
                return;
        }

        /* Clear sticky completion status */
        xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr(struct xe_device *xe)
{
        if (xe_driver_flr_disabled(xe)) {
                drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
                return;
        }

        __xe_driver_flr(xe);
}

static void xe_driver_flr_fini(void *arg)
{
        struct xe_device *xe = arg;

        if (xe->needs_flr_on_fini)
                xe_driver_flr(xe);
}

static void xe_device_sanitize(void *arg)
{
        struct xe_device *xe = arg;
        struct xe_gt *gt;
        u8 id;

        for_each_gt(gt, xe, id)
                xe_gt_sanitize(gt);
}

static int xe_set_dma_info(struct xe_device *xe)
{
        unsigned int mask_size = xe->info.dma_mask_size;
        int err;

        dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

        err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
        if (err)
                goto mask_err;

        err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
        if (err)
                goto mask_err;

        return 0;

mask_err:
        drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
        return err;
}

static bool verify_lmem_ready(struct xe_device *xe)
{
        u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;

        return !!val;
}

static int wait_for_lmem_ready(struct xe_device *xe)
{
        unsigned long timeout, start;

        if (!IS_DGFX(xe))
                return 0;

        if (IS_SRIOV_VF(xe))
                return 0;

        if (verify_lmem_ready(xe))
                return 0;

        drm_dbg(&xe->drm, "Waiting for lmem initialization\n");

        start = jiffies;
        timeout = start + secs_to_jiffies(60); /* 60 sec! */

        do {
                if (signal_pending(current))
                        return -EINTR;

                /*
                 * The boot firmware initializes local memory and
                 * assesses its health. If memory training fails, the punit
                 * will have been instructed to keep the GT powered down;
                 * we won't be able to communicate with it.
                 *
                 * If the status check is done before the punit updates the
                 * register, it can lead to the system being unusable.
                 * Use a timeout and defer the probe to prevent this.
                 */
                if (time_after(jiffies, timeout)) {
                        drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
                        return -EPROBE_DEFER;
                }

                msleep(20);

        } while (!verify_lmem_ready(xe));

        drm_dbg(&xe->drm, "lmem ready after %ums",
                jiffies_to_msecs(jiffies - start));

        return 0;
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */

static void update_device_info(struct xe_device *xe)
{
        /* disable features that are not available/applicable to VFs */
        if (IS_SRIOV_VF(xe)) {
                xe->info.probe_display = 0;
                xe->info.has_heci_gscfi = 0;
                xe->info.skip_guc_pc = 1;
                xe->info.skip_pcode = 1;
        }
}

/**
 * xe_device_probe_early: Device early probe
 * @xe: xe device instance
 *
 * Initialize MMIO resources that don't require any
 * knowledge about tile count. Also initialize pcode and
 * check vram initialization on root tile.
 *
 * Return: 0 on success, error code on failure
 */
int xe_device_probe_early(struct xe_device *xe)
{
        int err;

        err = xe_mmio_init(xe);
        if (err)
                return err;

        xe_sriov_probe_early(xe);

        update_device_info(xe);

        err = xe_pcode_probe_early(xe);
        if (err)
                return err;

        err = wait_for_lmem_ready(xe);
        if (err)
                return err;

        xe->wedged.mode = xe_modparam.wedged_mode;

        return 0;
}

static int probe_has_flat_ccs(struct xe_device *xe)
{
        struct xe_gt *gt;
        unsigned int fw_ref;
        u32 reg;

        /* Always enabled/disabled, no runtime check to do */
        if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs || IS_SRIOV_VF(xe))
                return 0;

        gt = xe_root_mmio_gt(xe);

        fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
        if (!fw_ref)
                return -ETIMEDOUT;

        reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
        xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

        if (!xe->info.has_flat_ccs)
                drm_dbg(&xe->drm,
                        "Flat CCS has been disabled in BIOS, may lead to performance impact");

        xe_force_wake_put(gt_to_fw(gt), fw_ref);
        return 0;
}

int xe_device_probe(struct xe_device *xe)
{
        struct xe_tile *tile;
        struct xe_gt *gt;
        int err;
        u8 last_gt;
        u8 id;

        xe_pat_init_early(xe);

        err = xe_sriov_init(xe);
        if (err)
                return err;

        xe->info.mem_region_mask = 1;
        err = xe_display_init_nommio(xe);
        if (err)
                return err;

        err = xe_set_dma_info(xe);
        if (err)
                return err;

        err = xe_mmio_probe_tiles(xe);
        if (err)
                return err;

        xe_ttm_sys_mgr_init(xe);

        for_each_gt(gt, xe, id) {
                err = xe_gt_init_early(gt);
                if (err)
                        return err;

                /*
                 * Only after this point can GT-specific MMIO operations
                 * (including things like communication with the GuC)
                 * be performed.
                 */
                xe_gt_mmio_init(gt);
        }

        for_each_tile(tile, xe, id) {
                if (IS_SRIOV_VF(xe)) {
                        xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
                        err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
                        if (err)
                                return err;
                        err = xe_gt_sriov_vf_query_config(tile->primary_gt);
                        if (err)
                                return err;
                }
                err = xe_ggtt_init_early(tile->mem.ggtt);
                if (err)
                        return err;
                err = xe_memirq_init(&tile->memirq);
                if (err)
                        return err;
        }

        for_each_gt(gt, xe, id) {
                err = xe_gt_init_hwconfig(gt);
                if (err)
                        return err;
        }

        err = xe_devcoredump_init(xe);
        if (err)
                return err;
        err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
        if (err)
                return err;

        err = xe_display_init_noirq(xe);
        if (err)
                return err;

        err = xe_irq_install(xe);
        if (err)
                goto err;

        err = probe_has_flat_ccs(xe);
        if (err)
                goto err;

        err = xe_vram_probe(xe);
        if (err)
                goto err;

        for_each_tile(tile, xe, id) {
                err = xe_tile_init_noalloc(tile);
                if (err)
                        goto err;
        }

        /* Allocate and map stolen after potential VRAM resize */
        xe_ttm_stolen_mgr_init(xe);

        /*
         * Now that GT is initialized (TTM in particular),
         * we can try to init display, and inherit the initial fb.
         * This is the reason the first allocation needs to be done
         * inside display.
         */
        err = xe_display_init_noaccel(xe);
        if (err)
                goto err;

        for_each_gt(gt, xe, id) {
                last_gt = id;

                err = xe_gt_init(gt);
                if (err)
                        goto err_fini_gt;
        }

        xe_heci_gsc_init(xe);

        err = xe_oa_init(xe);
        if (err)
                goto err_fini_gt;

        err = xe_display_init(xe);
        if (err)
                goto err_fini_oa;

        err = drm_dev_register(&xe->drm, 0);
        if (err)
                goto err_fini_display;

        xe_display_register(xe);

        xe_oa_register(xe);

        xe_debugfs_register(xe);

        xe_hwmon_register(xe);

        for_each_gt(gt, xe, id)
                xe_gt_sanitize_freq(gt);

        xe_vsec_init(xe);

        return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);

err_fini_display:
        xe_display_driver_remove(xe);

err_fini_oa:
        xe_oa_fini(xe);

err_fini_gt:
        for_each_gt(gt, xe, id) {
                if (id < last_gt)
                        xe_gt_remove(gt);
                else
                        break;
        }

err:
        xe_display_fini(xe);
        return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
        xe_display_unregister(xe);

        drm_dev_unplug(&xe->drm);
        xe_display_driver_remove(xe);
}

void xe_device_remove(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        xe_oa_unregister(xe);

        xe_device_remove_display(xe);

        xe_display_fini(xe);

        xe_oa_fini(xe);

        xe_heci_gsc_fini(xe);

        for_each_gt(gt, xe, id)
                xe_gt_remove(gt);
}

void xe_device_shutdown(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        drm_dbg(&xe->drm, "Shutting down device\n");

        if (xe_driver_flr_disabled(xe)) {
                xe_display_pm_shutdown(xe);

                xe_irq_suspend(xe);

                for_each_gt(gt, xe, id)
                        xe_gt_shutdown(gt);

                xe_display_pm_shutdown_late(xe);
        } else {
                /* BOOM! */
                __xe_driver_flr(xe);
        }
}

/**
 * xe_device_wmb() - Device specific write memory barrier
 * @xe: the &xe_device
 *
 * While wmb() is sufficient for a barrier if we use system memory, on discrete
 * platforms with device memory we additionally need to issue a register write.
 * Since it doesn't matter which register we write to, use the read-only VF_CAP
 * register that is also marked as accessible by the VFs.
 */
void xe_device_wmb(struct xe_device *xe)
{
        wmb();
        if (IS_DGFX(xe))
                xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
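
/*
 * Illustrative usage sketch only (not part of this file): callers reach for
 * xe_device_wmb() where a plain wmb() is not enough on discrete parts, e.g.
 * after a CPU write to device memory that the GPU is about to consume.
 * "doorbell_iomem" is a hypothetical iomem mapping used purely for the sketch.
 *
 *      writel(value, doorbell_iomem);
 *      xe_device_wmb(xe);      // flush the posted write before GPU use
 */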

/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * Display engine has direct access to memory and is never coherent with L3/L4
 * caches (or CPU caches), however KMD is responsible for specifically flushing
 * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
 * can happen from such a surface without seeing corruption.
 *
 * Display surfaces can be tagged as transient by mapping them using one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 * Media is not coherent with L3 and we want to support render-vs-media
 * use cases. For other engines like copy/blt the HW internally forces uncached
 * behaviour, which is why we can skip the TDF on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
        struct xe_gt *gt;
        unsigned int fw_ref;
        u8 id;

        if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
                return;

        if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
                xe_device_l2_flush(xe);
                return;
        }

        for_each_gt(gt, xe, id) {
                if (xe_gt_is_media_type(gt))
                        continue;

                fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
                if (!fw_ref)
                        return;

                xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
                /*
                 * FIXME: We can likely do better here with our choice of
                 * timeout. Currently we just assume the worst case, i.e. 150us,
                 * which is believed to be sufficient to cover the worst case
                 * scenario on current platforms if all cache entries are
                 * transient and need to be flushed.
                 */
                if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
                                   150, NULL, false))
                        xe_gt_err_once(gt, "TD flush timeout\n");

                xe_force_wake_put(gt_to_fw(gt), fw_ref);
        }
}
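
/*
 * Illustrative only (not from this file): per the kernel-doc above, the
 * display side is expected to flush transient L3 entries before arming a
 * flip to a surface mapped with an L3:XD PAT index, so scanout never sees
 * stale data. A minimal sketch of that ordering:
 *
 *      xe_device_td_flush(xe);         // flush transient L3 entries
 *      ... then program the flip ...   // hypothetical flip sequence
 */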

void xe_device_l2_flush(struct xe_device *xe)
{
        struct xe_gt *gt;
        unsigned int fw_ref;

        gt = xe_root_mmio_gt(xe);

        if (!XE_WA(gt, 16023588340))
                return;

        fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
        if (!fw_ref)
                return;

        spin_lock(&gt->global_invl_lock);
        xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);

        if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
                xe_gt_err_once(gt, "Global invalidation timeout\n");
        spin_unlock(&gt->global_invl_lock);

        xe_force_wake_put(gt_to_fw(gt), fw_ref);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
        return xe_device_has_flat_ccs(xe) ?
                DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}
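
/*
 * Worked example (illustrative only): assuming a hypothetical ratio of 256
 * main-memory bytes per CCS byte, a 64 KiB buffer needs
 * DIV_ROUND_UP_ULL(65536, 256) = 256 bytes of CCS metadata, and a 100-byte
 * buffer still rounds up to 1 CCS byte. Devices without flat CCS need 0.
 * The actual ratio is whatever NUM_BYTES_PER_CCS_BYTE() reports for @xe.
 */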

/**
 * xe_device_assert_mem_access - Inspect the current runtime_pm state.
 * @xe: xe device instance
 *
 * To be used before any kind of memory access. It will splat a debug warning
 * if the device is currently sleeping. But it doesn't guarantee in any way
 * that the device is going to remain awake. Xe PM runtime get and put
 * functions might be added to the outer bound of the memory access, while
 * this check is intended for inner usage to splat some warning if the worst
 * case has just happened.
 */
void xe_device_assert_mem_access(struct xe_device *xe)
{
        xe_assert(xe, !xe_pm_runtime_suspended(xe));
}
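
/*
 * Minimal sketch of the intended pattern (illustrative only): an outer caller
 * holds the runtime PM reference, while inner helpers only assert it:
 *
 *      xe_pm_runtime_get(xe);
 *      xe_device_assert_mem_access(xe);        // inner sanity check
 *      ... touch VRAM / MMIO ...
 *      xe_pm_runtime_put(xe);
 */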

void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
{
        struct xe_gt *gt;
        u8 id;

        drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
        drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);

        for_each_gt(gt, xe, id) {
                drm_printf(p, "GT id: %u\n", id);
                drm_printf(p, "\tTile: %u\n", gt->tile->id);
                drm_printf(p, "\tType: %s\n",
                           gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
                drm_printf(p, "\tIP ver: %u.%u.%u\n",
                           REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
                           REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
                           REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
                drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
        }
}

u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
{
        return sign_extend64(address, xe->info.va_bits - 1);
}

u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{
        return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}
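
/*
 * Worked example (illustrative only), assuming va_bits == 48: canonical form
 * sign-extends bit 47, so 0x0000800000000000 canonicalizes to
 * 0xffff800000000000, while uncanonicalizing masks the address back down to
 * bits [47:0] and recovers 0x0000800000000000.
 */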

static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
        struct xe_device *xe = arg;

        xe_pm_runtime_put(xe);
}

/**
 * xe_device_declare_wedged - Declare device wedged
 * @xe: xe device instance
 *
 * This is a final state that can only be cleared with a module
 * re-probe (unbind + bind).
 * In this state every IOCTL will be blocked so the GT cannot be used.
 * In general it will be called upon any critical error such as gt reset
 * failure or guc loading failure.
 * If the xe.wedged module parameter is set to 2, this function will be called
 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
 * snapshot capture. In this mode, GT reset won't be attempted so the state of
 * the issue is preserved for further debugging.
 */
void xe_device_declare_wedged(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        if (xe->wedged.mode == 0) {
                drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
                return;
        }

        xe_pm_runtime_get_noresume(xe);

        if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
                drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up; device is wedged regardless.\n");
                return;
        }

        if (!atomic_xchg(&xe->wedged.flag, 1)) {
                xe->needs_flr_on_fini = true;
                drm_err(&xe->drm,
                        "CRITICAL: Xe has declared device %s as wedged.\n"
                        "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
                        "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
                        dev_name(xe->drm.dev));
        }

        for_each_gt(gt, xe, id)
                xe_gt_declare_wedged(gt);
}