// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/delay.h>
#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>

#include "display/xe_display.h"
#include "instructions/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_devcoredump.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_observation.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"

#include <generated/xe_wa_oob.h>

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_drm_client *client;
        struct xe_file *xef;
        int ret = -ENOMEM;
        struct task_struct *task = NULL;

        xef = kzalloc(sizeof(*xef), GFP_KERNEL);
        if (!xef)
                return ret;

        client = xe_drm_client_alloc();
        if (!client) {
                kfree(xef);
                return ret;
        }

        xef->drm = file;
        xef->client = client;
        xef->xe = xe;

        mutex_init(&xef->vm.lock);
        xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

        mutex_init(&xef->exec_queue.lock);
        xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

        file->driver_priv = xef;
        kref_init(&xef->refcount);

        task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
        if (task) {
                xef->process_name = kstrdup(task->comm, GFP_KERNEL);
                xef->pid = task->pid;
                put_task_struct(task);
        }

        return 0;
}

static void xe_file_destroy(struct kref *ref)
{
        struct xe_file *xef = container_of(ref, struct xe_file, refcount);

        xa_destroy(&xef->exec_queue.xa);
        mutex_destroy(&xef->exec_queue.lock);
        xa_destroy(&xef->vm.xa);
        mutex_destroy(&xef->vm.lock);

        xe_drm_client_put(xef->client);
        kfree(xef->process_name);
        kfree(xef);
}

/**
 * xe_file_get() - Take a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Anyone with a pointer to xef must take a reference to the xe file
 * object using this call.
 *
 * Return: xe file pointer
 */
struct xe_file *xe_file_get(struct xe_file *xef)
{
        kref_get(&xef->refcount);
        return xef;
}

/**
 * xe_file_put() - Drop a reference to the xe file object
 * @xef: Pointer to the xe file
 *
 * Used to drop a reference to the xef object.
 */
void xe_file_put(struct xe_file *xef)
{
        kref_put(&xef->refcount, xe_file_destroy);
}

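/*
 * Illustrative sketch, not part of the driver: how a caller that caches a
 * struct xe_file pointer is expected to pair xe_file_get()/xe_file_put()
 * around the lifetime of that cached pointer. The wrapper names used here
 * (my_ctx, my_ctx_init, my_ctx_fini) are hypothetical.
 *
 *	struct my_ctx {
 *		struct xe_file *xef;
 *	};
 *
 *	static void my_ctx_init(struct my_ctx *ctx, struct xe_file *xef)
 *	{
 *		ctx->xef = xe_file_get(xef);	// hold a reference while cached
 *	}
 *
 *	static void my_ctx_fini(struct my_ctx *ctx)
 *	{
 *		xe_file_put(ctx->xef);		// may free xef via xe_file_destroy()
 *	}
 */
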
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_file *xef = file->driver_priv;
        struct xe_vm *vm;
        struct xe_exec_queue *q;
        unsigned long idx;

        xe_pm_runtime_get(xe);

        /*
         * No need for exec_queue.lock here as there is no contention for it
         * when FD is closing as IOCTLs presumably can't be modifying the
         * xarray. Taking exec_queue.lock here causes undue dependency on
         * vm->lock taken during xe_exec_queue_kill().
         */
        xa_for_each(&xef->exec_queue.xa, idx, q) {
                if (q->vm && q->hwe->hw_engine_group)
                        xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
                xe_exec_queue_kill(q);
                xe_exec_queue_put(q);
        }
        xa_for_each(&xef->vm.xa, idx, vm)
                xe_vm_close_and_put(vm);

        xe_file_put(xef);

        xe_pm_runtime_put(xe);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
        DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
                          DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
};

static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct drm_file *file_priv = file->private_data;
        struct xe_device *xe = to_xe_device(file_priv->minor->dev);
        long ret;

        if (xe_device_wedged(xe))
                return -ECANCELED;

        ret = xe_pm_runtime_get_ioctl(xe);
        if (ret >= 0)
                ret = drm_ioctl(file, cmd, arg);
        xe_pm_runtime_put(xe);

        return ret;
}

#ifdef CONFIG_COMPAT
static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        struct drm_file *file_priv = file->private_data;
        struct xe_device *xe = to_xe_device(file_priv->minor->dev);
        long ret;

        if (xe_device_wedged(xe))
                return -ECANCELED;

        ret = xe_pm_runtime_get_ioctl(xe);
        if (ret >= 0)
                ret = drm_compat_ioctl(file, cmd, arg);
        xe_pm_runtime_put(xe);

        return ret;
}
#else
/* similarly to drm_compat_ioctl, let it be assigned to .compat_ioctl unconditionally */
#define xe_drm_compat_ioctl NULL
#endif

static const struct file_operations xe_driver_fops = {
        .owner = THIS_MODULE,
        .open = drm_open,
        .release = drm_release_noglobal,
        .unlocked_ioctl = xe_drm_ioctl,
        .mmap = drm_gem_mmap,
        .poll = drm_poll,
        .read = drm_read,
        .compat_ioctl = xe_drm_compat_ioctl,
        .llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
        .show_fdinfo = drm_show_fdinfo,
#endif
        .fop_flags = FOP_UNSIGNED_OFFSET,
};

static struct drm_driver driver = {
        /* Don't use MTRRs here; the Xserver or userspace app should
         * deal with them for Intel hardware.
         */
        .driver_features =
            DRIVER_GEM |
            DRIVER_RENDER | DRIVER_SYNCOBJ |
            DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
        .open = xe_file_open,
        .postclose = xe_file_close,

        .gem_prime_import = xe_gem_prime_import,

        .dumb_create = xe_bo_dumb_create,
        .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
        .show_fdinfo = xe_drm_client_fdinfo,
#endif
        .ioctls = xe_ioctls,
        .num_ioctls = ARRAY_SIZE(xe_ioctls),
        .fops = &xe_driver_fops,
        .name = DRIVER_NAME,
        .desc = DRIVER_DESC,
        .date = DRIVER_DATE,
        .major = DRIVER_MAJOR,
        .minor = DRIVER_MINOR,
        .patchlevel = DRIVER_PATCHLEVEL,
};

static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
        struct xe_device *xe = to_xe_device(dev);

        if (xe->preempt_fence_wq)
                destroy_workqueue(xe->preempt_fence_wq);

        if (xe->ordered_wq)
                destroy_workqueue(xe->ordered_wq);

        if (xe->unordered_wq)
                destroy_workqueue(xe->unordered_wq);

        if (xe->destroy_wq)
                destroy_workqueue(xe->destroy_wq);

        ttm_device_fini(&xe->ttm);
}

struct xe_device *xe_device_create(struct pci_dev *pdev,
                                   const struct pci_device_id *ent)
{
        struct xe_device *xe;
        int err;

        xe_display_driver_set_hooks(&driver);

        err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
        if (err)
                return ERR_PTR(err);

        xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
        if (IS_ERR(xe))
                return xe;

        err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
                              xe->drm.anon_inode->i_mapping,
                              xe->drm.vma_offset_manager, false, false);
        if (WARN_ON(err))
                goto err;

        err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
        if (err)
                goto err;

        xe->info.devid = pdev->device;
        xe->info.revid = pdev->revision;
        xe->info.force_execlist = xe_modparam.force_execlist;

        spin_lock_init(&xe->irq.lock);

        init_waitqueue_head(&xe->ufence_wq);

        init_rwsem(&xe->usm.lock);

        xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

        if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
                /* Trigger a large asid and an early asid wrap. */
                u32 asid;

                BUILD_BUG_ON(XE_MAX_ASID < 2);
                err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
                                      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
                                      &xe->usm.next_asid, GFP_KERNEL);
                drm_WARN_ON(&xe->drm, err);
                if (err >= 0)
                        xa_erase(&xe->usm.asid_to_vm, asid);
        }

        spin_lock_init(&xe->pinned.lock);
        INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
        INIT_LIST_HEAD(&xe->pinned.external_vram);
        INIT_LIST_HEAD(&xe->pinned.evicted);

        xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
        xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
        xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
        xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
        if (!xe->ordered_wq || !xe->unordered_wq ||
            !xe->preempt_fence_wq || !xe->destroy_wq) {
                /*
                 * Cleanup is done in xe_device_destroy() via the
                 * drmm_add_action_or_reset() registered above.
                 */
                drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
                err = -ENOMEM;
                goto err;
        }

        err = xe_display_create(xe);
        if (WARN_ON(err))
                goto err;

        return xe;

err:
        return ERR_PTR(err);
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of i915 is bound to the device it will do a full
 * re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{
        const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
        struct xe_gt *gt = xe_root_mmio_gt(xe);
        int ret;

        if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
                drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
                return;
        }

        drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

        /*
         * Make sure any pending FLR requests have cleared by waiting for the
         * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
         * to make sure it's not still set from a prior attempt (it's a write to
         * clear bit).
         * Note that we should never be in a situation where a previous attempt
         * is still pending (unless the HW is totally dead), but better to be
         * safe in case something unexpected happens
         */
        ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
                return;
        }
        xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);

        /* Trigger the actual Driver-FLR */
        xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

        /* Wait for hardware teardown to complete */
        ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
                return;
        }

        /* Wait for hardware/firmware re-init to complete */
        ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
                             flr_timeout, NULL, false);
        if (ret) {
                drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
                return;
        }

        /* Clear sticky completion status */
        xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr_fini(void *arg)
{
        struct xe_device *xe = arg;

        if (xe->needs_flr_on_fini)
                xe_driver_flr(xe);
}

static void xe_device_sanitize(void *arg)
{
        struct xe_device *xe = arg;
        struct xe_gt *gt;
        u8 id;

        for_each_gt(gt, xe, id)
                xe_gt_sanitize(gt);
}

static int xe_set_dma_info(struct xe_device *xe)
{
        unsigned int mask_size = xe->info.dma_mask_size;
        int err;

        dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

        err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
        if (err)
                goto mask_err;

        err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
        if (err)
                goto mask_err;

        return 0;

mask_err:
        drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
        return err;
}

static bool verify_lmem_ready(struct xe_gt *gt)
{
        u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;

        return !!val;
}

static int wait_for_lmem_ready(struct xe_device *xe)
{
        struct xe_gt *gt = xe_root_mmio_gt(xe);
        unsigned long timeout, start;

        if (!IS_DGFX(xe))
                return 0;

        if (IS_SRIOV_VF(xe))
                return 0;

        if (verify_lmem_ready(gt))
                return 0;

        drm_dbg(&xe->drm, "Waiting for lmem initialization\n");

        start = jiffies;
        timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */

        do {
                if (signal_pending(current))
                        return -EINTR;

                /*
                 * The boot firmware initializes local memory and
                 * assesses its health. If memory training fails, the punit
                 * will have been instructed to keep the GT powered down; we
                 * won't be able to communicate with it.
                 *
                 * If the status check is done before the punit updates the
                 * register, it can lead to the system being unusable.
                 * Use a timeout and defer the probe to prevent this.
                 */
                if (time_after(jiffies, timeout)) {
                        drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
                        return -EPROBE_DEFER;
                }

                msleep(20);

        } while (!verify_lmem_ready(gt));

        drm_dbg(&xe->drm, "lmem ready after %ums",
                jiffies_to_msecs(jiffies - start));

        return 0;
}

static void update_device_info(struct xe_device *xe)
{
        /* disable features that are not available/applicable to VFs */
        if (IS_SRIOV_VF(xe)) {
                xe->info.probe_display = 0;
                xe->info.has_heci_gscfi = 0;
                xe->info.skip_guc_pc = 1;
                xe->info.skip_pcode = 1;
        }
}

/**
 * xe_device_probe_early() - Device early probe
 * @xe: xe device instance
 *
 * Initialize MMIO resources that don't require any
 * knowledge about tile count. Also initialize pcode and
 * check vram initialization on root tile.
 *
 * Return: 0 on success, error code on failure
 */
int xe_device_probe_early(struct xe_device *xe)
{
        int err;

        err = xe_mmio_init(xe);
        if (err)
                return err;

        xe_sriov_probe_early(xe);

        update_device_info(xe);

        err = xe_pcode_probe_early(xe);
        if (err)
                return err;

        err = wait_for_lmem_ready(xe);
        if (err)
                return err;

        xe->wedged.mode = xe_modparam.wedged_mode;

        return 0;
}

static int xe_device_set_has_flat_ccs(struct xe_device *xe)
{
        u32 reg;
        int err;

        if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
                return 0;

        struct xe_gt *gt = xe_root_mmio_gt(xe);

        err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
        if (err)
                return err;

        reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
        xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

        if (!xe->info.has_flat_ccs)
                drm_dbg(&xe->drm,
                        "Flat CCS has been disabled in BIOS, may lead to performance impact");

        return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

int xe_device_probe(struct xe_device *xe)
{
        struct xe_tile *tile;
        struct xe_gt *gt;
        int err;
        u8 last_gt;
        u8 id;

        xe_pat_init_early(xe);

        err = xe_sriov_init(xe);
        if (err)
                return err;

        xe->info.mem_region_mask = 1;
        err = xe_display_init_nommio(xe);
        if (err)
                return err;

        err = xe_set_dma_info(xe);
        if (err)
                return err;

        err = xe_mmio_probe_tiles(xe);
        if (err)
                return err;

        xe_ttm_sys_mgr_init(xe);

        for_each_gt(gt, xe, id) {
                err = xe_gt_init_early(gt);
                if (err)
                        return err;
        }

        for_each_tile(tile, xe, id) {
                if (IS_SRIOV_VF(xe)) {
                        xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
                        err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
                        if (err)
                                return err;
                        err = xe_gt_sriov_vf_query_config(tile->primary_gt);
                        if (err)
                                return err;
                }
                err = xe_ggtt_init_early(tile->mem.ggtt);
                if (err)
                        return err;
                if (IS_SRIOV_VF(xe)) {
                        err = xe_memirq_init(&tile->sriov.vf.memirq);
                        if (err)
                                return err;
                }
        }

        for_each_gt(gt, xe, id) {
                err = xe_gt_init_hwconfig(gt);
                if (err)
                        return err;
        }

        err = xe_devcoredump_init(xe);
        if (err)
                return err;
        err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
        if (err)
                return err;

        err = xe_display_init_noirq(xe);
        if (err)
                return err;

        err = xe_irq_install(xe);
        if (err)
                goto err;

        err = xe_device_set_has_flat_ccs(xe);
        if (err)
                goto err;

        err = xe_vram_probe(xe);
        if (err)
                goto err;

        for_each_tile(tile, xe, id) {
                err = xe_tile_init_noalloc(tile);
                if (err)
                        goto err;
        }

        /* Allocate and map stolen after potential VRAM resize */
        xe_ttm_stolen_mgr_init(xe);

        /*
         * Now that GT is initialized (TTM in particular),
         * we can try to init display, and inherit the initial fb.
         * This is the reason the first allocation needs to be done
         * inside display.
         */
        err = xe_display_init_noaccel(xe);
        if (err)
                goto err;

        for_each_gt(gt, xe, id) {
                last_gt = id;

                err = xe_gt_init(gt);
                if (err)
                        goto err_fini_gt;
        }

        xe_heci_gsc_init(xe);

        err = xe_oa_init(xe);
        if (err)
                goto err_fini_gt;

        err = xe_display_init(xe);
        if (err)
                goto err_fini_oa;

        err = drm_dev_register(&xe->drm, 0);
        if (err)
                goto err_fini_display;

        xe_display_register(xe);

        xe_oa_register(xe);

        xe_debugfs_register(xe);

        xe_hwmon_register(xe);

        for_each_gt(gt, xe, id)
                xe_gt_sanitize_freq(gt);

        return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);

err_fini_display:
        xe_display_driver_remove(xe);

err_fini_oa:
        xe_oa_fini(xe);

err_fini_gt:
        for_each_gt(gt, xe, id) {
                if (id < last_gt)
                        xe_gt_remove(gt);
                else
                        break;
        }

err:
        xe_display_fini(xe);
        return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
        xe_display_unregister(xe);

        drm_dev_unplug(&xe->drm);
        xe_display_driver_remove(xe);
}

void xe_device_remove(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        xe_oa_unregister(xe);

        xe_device_remove_display(xe);

        xe_display_fini(xe);

        xe_oa_fini(xe);

        xe_heci_gsc_fini(xe);

        for_each_gt(gt, xe, id)
                xe_gt_remove(gt);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

/**
 * xe_device_wmb() - Device specific write memory barrier
 * @xe: the &xe_device
 *
 * While wmb() is sufficient for a barrier if we use system memory, on discrete
 * platforms with device memory we additionally need to issue a register write.
 * Since it doesn't matter which register we write to, use the read-only VF_CAP
 * register that is also marked as accessible by the VFs.
 */
void xe_device_wmb(struct xe_device *xe)
{
        struct xe_gt *gt = xe_root_mmio_gt(xe);

        wmb();
        if (IS_DGFX(xe))
                xe_mmio_write32(gt, VF_CAP_REG, 0);
}

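/*
 * Minimal usage sketch (hypothetical caller), based on the kernel-doc above:
 * after CPU writes that another agent must observe, issue the device-specific
 * barrier instead of a bare wmb() so the posted writes are flushed out to
 * device memory on discrete parts as well. fill_buffer_cpu() is hypothetical.
 *
 *	fill_buffer_cpu(vaddr);		// CPU-side writes to the buffer
 *	xe_device_wmb(xe);		// order them before the GPU/agent reads
 */
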
/**
 * xe_device_td_flush() - Flush transient L3 cache entries
 * @xe: The device
 *
 * The display engine has direct access to memory and is never coherent with
 * L3/L4 caches (or CPU caches); however, the KMD is responsible for
 * specifically flushing transient L3 GPU cache entries prior to the flip
 * sequence to ensure scanout can happen from such a surface without seeing
 * corruption.
 *
 * Display surfaces can be tagged as transient by mapping them using one of the
 * various L3:XD PAT index modes on Xe2.
 *
 * Note: On non-discrete Xe2 platforms, like LNL, the entire L3 cache is flushed
 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 * Media is not coherent with L3 and we want to support render-vs-media
 * use cases. For other engines like copy/blt the HW internally forces uncached
 * behaviour, hence why we can skip the TDF on such platforms.
 */
void xe_device_td_flush(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
                return;

        if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
                xe_device_l2_flush(xe);
                return;
        }

        for_each_gt(gt, xe, id) {
                if (xe_gt_is_media_type(gt))
                        continue;

                if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
                        return;

                xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
                /*
                 * FIXME: We can likely do better here with our choice of
                 * timeout. Currently we just assume the worst case, i.e. 150us,
                 * which is believed to be sufficient to cover the worst case
                 * scenario on current platforms if all cache entries are
                 * transient and need to be flushed.
                 */
                if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
                                   150, NULL, false))
                        xe_gt_err_once(gt, "TD flush timeout\n");

                xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
        }
}

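/*
 * Usage sketch (hypothetical display path), following the kernel-doc above:
 * a surface mapped with one of the Xe2 L3:XD (transient) PAT indexes must have
 * its transient L3 entries flushed before scanout reads it. The helper names
 * other than xe_device_td_flush() are placeholders.
 *
 *	render_to_transient_surface();	// GPU writes via a transient mapping
 *	xe_device_td_flush(xe);		// make the data visible to display
 *	program_scanout_flip(fb);	// flip/scanout programming
 */
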
void xe_device_l2_flush(struct xe_device *xe)
{
        struct xe_gt *gt;
        int err;

        gt = xe_root_mmio_gt(xe);

        if (!XE_WA(gt, 16023588340))
                return;

        err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
        if (err)
                return;

        spin_lock(&gt->global_invl_lock);
        xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);

        if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
                xe_gt_err_once(gt, "Global invalidation timeout\n");
        spin_unlock(&gt->global_invl_lock);

        xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
        return xe_device_has_flat_ccs(xe) ?
                DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}

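/*
 * Worked example, assuming (for illustration only) that NUM_BYTES_PER_CCS_BYTE()
 * reports a ratio of 256 main-memory bytes per CCS byte on the platform:
 *
 *	xe_device_ccs_bytes(xe, SZ_1M) == DIV_ROUND_UP_ULL(SZ_1M, 256) == 4096
 *
 * i.e. a 1 MiB buffer would need 4 KiB of CCS backing. Without flat CCS the
 * function returns 0.
 */
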
/**
 * xe_device_assert_mem_access() - Inspect the current runtime_pm state.
 * @xe: xe device instance
 *
 * To be used before any kind of memory access. It will splat a debug warning
 * if the device is currently sleeping. But it doesn't guarantee in any way
 * that the device is going to remain awake. Xe PM runtime get and put
 * functions might be added to the outer bound of the memory access, while
 * this check is intended for inner usage to splat some warning if the worst
 * case has just happened.
 */
void xe_device_assert_mem_access(struct xe_device *xe)
{
        xe_assert(xe, !xe_pm_runtime_suspended(xe));
}

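/*
 * Intended usage sketch (hypothetical caller): the runtime-PM reference is
 * taken at the outer bound of the access, while the assert documents the
 * inner-code assumption that the device is awake. do_hw_or_vram_access() is a
 * placeholder for the actual access.
 *
 *	xe_pm_runtime_get(xe);
 *	...
 *	xe_device_assert_mem_access(xe);	// splats if device is suspended
 *	do_hw_or_vram_access();
 *	...
 *	xe_pm_runtime_put(xe);
 */
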
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
{
        struct xe_gt *gt;
        u8 id;

        drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
        drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);

        for_each_gt(gt, xe, id) {
                drm_printf(p, "GT id: %u\n", id);
                drm_printf(p, "\tType: %s\n",
                           gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
                drm_printf(p, "\tIP ver: %u.%u.%u\n",
                           REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
                           REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
                           REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
                drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
        }
}

u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
{
        return sign_extend64(address, xe->info.va_bits - 1);
}

u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
{
        return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}

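/*
 * Example, assuming (hypothetically) a platform with xe->info.va_bits == 48:
 * canonicalizing sign-extends bit 47 into the upper bits, and uncanonicalizing
 * masks those upper bits back off, so the two calls round-trip:
 *
 *	xe_device_canonicalize_addr(xe, 0x0000800000000000ull)
 *		== 0xffff800000000000ull
 *	xe_device_uncanonicalize_addr(xe, 0xffff800000000000ull)
 *		== 0x0000800000000000ull
 */
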
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
        struct xe_device *xe = arg;

        xe_pm_runtime_put(xe);
}

/**
 * xe_device_declare_wedged() - Declare device wedged
 * @xe: xe device instance
 *
 * This is a final state that can only be cleared with a module
 * re-probe (unbind + bind).
 * In this state every IOCTL will be blocked so the GT cannot be used.
 * In general it will be called upon any critical error such as gt reset
 * failure or guc loading failure.
 * If xe.wedged module parameter is set to 2, this function will be called
 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
 * snapshot capture. In this mode, GT reset won't be attempted so the state of
 * the issue is preserved for further debugging.
 */
void xe_device_declare_wedged(struct xe_device *xe)
{
        struct xe_gt *gt;
        u8 id;

        if (xe->wedged.mode == 0) {
                drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
                return;
        }

        xe_pm_runtime_get_noresume(xe);

        if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
                drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
                return;
        }

        if (!atomic_xchg(&xe->wedged.flag, 1)) {
                xe->needs_flr_on_fini = true;
                drm_err(&xe->drm,
                        "CRITICAL: Xe has declared device %s as wedged.\n"
                        "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
                        "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
                        dev_name(xe->drm.dev));
        }

        for_each_gt(gt, xe, id)
                xe_gt_declare_wedged(gt);
}
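/*
 * Usage sketch (hypothetical error path), per the kernel-doc above: a critical,
 * unrecoverable error such as a failed GT reset or GuC load would typically
 * capture whatever debug state it needs (e.g. a devcoredump snapshot) and then
 * wedge the device so userspace stops submitting:
 *
 *	xe_device_declare_wedged(xe);	// blocks IOCTLs until unbind + bind
 */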