1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_gt.h"
7
8 #include <linux/minmax.h>
9
10 #include <drm/drm_managed.h>
11 #include <uapi/drm/xe_drm.h>
12
13 #include <generated/xe_wa_oob.h>
14
15 #include "instructions/xe_alu_commands.h"
16 #include "instructions/xe_mi_commands.h"
17 #include "regs/xe_engine_regs.h"
18 #include "regs/xe_gt_regs.h"
19 #include "xe_assert.h"
20 #include "xe_bb.h"
21 #include "xe_device.h"
22 #include "xe_eu_stall.h"
23 #include "xe_exec_queue.h"
24 #include "xe_execlist.h"
25 #include "xe_force_wake.h"
26 #include "xe_ggtt.h"
27 #include "xe_gsc.h"
28 #include "xe_gt_ccs_mode.h"
29 #include "xe_gt_clock.h"
30 #include "xe_gt_freq.h"
31 #include "xe_gt_idle.h"
32 #include "xe_gt_mcr.h"
33 #include "xe_gt_printk.h"
34 #include "xe_gt_sriov_pf.h"
35 #include "xe_gt_sriov_vf.h"
36 #include "xe_gt_sysfs.h"
37 #include "xe_gt_topology.h"
38 #include "xe_guc_exec_queue_types.h"
39 #include "xe_guc_pc.h"
40 #include "xe_guc_submit.h"
41 #include "xe_hw_fence.h"
42 #include "xe_hw_engine_class_sysfs.h"
43 #include "xe_irq.h"
44 #include "xe_lmtt.h"
45 #include "xe_lrc.h"
46 #include "xe_map.h"
47 #include "xe_migrate.h"
48 #include "xe_mmio.h"
49 #include "xe_pagefault.h"
50 #include "xe_pat.h"
51 #include "xe_pm.h"
52 #include "xe_mocs.h"
53 #include "xe_reg_sr.h"
54 #include "xe_ring_ops.h"
55 #include "xe_sa.h"
56 #include "xe_sched_job.h"
57 #include "xe_sriov.h"
58 #include "xe_tlb_inval.h"
59 #include "xe_tuning.h"
60 #include "xe_uc.h"
61 #include "xe_uc_fw.h"
62 #include "xe_vm.h"
63 #include "xe_wa.h"
64 #include "xe_wopcm.h"
65
/**
 * xe_gt_alloc() - Allocate a GT object for a tile
 * @tile: the tile the GT belongs to
 *
 * Allocates a DRM-managed &struct xe_gt and its ordered workqueue. On VFs
 * that require a shared workqueue, the primary GT's workqueue is reused
 * instead of allocating a second one.
 *
 * Return: pointer to the new GT, or an ERR_PTR() on allocation failure.
 */
struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct drm_device *drm = &xe->drm;
	bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt &&
			 IS_SRIOV_VF(xe);
	struct workqueue_struct *ordered_wq;
	struct xe_gt *gt;

	/* DRM-managed allocation: freed automatically with the drm_device */
	gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL);
	if (!gt)
		return ERR_PTR(-ENOMEM);

	gt->tile = tile;
	if (shared_wq && tile->primary_gt->ordered_wq)
		ordered_wq = tile->primary_gt->ordered_wq;
	else
		ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq",
							  WQ_MEM_RECLAIM);
	if (IS_ERR(ordered_wq))
		return ERR_CAST(ordered_wq);

	gt->ordered_wq = ordered_wq;

	return gt;
}
92
/**
 * xe_gt_sanitize() - Sanitize GT software state
 * @gt: the GT object
 *
 * Disables GuC submission so no new jobs reach the hardware while the GT
 * is being reset or suspended.
 */
void xe_gt_sanitize(struct xe_gt *gt)
{
	/*
	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
	 * reload
	 */
	xe_guc_submit_disable(&gt->uc.guc);
}
101
/*
 * Wa_16023588340: adjust cache behavior for host access to VRAM. On the
 * primary GT this disables the clock-gating control bus in GAMREQSTRM_CTRL;
 * on all GTs it programs the L3 class-of-service mask.
 */
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
	u32 reg;

	if (!XE_GT_WA(gt, 16023588340))
		return;

	/* Scoped forcewake: released automatically at function exit */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return;

	if (xe_gt_is_main_type(gt)) {
		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
		reg |= CG_DIS_CNTLBUS;
		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
	}

	xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
}
121
xe_gt_disable_host_l2_vram(struct xe_gt * gt)122 static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
123 {
124 u32 reg;
125
126 if (!XE_GT_WA(gt, 16023588340))
127 return;
128
129 if (xe_gt_is_media_type(gt))
130 return;
131
132 CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
133 if (!fw_ref.domains)
134 return;
135
136 reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
137 reg &= ~CG_DIS_CNTLBUS;
138 xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
139 }
140
xe_gt_enable_comp_1wcoh(struct xe_gt * gt)141 static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
142 {
143 struct xe_device *xe = gt_to_xe(gt);
144 unsigned int fw_ref;
145 u32 reg;
146
147 if (IS_SRIOV_VF(xe))
148 return;
149
150 if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) {
151 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
152 if (!fw_ref)
153 return;
154
155 reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
156 reg |= EN_CMP_1WCOH;
157 xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
158
159 if (xe_gt_is_media_type(gt)) {
160 xe_mmio_rmw32(>->mmio, XE2_GAMWALK_CTRL_MEDIA, 0, EN_CMP_1WCOH_GW);
161 } else {
162 reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMWALK_CTRL_3D);
163 reg |= EN_CMP_1WCOH_GW;
164 xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg);
165 }
166
167 xe_force_wake_put(gt_to_fw(gt), fw_ref);
168 }
169 }
170
171 static void gt_reset_worker(struct work_struct *w);
172
emit_job_sync(struct xe_exec_queue * q,struct xe_bb * bb,long timeout_jiffies)173 static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
174 long timeout_jiffies)
175 {
176 struct xe_sched_job *job;
177 struct dma_fence *fence;
178 long timeout;
179
180 job = xe_bb_create_job(q, bb);
181 if (IS_ERR(job))
182 return PTR_ERR(job);
183
184 xe_sched_job_arm(job);
185 fence = dma_fence_get(&job->drm.s_fence->finished);
186 xe_sched_job_push(job);
187
188 timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies);
189 dma_fence_put(fence);
190 if (timeout < 0)
191 return timeout;
192 else if (!timeout)
193 return -ETIME;
194
195 return 0;
196 }
197
emit_nop_job(struct xe_gt * gt,struct xe_exec_queue * q)198 static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
199 {
200 struct xe_bb *bb;
201 int ret;
202
203 bb = xe_bb_new(gt, 4, false);
204 if (IS_ERR(bb))
205 return PTR_ERR(bb);
206
207 ret = emit_job_sync(q, bb, HZ);
208 xe_bb_free(bb, NULL);
209
210 return ret;
211 }
212
213 /* Dwords required to emit a RMW of a register */
214 #define EMIT_RMW_DW 20
215
/*
 * Emit a batch on @q that applies all LRC workarounds registered for the
 * queue's engine, so their effect is captured in the recorded default
 * ("golden") context image.
 *
 * Masked registers and registers whose full value is replaced are written
 * with a single LRI; everything else needs a read-modify-write emulated via
 * command-streamer GPRs and the MI_MATH ALU, plus explicit steering for MCR
 * registers.
 */
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
	struct xe_hw_engine *hwe = q->hwe;
	struct xe_reg_sr *sr = &hwe->reg_lrc;
	struct xe_reg_sr_entry *entry;
	int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
	unsigned long idx;
	struct xe_bb *bb;
	size_t bb_len = 0;
	u32 *cs;

	/* count RMW registers as those will be handled separately */
	xa_for_each(&sr->xa, idx, entry) {
		if (entry->reg.masked || entry->clr_bits == ~0)
			++count;
		else if (entry->reg.mcr)
			++count_rmw_mcr;
		else
			++count_rmw;
	}

	/* Plain writes: one LRI header plus an addr/value pair per register */
	if (count)
		bb_len += count * 2 + 1;

	/*
	 * RMW of MCR registers is the same as a normal RMW, except an
	 * additional LRI (3 dwords) is required per register to steer the read
	 * to a nom-terminated instance.
	 *
	 * We could probably shorten the batch slightly by eliding the
	 * steering for consecutive MCR registers that have the same
	 * group/instance target, but it's not worth the extra complexity to do
	 * so.
	 */
	bb_len += count_rmw * EMIT_RMW_DW;
	bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);

	/*
	 * After doing all RMW, we need 7 trailing dwords to clean up,
	 * plus an additional 3 dwords to reset steering if any of the
	 * registers were MCR.
	 */
	if (count_rmw || count_rmw_mcr)
		bb_len += 7 + (count_rmw_mcr ? 3 : 0);

	if (hwe->class == XE_ENGINE_CLASS_RENDER)
		/*
		 * Big enough to emit all of the context's 3DSTATE via
		 * xe_lrc_emit_hwe_state_instructions()
		 */
		bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);

	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);

	bb = xe_bb_new(gt, bb_len, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	cs = bb->cs;

	if (count) {
		/*
		 * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
		 * reg + 1
		 */

		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);

		xa_for_each(&sr->xa, idx, entry) {
			struct xe_reg reg = entry->reg;
			u32 val;

			/* RMW registers are skipped here, handled below */
			if (reg.masked)
				val = entry->clr_bits << 16;
			else if (entry->clr_bits == ~0)
				val = 0;
			else
				continue;

			val |= entry->set_bits;

			*cs++ = reg.addr;
			*cs++ = val;
			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
		}
	}

	if (count_rmw || count_rmw_mcr) {
		xa_for_each(&sr->xa, idx, entry) {
			if (entry->reg.masked || entry->clr_bits == ~0)
				continue;

			if (entry->reg.mcr) {
				struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
				u8 group, instance;

				/* Steer the upcoming read to a valid instance */
				xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
				*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
				*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
				*cs++ = SELECTIVE_READ_ADDRESSING |
					REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
					REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
			}

			/* GPR0 = current register value */
			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
			*cs++ = entry->reg.addr;
			*cs++ = CS_GPR_REG(0, 0).addr;

			/* GPR1 = bits to clear, GPR2 = bits to set */
			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				MI_LRI_LRM_CS_MMIO;
			*cs++ = CS_GPR_REG(0, 1).addr;
			*cs++ = entry->clr_bits;
			*cs++ = CS_GPR_REG(0, 2).addr;
			*cs++ = entry->set_bits;

			/* ALU: GPR0 = (GPR0 & ~GPR1) | GPR2 */
			*cs++ = MI_MATH(8);
			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
			*cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1);
			*cs++ = CS_ALU_INSTR_AND;
			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
			*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2);
			*cs++ = CS_ALU_INSTR_OR;
			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);

			/* Write the computed value back to the target register */
			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
			*cs++ = CS_GPR_REG(0, 0).addr;
			*cs++ = entry->reg.addr;

			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
				  entry->reg.addr, entry->clr_bits, entry->set_bits,
				  entry->reg.mcr ? " (MCR)" : "");
		}

		/* reset used GPR */
		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) |
			MI_LRI_LRM_CS_MMIO;
		*cs++ = CS_GPR_REG(0, 0).addr;
		*cs++ = 0;
		*cs++ = CS_GPR_REG(0, 1).addr;
		*cs++ = 0;
		*cs++ = CS_GPR_REG(0, 2).addr;
		*cs++ = 0;

		/* reset steering */
		if (count_rmw_mcr) {
			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
			*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
			*cs++ = 0;
		}
	}

	cs = xe_lrc_emit_hwe_state_instructions(q, cs);

	bb->len = cs - bb->cs;

	ret = emit_job_sync(q, bb, HZ);

	xe_bb_free(bb, NULL);

	return ret;
}
378
/**
 * xe_gt_record_default_lrcs() - Record golden (default) LRC state per class
 * @gt: the GT object
 *
 * For each engine class, runs a workaround job on one queue, then a nop job
 * on a second queue to switch contexts, and snapshots the first queue's
 * context image as the "golden" LRC used to initialize new contexts.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_record_default_lrcs(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err = 0;

	for_each_hw_engine(hwe, gt, id) {
		struct xe_exec_queue *q, *nop_q;
		void *default_lrc;

		/* One golden LRC per engine class is enough */
		if (gt->default_lrc[hwe->class])
			continue;

		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
		xe_wa_process_lrc(hwe);
		xe_hw_engine_setup_default_lrc_state(hwe);
		xe_tuning_process_lrc(hwe);

		default_lrc = drmm_kzalloc(&xe->drm,
					   xe_gt_lrc_size(gt, hwe->class),
					   GFP_KERNEL);
		if (!default_lrc)
			return -ENOMEM;

		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(q)) {
			err = PTR_ERR(q);
			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
				  hwe->name, q);
			return err;
		}

		/* Prime golden LRC with known good state */
		err = emit_wa_job(gt, q);
		if (err) {
			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), q->guc->id);
			goto put_exec_queue;
		}

		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(nop_q)) {
			err = PTR_ERR(nop_q);
			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
				  hwe->name, nop_q);
			goto put_exec_queue;
		}

		/* Switch to different LRC */
		err = emit_nop_job(gt, nop_q);
		if (err) {
			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), nop_q->guc->id);
			goto put_nop_q;
		}

		/* Snapshot the context image starting at the per-process HWSP */
		xe_map_memcpy_from(xe, default_lrc,
				   &q->lrc[0]->bo->vmap,
				   xe_lrc_pphwsp_offset(q->lrc[0]),
				   xe_gt_lrc_size(gt, hwe->class));

		gt->default_lrc[hwe->class] = default_lrc;
put_nop_q:
		xe_exec_queue_put(nop_q);
put_exec_queue:
		xe_exec_queue_put(q);
		if (err)
			break;
	}

	return err;
}
454
/**
 * xe_gt_init_early() - Early GT initialization
 * @gt: the GT object
 *
 * First-phase GT init: SR-IOV early setup, workaround/tuning tables,
 * forcewake, TLB invalidation and MOCS bookkeeping, MMIO accessors, and
 * finally the first GT register accesses (MCR and PAT setup).
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_init_early(struct xe_gt *gt)
{
	int err;

	if (IS_SRIOV_PF(gt_to_xe(gt))) {
		err = xe_gt_sriov_pf_init_early(gt);
		if (err)
			return err;
	}

	if (IS_SRIOV_VF(gt_to_xe(gt))) {
		err = xe_gt_sriov_vf_init_early(gt);
		if (err)
			return err;
	}

	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));

	err = xe_wa_gt_init(gt);
	if (err)
		return err;

	err = xe_tuning_init(gt);
	if (err)
		return err;

	xe_wa_process_gt_oob(gt);

	xe_force_wake_init_gt(gt, gt_to_fw(gt));
	spin_lock_init(&gt->global_invl_lock);

	err = xe_gt_tlb_inval_init_early(gt);
	if (err)
		return err;

	xe_mocs_init_early(gt);

	/*
	 * Only after this point can GT-specific MMIO operations
	 * (including things like communication with the GuC)
	 * be performed.
	 */
	xe_gt_mmio_init(gt);

	err = xe_uc_init_noalloc(&gt->uc);
	if (err)
		return err;

	/* Scoped forcewake, auto-released when the function returns */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return -ETIMEDOUT;

	xe_gt_mcr_init_early(gt);
	xe_pat_init(gt);

	return 0;
}
512
dump_pat_on_error(struct xe_gt * gt)513 static void dump_pat_on_error(struct xe_gt *gt)
514 {
515 struct drm_printer p;
516 char prefix[32];
517
518 snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
519 p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
520
521 xe_pat_dump(gt, &p);
522 }
523
/*
 * GT init steps that must run with GT forcewake held: uC and topology init,
 * GGTT/LMTT setup on the primary GT, engine IRQs, and early HW engine init.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gt_init_with_gt_forcewake(struct xe_gt *gt)
{
	int err;

	/* Scoped forcewake, auto-released on return */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return -ETIMEDOUT;

	err = xe_uc_init(&gt->uc);
	if (err)
		return err;

	xe_gt_topology_init(gt);
	xe_gt_mcr_init(gt);
	xe_gt_enable_host_l2_vram(gt);
	xe_gt_enable_comp_1wcoh(gt);

	if (xe_gt_is_main_type(gt)) {
		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
		if (err)
			return err;
		if (IS_SRIOV_PF(gt_to_xe(gt)))
			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
	}

	/* Enable per hw engine IRQs */
	xe_irq_enable_hwe(gt);

	/* Rerun MCR init as we now have hw engine list */
	xe_gt_mcr_init(gt);

	err = xe_hw_engines_init_early(gt);
	if (err) {
		/* PAT state helps diagnose early engine-init failures */
		dump_pat_on_error(gt);
		return err;
	}

	err = xe_hw_engine_class_sysfs_init(gt);
	if (err)
		return err;

	/* Initialize CCS mode sysfs after early initialization of HW engines */
	err = xe_gt_ccs_mode_sysfs_init(gt);
	if (err)
		return err;

	/*
	 * Stash hardware-reported version. Since this register does not exist
	 * on pre-MTL platforms, reading it there will (correctly) return 0.
	 */
	gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);

	return 0;
}
578
/*
 * GT init steps that need all forcewake domains: workaround/tuning
 * application, clocks, engines, migrate/USM pools, and uC firmware load.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gt_init_with_all_forcewake(struct xe_gt *gt)
{
	int err;

	/* Scoped forcewake for all domains, auto-released on return */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
		return -ETIMEDOUT;

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_wa_process_gt(gt);
	xe_tuning_process_gt(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_gt_clock_init(gt);
	if (err)
		return err;

	xe_mocs_init(gt);
	err = xe_execlist_init(gt);
	if (err)
		return err;

	err = xe_hw_engines_init(gt);
	if (err)
		return err;

	err = xe_uc_init_post_hwconfig(&gt->uc);
	if (err)
		return err;

	if (xe_gt_is_main_type(gt)) {
		/*
		 * USM has its only SA pool to non-block behind user operations
		 */
		if (gt_to_xe(gt)->info.has_usm) {
			struct xe_device *xe = gt_to_xe(gt);

			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
			if (IS_ERR(gt->usm.bb_pool))
				return PTR_ERR(gt->usm.bb_pool);
		}
	}

	if (xe_gt_is_main_type(gt)) {
		struct xe_tile *tile = gt_to_tile(gt);

		err = xe_migrate_init(tile->migrate);
		if (err)
			return err;
	}

	err = xe_uc_load_hw(&gt->uc);
	if (err)
		return err;

	/* Configure default CCS mode of 1 engine with all resources */
	if (xe_gt_ccs_mode_enabled(gt)) {
		gt->ccs_mode = 1;
		xe_gt_apply_ccs_mode(gt);
	}

	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_init_hw(gt);

	return 0;
}
649
/*
 * devm cleanup action registered from xe_gt_init(): cancel any pending
 * reset work, release fence IRQs, and undo the host L2 VRAM workaround.
 */
static void xe_gt_fini(void *arg)
{
	struct xe_gt *gt = arg;
	int i;

	if (disable_work_sync(&gt->reset.worker))
		/*
		 * If gt_reset_worker was halted from executing, take care of
		 * releasing the rpm reference here.
		 */
		xe_pm_runtime_put(gt_to_xe(gt));

	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
		xe_hw_fence_irq_finish(&gt->fence_irq[i]);

	xe_gt_disable_host_l2_vram(gt);
}
667
/**
 * xe_gt_init() - Main GT initialization
 * @gt: the GT object
 *
 * Sets up the reset worker and per-class fence IRQs, registers teardown via
 * devm, then runs the forcewake-gated init phases, idle/freq infrastructure,
 * EU stall support, and SR-IOV VF init.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_init(struct xe_gt *gt)
{
	int err;
	int i;

	INIT_WORK(&gt->reset.worker, gt_reset_worker);

	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
		xe_hw_fence_irq_init(&gt->fence_irq[i]);
	}

	/* Ensure xe_gt_fini() runs on driver teardown */
	err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt);
	if (err)
		return err;

	err = xe_gt_sysfs_init(gt);
	if (err)
		return err;

	err = gt_init_with_gt_forcewake(gt);
	if (err)
		return err;

	err = xe_gt_idle_init(&gt->gtidle);
	if (err)
		return err;

	err = xe_gt_freq_init(gt);
	if (err)
		return err;

	xe_force_wake_init_engines(gt, gt_to_fw(gt));

	err = gt_init_with_all_forcewake(gt);
	if (err)
		return err;

	xe_gt_record_user_engines(gt);

	err = xe_eu_stall_init(gt);
	if (err)
		return err;

	if (IS_SRIOV_VF(gt_to_xe(gt))) {
		err = xe_gt_sriov_vf_init(gt);
		if (err)
			return err;
	}

	return 0;
}
720
721 /**
722 * xe_gt_mmio_init() - Initialize GT's MMIO access
723 * @gt: the GT object
724 *
725 * Initialize GT's MMIO accessor, which will be used to access registers inside
726 * this GT.
727 */
xe_gt_mmio_init(struct xe_gt * gt)728 void xe_gt_mmio_init(struct xe_gt *gt)
729 {
730 struct xe_tile *tile = gt_to_tile(gt);
731 struct xe_device *xe = tile_to_xe(tile);
732
733 xe_mmio_init(>->mmio, tile, tile->mmio.regs, tile->mmio.regs_size);
734
735 if (gt->info.type == XE_GT_TYPE_MEDIA) {
736 gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
737 gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
738 } else {
739 gt->mmio.adj_offset = 0;
740 gt->mmio.adj_limit = 0;
741 }
742
743 if (IS_SRIOV_VF(xe))
744 gt->mmio.sriov_vf_gt = gt;
745 }
746
/**
 * xe_gt_record_user_engines() - Record engines exposed to userspace
 * @gt: the GT object
 *
 * Rebuilds the user-visible engine mask and per-class instance counts,
 * skipping engines reserved for kernel use.
 */
void xe_gt_record_user_engines(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	gt->user_engines.mask = 0;
	memset(gt->user_engines.instances_per_class, 0,
	       sizeof(gt->user_engines.instances_per_class));

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		gt->user_engines.mask |= BIT_ULL(id);
		gt->user_engines.instances_per_class[hwe->class]++;
	}

	/* User-visible engines must be a subset of the present engines */
	xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask)
		     == gt->info.engine_mask);
}
767
/*
 * Perform a full GT hardware reset via GDRST, delegating to the SR-IOV VF
 * flow on VFs. Wraps the reset with the Wa_14015076503 GSC notification.
 *
 * Return: 0 on success, negative error code if the reset bit never cleared.
 */
static int do_gt_reset(struct xe_gt *gt)
{
	int err;

	if (IS_SRIOV_VF(gt_to_xe(gt)))
		return xe_gt_sriov_vf_reset(gt);

	xe_gsc_wa_14015076503(gt, true);

	xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
	/* Wait for the hardware to self-clear the full-domain reset bit */
	err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
	if (err)
		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
			  ERR_PTR(err));

	xe_gsc_wa_14015076503(gt, false);

	return err;
}
787
vf_gt_restart(struct xe_gt * gt)788 static int vf_gt_restart(struct xe_gt *gt)
789 {
790 int err;
791
792 err = xe_uc_sanitize_reset(>->uc);
793 if (err)
794 return err;
795
796 err = xe_uc_load_hw(>->uc);
797 if (err)
798 return err;
799
800 err = xe_uc_start(>->uc);
801 if (err)
802 return err;
803
804 return 0;
805 }
806
/*
 * Re-program GT state after a reset or resume (native/PF path; VFs take the
 * reduced vf_gt_restart() flow). Ordering mirrors initial GT bring-up.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int do_gt_restart(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err;

	if (IS_SRIOV_VF(gt_to_xe(gt)))
		return vf_gt_restart(gt);

	xe_pat_init(gt);

	xe_gt_enable_host_l2_vram(gt);
	xe_gt_enable_comp_1wcoh(gt);

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_wopcm_init(&gt->uc.wopcm);
	if (err)
		return err;

	for_each_hw_engine(hwe, gt, id)
		xe_hw_engine_enable_ring(hwe);

	err = xe_uc_sanitize_reset(&gt->uc);
	if (err)
		return err;

	err = xe_uc_load_hw(&gt->uc);
	if (err)
		return err;

	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_init_hw(gt);

	xe_mocs_init(gt);

	/* Re-apply per-engine save/restore register lists */
	for_each_hw_engine(hwe, gt, id)
		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);

	/* Get CCS mode in sync between sw/hw */
	xe_gt_apply_ccs_mode(gt);

	err = xe_uc_start(&gt->uc);
	if (err)
		return err;

	/* Restore GT freq to expected values */
	xe_gt_sanitize_freq(gt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_restart(gt);

	return 0;
}
865
/*
 * Worker performing an asynchronous full GT reset + restart, queued by
 * xe_gt_reset_async(). On failure the whole device is declared wedged.
 * Always drops the runtime PM reference taken when the work was queued.
 */
static void gt_reset_worker(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
	unsigned int fw_ref;
	int err;

	/* Nothing to reset if the device is already wedged */
	if (xe_device_wedged(gt_to_xe(gt)))
		goto err_pm_put;

	/* We only support GT resets with GuC submission */
	if (!xe_device_uc_enabled(gt_to_xe(gt)))
		goto err_pm_put;

	xe_gt_info(gt, "reset started\n");

	if (xe_fault_inject_gt_reset()) {
		err = -ECANCELED;
		goto err_fail;
	}

	xe_gt_sanitize(gt);

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		err = -ETIMEDOUT;
		goto err_out;
	}

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_stop_prepare(gt);

	/* Quiesce the microcontrollers and outstanding faults before reset */
	xe_uc_gucrc_disable(&gt->uc);
	xe_uc_stop_prepare(&gt->uc);
	xe_pagefault_reset(gt_to_xe(gt), gt);

	xe_uc_stop(&gt->uc);

	xe_tlb_inval_reset(&gt->tlb_inval);

	err = do_gt_reset(gt);
	if (err)
		goto err_out;

	err = do_gt_restart(gt);
	if (err)
		goto err_out;

	xe_force_wake_put(gt_to_fw(gt), fw_ref);

	/* Pair with get while enqueueing the work in xe_gt_reset_async() */
	xe_pm_runtime_put(gt_to_xe(gt));

	xe_gt_info(gt, "reset done\n");

	return;

err_out:
	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	XE_WARN_ON(xe_uc_start(&gt->uc));

err_fail:
	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(gt_to_xe(gt));
err_pm_put:
	xe_pm_runtime_put(gt_to_xe(gt));
}
932
/**
 * xe_gt_reset_async() - Queue an asynchronous GT reset
 * @gt: the GT object
 *
 * Queues gt_reset_worker() on the GT's ordered workqueue unless a reset is
 * already in flight. A runtime PM reference is held until the worker runs.
 */
void xe_gt_reset_async(struct xe_gt *gt)
{
	xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));

	/* Don't do a reset while one is already in flight */
	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
		return;

	xe_gt_info(gt, "reset queued\n");

	/* Pair with put in gt_reset_worker() if work is enqueued */
	xe_pm_runtime_get_noresume(gt_to_xe(gt));
	if (!queue_work(gt->ordered_wq, &gt->reset.worker))
		xe_pm_runtime_put(gt_to_xe(gt));
}
948
/**
 * xe_gt_suspend_prepare() - Prepare the GT for suspend
 * @gt: the GT object
 *
 * Takes all forcewake domains for the scope of the call and asks the
 * microcontrollers to prepare for suspend.
 */
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_uc_suspend_prepare(&gt->uc);
}
954
/**
 * xe_gt_suspend() - Suspend the GT
 * @gt: the GT object
 *
 * Disables submission, suspends the microcontrollers, disables power
 * gating, and undoes the host L2 VRAM workaround.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_suspend(struct xe_gt *gt)
{
	int err;

	xe_gt_dbg(gt, "suspending\n");
	xe_gt_sanitize(gt);

	/* Scoped forcewake, auto-released on return */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	err = xe_uc_suspend(&gt->uc);
	if (err) {
		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
		return err;
	}

	xe_gt_idle_disable_pg(gt);

	xe_gt_disable_host_l2_vram(gt);

	xe_gt_dbg(gt, "suspended\n");

	return 0;
}
982
/**
 * xe_gt_shutdown() - Shut down the GT
 * @gt: the GT object
 *
 * Performs a full GT reset under forcewake so the hardware is quiesced.
 */
void xe_gt_shutdown(struct xe_gt *gt)
{
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	do_gt_reset(gt);
}
988
989 /**
990 * xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
991 * @gt: the GT object
992 *
993 * Called after driver init/GSC load completes to restore GT frequencies if we
994 * limited them for any WAs.
995 */
int xe_gt_sanitize_freq(struct xe_gt *gt)
{
	int ret = 0;

	/*
	 * Wa_22019338487: restore the stashed frequency once GSC load can no
	 * longer be pending — firmware unavailable, already loaded, or in an
	 * error state.
	 */
	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
	     xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
	     xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
	    XE_GT_WA(gt, 22019338487))
		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);

	return ret;
}
1008
/**
 * xe_gt_resume() - Resume the GT
 * @gt: the GT object
 *
 * Re-runs the GT restart sequence under forcewake and re-enables power
 * gating.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_resume(struct xe_gt *gt)
{
	int err;

	xe_gt_dbg(gt, "resuming\n");
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	err = do_gt_restart(gt);
	if (err)
		return err;

	xe_gt_idle_enable_pg(gt);

	xe_gt_dbg(gt, "resumed\n");

	return 0;
}
1030
1031 /**
1032 * xe_gt_runtime_suspend() - GT runtime suspend
1033 * @gt: the GT object
1034 *
1035 * Return: 0 on success, negative error code otherwise.
1036 */
int xe_gt_runtime_suspend(struct xe_gt *gt)
{
	xe_gt_dbg(gt, "runtime suspending\n");

	/* Scoped forcewake, auto-released on return */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "runtime suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	/* Quiesce the microcontrollers, then drop the host L2 VRAM WA state */
	xe_uc_runtime_suspend(&gt->uc);
	xe_gt_disable_host_l2_vram(gt);

	xe_gt_dbg(gt, "runtime suspended\n");

	return 0;
}
1054
1055 /**
1056 * xe_gt_runtime_resume() - GT runtime resume
1057 * @gt: the GT object
1058 *
1059 * Return: 0 on success, negative error code otherwise.
1060 */
int xe_gt_runtime_resume(struct xe_gt *gt)
{
	xe_gt_dbg(gt, "runtime resuming\n");

	/* Scoped forcewake, auto-released on return */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "runtime resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	/* Reverse order of xe_gt_runtime_suspend(): L2 WA first, then uC */
	xe_gt_enable_host_l2_vram(gt);
	xe_uc_runtime_resume(&gt->uc);

	xe_gt_dbg(gt, "runtime resumed\n");

	return 0;
}
1078
/*
 * Look up a hardware engine on @gt by class and instance number, matching
 * against the logical instance when @logical is true, the physical instance
 * otherwise. Returns NULL when no engine matches.
 */
struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
				     enum xe_engine_class class,
				     u16 instance, bool logical)
{
	enum xe_hw_engine_id id;
	struct xe_hw_engine *hwe;

	for_each_hw_engine(hwe, gt, id) {
		if (hwe->class != class)
			continue;

		if (logical ? hwe->logical_instance == instance
			    : hwe->instance == instance)
			return hwe;
	}

	return NULL;
}
1094
xe_gt_any_hw_engine_by_reset_domain(struct xe_gt * gt,enum xe_engine_class class)1095 struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
1096 enum xe_engine_class class)
1097 {
1098 struct xe_hw_engine *hwe;
1099 enum xe_hw_engine_id id;
1100
1101 for_each_hw_engine(hwe, gt, id) {
1102 switch (class) {
1103 case XE_ENGINE_CLASS_RENDER:
1104 case XE_ENGINE_CLASS_COMPUTE:
1105 if (hwe->class == XE_ENGINE_CLASS_RENDER ||
1106 hwe->class == XE_ENGINE_CLASS_COMPUTE)
1107 return hwe;
1108 break;
1109 default:
1110 if (hwe->class == class)
1111 return hwe;
1112 }
1113 }
1114
1115 return NULL;
1116 }
1117
/* Return the first hardware engine on @gt, or NULL if there are none. */
struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id)
		return hwe;

	return NULL;
}
1128
1129 /**
1130 * xe_gt_declare_wedged() - Declare GT wedged
1131 * @gt: the GT object
1132 *
1133 * Wedge the GT which stops all submission, saves desired debug state, and
1134 * cleans up anything which could timeout.
1135 */
void xe_gt_declare_wedged(struct xe_gt *gt)
{
	/* Only legal when the device's wedged mode allows wedging */
	xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode);

	xe_uc_declare_wedged(&gt->uc);
	xe_tlb_inval_reset(&gt->tlb_inval);
}
1143