xref: /linux/drivers/gpu/drm/xe/xe_gsc.c (revision 7293859c51e1d7f923073aa46e2072333b3e143d)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include "xe_gsc.h"
7 
8 #include <linux/delay.h>
9 
10 #include <drm/drm_managed.h>
11 
12 #include <generated/xe_wa_oob.h>
13 
14 #include "abi/gsc_mkhi_commands_abi.h"
15 #include "xe_bb.h"
16 #include "xe_bo.h"
17 #include "xe_device.h"
18 #include "xe_exec_queue.h"
19 #include "xe_force_wake.h"
20 #include "xe_gsc_proxy.h"
21 #include "xe_gsc_submit.h"
22 #include "xe_gt.h"
23 #include "xe_gt_mcr.h"
24 #include "xe_gt_printk.h"
25 #include "xe_guc_pc.h"
26 #include "xe_huc.h"
27 #include "xe_map.h"
28 #include "xe_mmio.h"
29 #include "xe_pm.h"
30 #include "xe_sched_job.h"
31 #include "xe_uc_fw.h"
32 #include "xe_wa.h"
33 #include "instructions/xe_gsc_commands.h"
34 #include "regs/xe_gsc_regs.h"
35 #include "regs/xe_gt_regs.h"
36 
37 static struct xe_gt *
38 gsc_to_gt(struct xe_gsc *gsc)
39 {
40 	return container_of(gsc, struct xe_gt, uc.gsc);
41 }
42 
43 static int memcpy_fw(struct xe_gsc *gsc)
44 {
45 	struct xe_gt *gt = gsc_to_gt(gsc);
46 	struct xe_device *xe = gt_to_xe(gt);
47 	u32 fw_size = gsc->fw.size;
48 	void *storage;
49 
50 	/*
51 	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
52 	 * a memcpy for now.
53 	 */
54 	storage = kmalloc(fw_size, GFP_KERNEL);
55 	if (!storage)
56 		return -ENOMEM;
57 
58 	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
59 	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
60 	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
61 
62 	kfree(storage);
63 
64 	return 0;
65 }
66 
67 static int emit_gsc_upload(struct xe_gsc *gsc)
68 {
69 	struct xe_gt *gt = gsc_to_gt(gsc);
70 	u64 offset = xe_bo_ggtt_addr(gsc->private);
71 	struct xe_bb *bb;
72 	struct xe_sched_job *job;
73 	struct dma_fence *fence;
74 	long timeout;
75 
76 	bb = xe_bb_new(gt, 4, false);
77 	if (IS_ERR(bb))
78 		return PTR_ERR(bb);
79 
80 	bb->cs[bb->len++] = GSC_FW_LOAD;
81 	bb->cs[bb->len++] = lower_32_bits(offset);
82 	bb->cs[bb->len++] = upper_32_bits(offset);
83 	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
84 
85 	job = xe_bb_create_job(gsc->q, bb);
86 	if (IS_ERR(job)) {
87 		xe_bb_free(bb, NULL);
88 		return PTR_ERR(job);
89 	}
90 
91 	xe_sched_job_arm(job);
92 	fence = dma_fence_get(&job->drm.s_fence->finished);
93 	xe_sched_job_push(job);
94 
95 	timeout = dma_fence_wait_timeout(fence, false, HZ);
96 	dma_fence_put(fence);
97 	xe_bb_free(bb, NULL);
98 	if (timeout < 0)
99 		return timeout;
100 	else if (!timeout)
101 		return -ETIME;
102 
103 	return 0;
104 }
105 
/*
 * Field accessors for the compatibility version query: writes use the request
 * layout (struct gsc_get_compatibility_version_in), reads use the reply layout
 * (struct gsc_get_compatibility_version_out).
 */
#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, \
			struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, \
			struct gsc_get_compatibility_version_out, field_)
110 
111 static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
112 {
113 	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));
114 
115 	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
116 	version_query_wr(xe, map, wr_offset, header.command,
117 			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);
118 
119 	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
120 }
121 
122 #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
123 static int query_compatibility_version(struct xe_gsc *gsc)
124 {
125 	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
126 	struct xe_gt *gt = gsc_to_gt(gsc);
127 	struct xe_tile *tile = gt_to_tile(gt);
128 	struct xe_device *xe = gt_to_xe(gt);
129 	struct xe_bo *bo;
130 	u32 wr_offset;
131 	u32 rd_offset;
132 	u64 ggtt_offset;
133 	int err;
134 
135 	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
136 				  ttm_bo_type_kernel,
137 				  XE_BO_FLAG_SYSTEM |
138 				  XE_BO_FLAG_GGTT);
139 	if (IS_ERR(bo)) {
140 		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
141 		return PTR_ERR(bo);
142 	}
143 
144 	ggtt_offset = xe_bo_ggtt_addr(bo);
145 
146 	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
147 				       sizeof(struct gsc_get_compatibility_version_in));
148 	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);
149 
150 	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
151 				       ggtt_offset + GSC_VER_PKT_SZ,
152 				       GSC_VER_PKT_SZ);
153 	if (err) {
154 		xe_gt_err(gt,
155 			  "failed to submit GSC request for compatibility version: %d\n",
156 			  err);
157 		goto out_bo;
158 	}
159 
160 	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
161 				     sizeof(struct gsc_get_compatibility_version_out),
162 				     &rd_offset);
163 	if (err) {
164 		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
165 		return err;
166 	}
167 
168 	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
169 	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);
170 
171 	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);
172 
173 out_bo:
174 	xe_bo_unpin_map_no_vm(bo);
175 	return err;
176 }
177 
178 static int gsc_fw_is_loaded(struct xe_gt *gt)
179 {
180 	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
181 			      HECI1_FWSTS1_INIT_COMPLETE;
182 }
183 
184 static int gsc_fw_wait(struct xe_gt *gt)
185 {
186 	/*
187 	 * GSC load can take up to 250ms from the moment the instruction is
188 	 * executed by the GSCCS. To account for possible submission delays or
189 	 * other issues, we use a 500ms timeout in the wait here.
190 	 */
191 	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
192 			      HECI1_FWSTS1_INIT_COMPLETE,
193 			      HECI1_FWSTS1_INIT_COMPLETE,
194 			      500 * USEC_PER_MSEC, NULL, false);
195 }
196 
197 static int gsc_upload(struct xe_gsc *gsc)
198 {
199 	struct xe_gt *gt = gsc_to_gt(gsc);
200 	struct xe_device *xe = gt_to_xe(gt);
201 	int err;
202 
203 	/* we should only be here if the init step were successful */
204 	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
205 
206 	if (gsc_fw_is_loaded(gt)) {
207 		xe_gt_err(gt, "GSC already loaded at upload time\n");
208 		return -EEXIST;
209 	}
210 
211 	err = memcpy_fw(gsc);
212 	if (err) {
213 		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
214 		return err;
215 	}
216 
217 	/*
218 	 * GSC is only killed by an FLR, so we need to trigger one on unload to
219 	 * make sure we stop it. This is because we assign a chunk of memory to
220 	 * the GSC as part of the FW load, so we need to make sure it stops
221 	 * using it when we release it to the system on driver unload. Note that
222 	 * this is not a problem of the unload per-se, because the GSC will not
223 	 * touch that memory unless there are requests for it coming from the
224 	 * driver; therefore, no accesses will happen while Xe is not loaded,
225 	 * but if we re-load the driver then the GSC might wake up and try to
226 	 * access that old memory location again.
227 	 * Given that an FLR is a very disruptive action (see the FLR function
228 	 * for details), we want to do it as the last action before releasing
229 	 * the access to the MMIO bar, which means we need to do it as part of
230 	 * mmio cleanup.
231 	 */
232 	xe->needs_flr_on_fini = true;
233 
234 	err = emit_gsc_upload(gsc);
235 	if (err) {
236 		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
237 		return err;
238 	}
239 
240 	err = gsc_fw_wait(gt);
241 	if (err) {
242 		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
243 		return err;
244 	}
245 
246 	err = query_compatibility_version(gsc);
247 	if (err)
248 		return err;
249 
250 	err = xe_uc_fw_check_version_requirements(&gsc->fw);
251 	if (err)
252 		return err;
253 
254 	return 0;
255 }
256 
257 static int gsc_upload_and_init(struct xe_gsc *gsc)
258 {
259 	struct xe_gt *gt = gsc_to_gt(gsc);
260 	struct xe_tile *tile = gt_to_tile(gt);
261 	int ret;
262 
263 	if (XE_WA(tile->primary_gt, 14018094691)) {
264 		ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
265 
266 		/*
267 		 * If the forcewake fails we want to keep going, because the worst
268 		 * case outcome in failing to apply the WA is that PXP won't work,
269 		 * which is not fatal. We still throw a warning so the issue is
270 		 * seen if it happens.
271 		 */
272 		xe_gt_WARN_ON(tile->primary_gt, ret);
273 
274 		xe_gt_mcr_multicast_write(tile->primary_gt,
275 					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
276 					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
277 	}
278 
279 	ret = gsc_upload(gsc);
280 
281 	if (XE_WA(tile->primary_gt, 14018094691))
282 		xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
283 
284 	if (ret)
285 		return ret;
286 
287 	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
288 
289 	/* GSC load is done, restore expected GT frequencies */
290 	xe_gt_sanitize_freq(gt);
291 
292 	xe_gt_dbg(gt, "GSC FW async load completed\n");
293 
294 	/* HuC auth failure is not fatal */
295 	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
296 		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
297 
298 	ret = xe_gsc_proxy_start(gsc);
299 	if (ret)
300 		return ret;
301 
302 	xe_gt_dbg(gt, "GSC proxy init completed\n");
303 
304 	return 0;
305 }
306 
307 static int gsc_er_complete(struct xe_gt *gt)
308 {
309 	u32 er_status;
310 
311 	if (!gsc_fw_is_loaded(gt))
312 		return 0;
313 
314 	/*
315 	 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the
316 	 * driver or the GuC hit the GDRST register, the CS is immediately reset
317 	 * and a success is reported, but the GSC shim keeps resetting in the
318 	 * background. While the shim reset is ongoing, the CS is able to accept
319 	 * new context submission, but any commands that require the shim will
320 	 * be stalled until the reset is completed. This means that we can keep
321 	 * submitting to the GSCCS as long as we make sure that the preemption
322 	 * timeout is big enough to cover any delay introduced by the reset.
323 	 * When the shim reset completes, a specific CS interrupt is triggered,
324 	 * in response to which we need to check the GSCI_TIMER_STATUS register
325 	 * to see if the reset was successful or not.
326 	 * Note that the GSCI_TIMER_STATUS register is not power save/restored,
327 	 * so it gets reset on MC6 entry. However, a reset failure stops MC6,
328 	 * so in that scenario we're always guaranteed to find the correct
329 	 * value.
330 	 */
331 	er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
332 
333 	if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
334 		/*
335 		 * XXX: we should trigger an FLR here, but we don't have support
336 		 * for that yet.
337 		 */
338 		xe_gt_err(gt, "GSC ER timed out!\n");
339 		return -EIO;
340 	}
341 
342 	return 0;
343 }
344 
345 static void gsc_work(struct work_struct *work)
346 {
347 	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
348 	struct xe_gt *gt = gsc_to_gt(gsc);
349 	struct xe_device *xe = gt_to_xe(gt);
350 	u32 actions;
351 	int ret;
352 
353 	spin_lock_irq(&gsc->lock);
354 	actions = gsc->work_actions;
355 	gsc->work_actions = 0;
356 	spin_unlock_irq(&gsc->lock);
357 
358 	xe_pm_runtime_get(xe);
359 	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
360 
361 	if (actions & GSC_ACTION_ER_COMPLETE) {
362 		ret = gsc_er_complete(gt);
363 		if (ret)
364 			goto out;
365 	}
366 
367 	if (actions & GSC_ACTION_FW_LOAD) {
368 		ret = gsc_upload_and_init(gsc);
369 		if (ret && ret != -EEXIST)
370 			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
371 		else
372 			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
373 	}
374 
375 	if (actions & GSC_ACTION_SW_PROXY)
376 		xe_gsc_proxy_request_handler(gsc);
377 
378 out:
379 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
380 	xe_pm_runtime_put(xe);
381 }
382 
383 void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
384 {
385 	struct xe_gt *gt = hwe->gt;
386 	struct xe_gsc *gsc = &gt->uc.gsc;
387 
388 	if (unlikely(!intr_vec))
389 		return;
390 
391 	if (intr_vec & GSC_ER_COMPLETE) {
392 		spin_lock(&gsc->lock);
393 		gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
394 		spin_unlock(&gsc->lock);
395 
396 		queue_work(gsc->wq, &gsc->work);
397 	}
398 }
399 
400 int xe_gsc_init(struct xe_gsc *gsc)
401 {
402 	struct xe_gt *gt = gsc_to_gt(gsc);
403 	struct xe_tile *tile = gt_to_tile(gt);
404 	int ret;
405 
406 	gsc->fw.type = XE_UC_FW_TYPE_GSC;
407 	INIT_WORK(&gsc->work, gsc_work);
408 	spin_lock_init(&gsc->lock);
409 
410 	/* The GSC uC is only available on the media GT */
411 	if (tile->media_gt && (gt != tile->media_gt)) {
412 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
413 		return 0;
414 	}
415 
416 	/*
417 	 * Some platforms can have GuC but not GSC. That would cause
418 	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
419 	 * all firmware loading. So check for GSC being enabled before
420 	 * propagating the failure back up. That way the higher level will keep
421 	 * going and load GuC as appropriate.
422 	 */
423 	ret = xe_uc_fw_init(&gsc->fw);
424 	if (!xe_uc_fw_is_enabled(&gsc->fw))
425 		return 0;
426 	else if (ret)
427 		goto out;
428 
429 	ret = xe_gsc_proxy_init(gsc);
430 	if (ret && ret != -ENODEV)
431 		goto out;
432 
433 	return 0;
434 
435 out:
436 	xe_gt_err(gt, "GSC init failed with %d", ret);
437 	return ret;
438 }
439 
440 static void free_resources(void *arg)
441 {
442 	struct xe_gsc *gsc = arg;
443 
444 	if (gsc->wq) {
445 		destroy_workqueue(gsc->wq);
446 		gsc->wq = NULL;
447 	}
448 
449 	if (gsc->q) {
450 		xe_exec_queue_put(gsc->q);
451 		gsc->q = NULL;
452 	}
453 }
454 
455 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
456 {
457 	struct xe_gt *gt = gsc_to_gt(gsc);
458 	struct xe_tile *tile = gt_to_tile(gt);
459 	struct xe_device *xe = gt_to_xe(gt);
460 	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
461 	struct xe_exec_queue *q;
462 	struct workqueue_struct *wq;
463 	struct xe_bo *bo;
464 	int err;
465 
466 	if (!xe_uc_fw_is_available(&gsc->fw))
467 		return 0;
468 
469 	if (!hwe)
470 		return -ENODEV;
471 
472 	bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M,
473 					  XE_BO_FLAG_STOLEN |
474 					  XE_BO_FLAG_GGTT);
475 	if (IS_ERR(bo))
476 		return PTR_ERR(bo);
477 
478 	q = xe_exec_queue_create(xe, NULL,
479 				 BIT(hwe->logical_instance), 1, hwe,
480 				 EXEC_QUEUE_FLAG_KERNEL |
481 				 EXEC_QUEUE_FLAG_PERMANENT, 0);
482 	if (IS_ERR(q)) {
483 		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
484 		err = PTR_ERR(q);
485 		goto out_bo;
486 	}
487 
488 	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
489 	if (!wq) {
490 		err = -ENOMEM;
491 		goto out_q;
492 	}
493 
494 	gsc->private = bo;
495 	gsc->q = q;
496 	gsc->wq = wq;
497 
498 	err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc);
499 	if (err)
500 		return err;
501 
502 	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
503 
504 	return 0;
505 
506 out_q:
507 	xe_exec_queue_put(q);
508 out_bo:
509 	xe_bo_unpin_map_no_vm(bo);
510 	return err;
511 }
512 
513 void xe_gsc_load_start(struct xe_gsc *gsc)
514 {
515 	struct xe_gt *gt = gsc_to_gt(gsc);
516 	struct xe_device *xe = gt_to_xe(gt);
517 
518 	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
519 		return;
520 
521 	/*
522 	 * The GSC HW is only reset by driver FLR or D3cold entry. We don't
523 	 * support the former at runtime, while the latter is only supported on
524 	 * DGFX, for which we don't support GSC. Therefore, if GSC failed to
525 	 * load previously there is no need to try again because the HW is
526 	 * stuck in the error state.
527 	 */
528 	xe_assert(xe, !IS_DGFX(xe));
529 	if (xe_uc_fw_is_in_error_state(&gsc->fw))
530 		return;
531 
532 	/* GSC FW survives GT reset and D3Hot */
533 	if (gsc_fw_is_loaded(gt)) {
534 		if (xe_gsc_proxy_init_done(gsc))
535 			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
536 		else
537 			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
538 		return;
539 	}
540 
541 	spin_lock_irq(&gsc->lock);
542 	gsc->work_actions |= GSC_ACTION_FW_LOAD;
543 	spin_unlock_irq(&gsc->lock);
544 
545 	queue_work(gsc->wq, &gsc->work);
546 }
547 
548 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
549 {
550 	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
551 		flush_work(&gsc->work);
552 }
553 
/**
 * xe_gsc_remove() - Tear down GSC state ahead of driver removal
 * @gsc: the GSC uC
 *
 * Currently this only removes the GSC proxy.
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}
562 
563 /*
564  * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
565  * GSC engine reset by writing a notification bit in the GS1 register and then
566  * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
567  * for the FW to get prepare for the reset, so we need to wait for that amount
568  * of time.
569  * After the reset is complete we need to then clear the GS1 register.
570  */
571 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
572 {
573 	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
574 	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
575 
576 	/* WA only applies if the GSC is loaded */
577 	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
578 		return;
579 
580 	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
581 
582 	if (prep) {
583 		/* make sure the reset bit is clear when writing the CSR reg */
584 		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
585 			      HECI_H_CSR_RST, HECI_H_CSR_IG);
586 		msleep(200);
587 	}
588 }
589