xref: /linux/drivers/gpu/drm/i915/gt/uc/intel_uc.c (revision a1c3be890440a1769ed6f822376a3e3ab0d42994)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2016-2019 Intel Corporation
4  */
5 
6 #include "gt/intel_gt.h"
7 #include "gt/intel_reset.h"
8 #include "intel_guc.h"
9 #include "intel_guc_ads.h"
10 #include "intel_guc_submission.h"
11 #include "intel_uc.h"
12 
13 #include "i915_drv.h"
14 
15 static const struct intel_uc_ops uc_ops_off;
16 static const struct intel_uc_ops uc_ops_on;
17 
18 static void uc_expand_default_options(struct intel_uc *uc)
19 {
20 	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
21 
22 	if (i915->params.enable_guc != -1)
23 		return;
24 
25 	/* Don't enable GuC/HuC on pre-Gen12 */
26 	if (INTEL_GEN(i915) < 12) {
27 		i915->params.enable_guc = 0;
28 		return;
29 	}
30 
31 	/* Don't enable GuC/HuC on older Gen12 platforms */
32 	if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
33 		i915->params.enable_guc = 0;
34 		return;
35 	}
36 
37 	/* Default: enable HuC authentication only */
38 	i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
39 }
40 
41 /* Reset GuC providing us with fresh state for both GuC and HuC.
42  */
43 static int __intel_uc_reset_hw(struct intel_uc *uc)
44 {
45 	struct intel_gt *gt = uc_to_gt(uc);
46 	int ret;
47 	u32 guc_status;
48 
49 	ret = i915_inject_probe_error(gt->i915, -ENXIO);
50 	if (ret)
51 		return ret;
52 
53 	ret = intel_reset_guc(gt);
54 	if (ret) {
55 		DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
56 		return ret;
57 	}
58 
59 	guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
60 	WARN(!(guc_status & GS_MIA_IN_RESET),
61 	     "GuC status: 0x%x, MIA core expected to be in reset\n",
62 	     guc_status);
63 
64 	return ret;
65 }
66 
67 static void __confirm_options(struct intel_uc *uc)
68 {
69 	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
70 
71 	drm_dbg(&i915->drm,
72 		"enable_guc=%d (guc:%s submission:%s huc:%s)\n",
73 		i915->params.enable_guc,
74 		yesno(intel_uc_wants_guc(uc)),
75 		yesno(intel_uc_wants_guc_submission(uc)),
76 		yesno(intel_uc_wants_huc(uc)));
77 
78 	if (i915->params.enable_guc == 0) {
79 		GEM_BUG_ON(intel_uc_wants_guc(uc));
80 		GEM_BUG_ON(intel_uc_wants_guc_submission(uc));
81 		GEM_BUG_ON(intel_uc_wants_huc(uc));
82 		return;
83 	}
84 
85 	if (!intel_uc_supports_guc(uc))
86 		drm_info(&i915->drm,
87 			 "Incompatible option enable_guc=%d - %s\n",
88 			 i915->params.enable_guc, "GuC is not supported!");
89 
90 	if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
91 	    !intel_uc_supports_huc(uc))
92 		drm_info(&i915->drm,
93 			 "Incompatible option enable_guc=%d - %s\n",
94 			 i915->params.enable_guc, "HuC is not supported!");
95 
96 	if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
97 	    !intel_uc_supports_guc_submission(uc))
98 		drm_info(&i915->drm,
99 			 "Incompatible option enable_guc=%d - %s\n",
100 			 i915->params.enable_guc, "GuC submission is N/A");
101 
102 	if (i915->params.enable_guc & ~ENABLE_GUC_MASK)
103 		drm_info(&i915->drm,
104 			 "Incompatible option enable_guc=%d - %s\n",
105 			 i915->params.enable_guc, "undocumented flag");
106 }
107 
108 void intel_uc_init_early(struct intel_uc *uc)
109 {
110 	uc_expand_default_options(uc);
111 
112 	intel_guc_init_early(&uc->guc);
113 	intel_huc_init_early(&uc->huc);
114 
115 	__confirm_options(uc);
116 
117 	if (intel_uc_wants_guc(uc))
118 		uc->ops = &uc_ops_on;
119 	else
120 		uc->ops = &uc_ops_off;
121 }
122 
void intel_uc_driver_late_release(struct intel_uc *uc)
{
	/* Intentionally empty: there is nothing to release here. */
}
126 
127 /**
128  * intel_uc_init_mmio - setup uC MMIO access
129  * @uc: the intel_uc structure
130  *
131  * Setup minimal state necessary for MMIO accesses later in the
132  * initialization sequence.
133  */
134 void intel_uc_init_mmio(struct intel_uc *uc)
135 {
136 	intel_guc_init_send_regs(&uc->guc);
137 }
138 
139 static void __uc_capture_load_err_log(struct intel_uc *uc)
140 {
141 	struct intel_guc *guc = &uc->guc;
142 
143 	if (guc->log.vma && !uc->load_err_log)
144 		uc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
145 }
146 
147 static void __uc_free_load_err_log(struct intel_uc *uc)
148 {
149 	struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log);
150 
151 	if (log)
152 		i915_gem_object_put(log);
153 }
154 
/*
 * Driver removal: tear down the uC hardware state first, then the software
 * state, and finally release any load error log still held.
 */
void intel_uc_driver_remove(struct intel_uc *uc)
{
	intel_uc_fini_hw(uc);
	intel_uc_fini(uc);

	__uc_free_load_err_log(uc);
}
161 
162 static inline bool guc_communication_enabled(struct intel_guc *guc)
163 {
164 	return intel_guc_ct_enabled(&guc->ct);
165 }
166 
167 /*
168  * Events triggered while CT buffers are disabled are logged in the SCRATCH_15
169  * register using the same bits used in the CT message payload. Since our
170  * communication channel with guc is turned off at this point, we can save the
171  * message and handle it after we turn it back on.
172  */
173 static void guc_clear_mmio_msg(struct intel_guc *guc)
174 {
175 	intel_uncore_write(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15), 0);
176 }
177 
178 static void guc_get_mmio_msg(struct intel_guc *guc)
179 {
180 	u32 val;
181 
182 	spin_lock_irq(&guc->irq_lock);
183 
184 	val = intel_uncore_read(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15));
185 	guc->mmio_msg |= val & guc->msg_enabled_mask;
186 
187 	/*
188 	 * clear all events, including the ones we're not currently servicing,
189 	 * to make sure we don't try to process a stale message if we enable
190 	 * handling of more events later.
191 	 */
192 	guc_clear_mmio_msg(guc);
193 
194 	spin_unlock_irq(&guc->irq_lock);
195 }
196 
197 static void guc_handle_mmio_msg(struct intel_guc *guc)
198 {
199 	/* we need communication to be enabled to reply to GuC */
200 	GEM_BUG_ON(!guc_communication_enabled(guc));
201 
202 	spin_lock_irq(&guc->irq_lock);
203 	if (guc->mmio_msg) {
204 		intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1);
205 		guc->mmio_msg = 0;
206 	}
207 	spin_unlock_irq(&guc->irq_lock);
208 }
209 
210 static void guc_reset_interrupts(struct intel_guc *guc)
211 {
212 	guc->interrupts.reset(guc);
213 }
214 
215 static void guc_enable_interrupts(struct intel_guc *guc)
216 {
217 	guc->interrupts.enable(guc);
218 }
219 
220 static void guc_disable_interrupts(struct intel_guc *guc)
221 {
222 	guc->interrupts.disable(guc);
223 }
224 
225 static int guc_enable_communication(struct intel_guc *guc)
226 {
227 	struct intel_gt *gt = guc_to_gt(guc);
228 	struct drm_i915_private *i915 = gt->i915;
229 	int ret;
230 
231 	GEM_BUG_ON(guc_communication_enabled(guc));
232 
233 	ret = i915_inject_probe_error(i915, -ENXIO);
234 	if (ret)
235 		return ret;
236 
237 	ret = intel_guc_ct_enable(&guc->ct);
238 	if (ret)
239 		return ret;
240 
241 	/* check for mmio messages received before/during the CT enable */
242 	guc_get_mmio_msg(guc);
243 	guc_handle_mmio_msg(guc);
244 
245 	guc_enable_interrupts(guc);
246 
247 	/* check for CT messages received before we enabled interrupts */
248 	spin_lock_irq(&gt->irq_lock);
249 	intel_guc_ct_event_handler(&guc->ct);
250 	spin_unlock_irq(&gt->irq_lock);
251 
252 	drm_dbg(&i915->drm, "GuC communication enabled\n");
253 
254 	return 0;
255 }
256 
257 static void guc_disable_communication(struct intel_guc *guc)
258 {
259 	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
260 
261 	/*
262 	 * Events generated during or after CT disable are logged by guc in
263 	 * via mmio. Make sure the register is clear before disabling CT since
264 	 * all events we cared about have already been processed via CT.
265 	 */
266 	guc_clear_mmio_msg(guc);
267 
268 	guc_disable_interrupts(guc);
269 
270 	intel_guc_ct_disable(&guc->ct);
271 
272 	/*
273 	 * Check for messages received during/after the CT disable. We do not
274 	 * expect any messages to have arrived via CT between the interrupt
275 	 * disable and the CT disable because GuC should've been idle until we
276 	 * triggered the CT disable protocol.
277 	 */
278 	guc_get_mmio_msg(guc);
279 
280 	drm_dbg(&i915->drm, "GuC communication disabled\n");
281 }
282 
283 static void __uc_fetch_firmwares(struct intel_uc *uc)
284 {
285 	int err;
286 
287 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
288 
289 	err = intel_uc_fw_fetch(&uc->guc.fw);
290 	if (err) {
291 		/* Make sure we transition out of transient "SELECTED" state */
292 		if (intel_uc_wants_huc(uc)) {
293 			drm_dbg(&uc_to_gt(uc)->i915->drm,
294 				"Failed to fetch GuC: %d disabling HuC\n", err);
295 			intel_uc_fw_change_status(&uc->huc.fw,
296 						  INTEL_UC_FIRMWARE_ERROR);
297 		}
298 
299 		return;
300 	}
301 
302 	if (intel_uc_wants_huc(uc))
303 		intel_uc_fw_fetch(&uc->huc.fw);
304 }
305 
306 static void __uc_cleanup_firmwares(struct intel_uc *uc)
307 {
308 	intel_uc_fw_cleanup_fetch(&uc->huc.fw);
309 	intel_uc_fw_cleanup_fetch(&uc->guc.fw);
310 }
311 
312 static int __uc_init(struct intel_uc *uc)
313 {
314 	struct intel_guc *guc = &uc->guc;
315 	struct intel_huc *huc = &uc->huc;
316 	int ret;
317 
318 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
319 
320 	if (!intel_uc_uses_guc(uc))
321 		return 0;
322 
323 	if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
324 		return -ENOMEM;
325 
326 	/* XXX: GuC submission is unavailable for now */
327 	GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
328 
329 	ret = intel_guc_init(guc);
330 	if (ret)
331 		return ret;
332 
333 	if (intel_uc_uses_huc(uc)) {
334 		ret = intel_huc_init(huc);
335 		if (ret)
336 			goto out_guc;
337 	}
338 
339 	return 0;
340 
341 out_guc:
342 	intel_guc_fini(guc);
343 	return ret;
344 }
345 
346 static void __uc_fini(struct intel_uc *uc)
347 {
348 	intel_huc_fini(&uc->huc);
349 	intel_guc_fini(&uc->guc);
350 }
351 
352 static int __uc_sanitize(struct intel_uc *uc)
353 {
354 	struct intel_guc *guc = &uc->guc;
355 	struct intel_huc *huc = &uc->huc;
356 
357 	GEM_BUG_ON(!intel_uc_supports_guc(uc));
358 
359 	intel_huc_sanitize(huc);
360 	intel_guc_sanitize(guc);
361 
362 	return __intel_uc_reset_hw(uc);
363 }
364 
365 /* Initialize and verify the uC regs related to uC positioning in WOPCM */
366 static int uc_init_wopcm(struct intel_uc *uc)
367 {
368 	struct intel_gt *gt = uc_to_gt(uc);
369 	struct intel_uncore *uncore = gt->uncore;
370 	u32 base = intel_wopcm_guc_base(&gt->i915->wopcm);
371 	u32 size = intel_wopcm_guc_size(&gt->i915->wopcm);
372 	u32 huc_agent = intel_uc_uses_huc(uc) ? HUC_LOADING_AGENT_GUC : 0;
373 	u32 mask;
374 	int err;
375 
376 	if (unlikely(!base || !size)) {
377 		i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n");
378 		return -E2BIG;
379 	}
380 
381 	GEM_BUG_ON(!intel_uc_supports_guc(uc));
382 	GEM_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
383 	GEM_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
384 	GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
385 	GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
386 
387 	err = i915_inject_probe_error(gt->i915, -ENXIO);
388 	if (err)
389 		return err;
390 
391 	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
392 	err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
393 					    size | GUC_WOPCM_SIZE_LOCKED);
394 	if (err)
395 		goto err_out;
396 
397 	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
398 	err = intel_uncore_write_and_verify(uncore, DMA_GUC_WOPCM_OFFSET,
399 					    base | huc_agent, mask,
400 					    base | huc_agent |
401 					    GUC_WOPCM_OFFSET_VALID);
402 	if (err)
403 		goto err_out;
404 
405 	return 0;
406 
407 err_out:
408 	i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n");
409 	i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
410 			 i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET),
411 			 intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
412 	i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
413 			 i915_mmio_reg_offset(GUC_WOPCM_SIZE),
414 			 intel_uncore_read(uncore, GUC_WOPCM_SIZE));
415 
416 	return err;
417 }
418 
419 static bool uc_is_wopcm_locked(struct intel_uc *uc)
420 {
421 	struct intel_gt *gt = uc_to_gt(uc);
422 	struct intel_uncore *uncore = gt->uncore;
423 
424 	return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) ||
425 	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
426 }
427 
428 static int __uc_check_hw(struct intel_uc *uc)
429 {
430 	if (!intel_uc_supports_guc(uc))
431 		return 0;
432 
433 	/*
434 	 * We can silently continue without GuC only if it was never enabled
435 	 * before on this system after reboot, otherwise we risk GPU hangs.
436 	 * To check if GuC was loaded before we look at WOPCM registers.
437 	 */
438 	if (uc_is_wopcm_locked(uc))
439 		return -EIO;
440 
441 	return 0;
442 }
443 
444 static int __uc_init_hw(struct intel_uc *uc)
445 {
446 	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
447 	struct intel_guc *guc = &uc->guc;
448 	struct intel_huc *huc = &uc->huc;
449 	int ret, attempts;
450 
451 	GEM_BUG_ON(!intel_uc_supports_guc(uc));
452 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
453 
454 	if (!intel_uc_fw_is_loadable(&guc->fw)) {
455 		ret = __uc_check_hw(uc) ||
456 		      intel_uc_fw_is_overridden(&guc->fw) ||
457 		      intel_uc_wants_guc_submission(uc) ?
458 		      intel_uc_fw_status_to_error(guc->fw.status) : 0;
459 		goto err_out;
460 	}
461 
462 	ret = uc_init_wopcm(uc);
463 	if (ret)
464 		goto err_out;
465 
466 	guc_reset_interrupts(guc);
467 
468 	/* WaEnableuKernelHeaderValidFix:skl */
469 	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
470 	if (IS_GEN(i915, 9))
471 		attempts = 3;
472 	else
473 		attempts = 1;
474 
475 	while (attempts--) {
476 		/*
477 		 * Always reset the GuC just before (re)loading, so
478 		 * that the state and timing are fairly predictable
479 		 */
480 		ret = __uc_sanitize(uc);
481 		if (ret)
482 			goto err_out;
483 
484 		intel_huc_fw_upload(huc);
485 		intel_guc_ads_reset(guc);
486 		intel_guc_write_params(guc);
487 		ret = intel_guc_fw_upload(guc);
488 		if (ret == 0)
489 			break;
490 
491 		DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
492 				 "retry %d more time(s)\n", ret, attempts);
493 	}
494 
495 	/* Did we succeded or run out of retries? */
496 	if (ret)
497 		goto err_log_capture;
498 
499 	ret = guc_enable_communication(guc);
500 	if (ret)
501 		goto err_log_capture;
502 
503 	intel_huc_auth(huc);
504 
505 	ret = intel_guc_sample_forcewake(guc);
506 	if (ret)
507 		goto err_communication;
508 
509 	if (intel_uc_uses_guc_submission(uc))
510 		intel_guc_submission_enable(guc);
511 
512 	drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
513 		 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
514 		 guc->fw.major_ver_found, guc->fw.minor_ver_found,
515 		 "submission",
516 		 enableddisabled(intel_uc_uses_guc_submission(uc)));
517 
518 	if (intel_uc_uses_huc(uc)) {
519 		drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
520 			 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
521 			 huc->fw.path,
522 			 huc->fw.major_ver_found, huc->fw.minor_ver_found,
523 			 "authenticated",
524 			 yesno(intel_huc_is_authenticated(huc)));
525 	}
526 
527 	return 0;
528 
529 	/*
530 	 * We've failed to load the firmware :(
531 	 */
532 err_communication:
533 	guc_disable_communication(guc);
534 err_log_capture:
535 	__uc_capture_load_err_log(uc);
536 err_out:
537 	__uc_sanitize(uc);
538 
539 	if (!ret) {
540 		drm_notice(&i915->drm, "GuC is uninitialized\n");
541 		/* We want to run without GuC submission */
542 		return 0;
543 	}
544 
545 	i915_probe_error(i915, "GuC initialization failed %d\n", ret);
546 
547 	/* We want to keep KMS alive */
548 	return -EIO;
549 }
550 
551 static void __uc_fini_hw(struct intel_uc *uc)
552 {
553 	struct intel_guc *guc = &uc->guc;
554 
555 	if (!intel_guc_is_fw_running(guc))
556 		return;
557 
558 	if (intel_uc_uses_guc_submission(uc))
559 		intel_guc_submission_disable(guc);
560 
561 	if (guc_communication_enabled(guc))
562 		guc_disable_communication(guc);
563 
564 	__uc_sanitize(uc);
565 }
566 
567 /**
568  * intel_uc_reset_prepare - Prepare for reset
569  * @uc: the intel_uc structure
570  *
571  * Preparing for full gpu reset.
572  */
573 void intel_uc_reset_prepare(struct intel_uc *uc)
574 {
575 	struct intel_guc *guc = &uc->guc;
576 
577 	if (!intel_guc_is_ready(guc))
578 		return;
579 
580 	guc_disable_communication(guc);
581 	__uc_sanitize(uc);
582 }
583 
584 void intel_uc_runtime_suspend(struct intel_uc *uc)
585 {
586 	struct intel_guc *guc = &uc->guc;
587 	int err;
588 
589 	if (!intel_guc_is_ready(guc))
590 		return;
591 
592 	err = intel_guc_suspend(guc);
593 	if (err)
594 		DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
595 
596 	guc_disable_communication(guc);
597 }
598 
599 void intel_uc_suspend(struct intel_uc *uc)
600 {
601 	struct intel_guc *guc = &uc->guc;
602 	intel_wakeref_t wakeref;
603 
604 	if (!intel_guc_is_ready(guc))
605 		return;
606 
607 	with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
608 		intel_uc_runtime_suspend(uc);
609 }
610 
611 static int __uc_resume(struct intel_uc *uc, bool enable_communication)
612 {
613 	struct intel_guc *guc = &uc->guc;
614 	int err;
615 
616 	if (!intel_guc_is_fw_running(guc))
617 		return 0;
618 
619 	/* Make sure we enable communication if and only if it's disabled */
620 	GEM_BUG_ON(enable_communication == guc_communication_enabled(guc));
621 
622 	if (enable_communication)
623 		guc_enable_communication(guc);
624 
625 	err = intel_guc_resume(guc);
626 	if (err) {
627 		DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
628 		return err;
629 	}
630 
631 	return 0;
632 }
633 
634 int intel_uc_resume(struct intel_uc *uc)
635 {
636 	/*
637 	 * When coming out of S3/S4 we sanitize and re-init the HW, so
638 	 * communication is already re-enabled at this point.
639 	 */
640 	return __uc_resume(uc, false);
641 }
642 
643 int intel_uc_runtime_resume(struct intel_uc *uc)
644 {
645 	/*
646 	 * During runtime resume we don't sanitize, so we need to re-init
647 	 * communication as well.
648 	 */
649 	return __uc_resume(uc, true);
650 }
651 
652 static const struct intel_uc_ops uc_ops_off = {
653 	.init_hw = __uc_check_hw,
654 };
655 
656 static const struct intel_uc_ops uc_ops_on = {
657 	.sanitize = __uc_sanitize,
658 
659 	.init_fw = __uc_fetch_firmwares,
660 	.fini_fw = __uc_cleanup_firmwares,
661 
662 	.init = __uc_init,
663 	.fini = __uc_fini,
664 
665 	.init_hw = __uc_init_hw,
666 	.fini_hw = __uc_fini_hw,
667 };
668