xref: /linux/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c (revision 6f17ab9a63e670bd62a287f95e3982f99eafd77e)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2019 Intel Corporation
4  *
5  * Authors:
6  *    Vinit Azad <vinit.azad@intel.com>
7  *    Ben Widawsky <ben@bwidawsk.net>
8  *    Dave Gordon <david.s.gordon@intel.com>
9  *    Alex Dai <yu.dai@intel.com>
10  */
11 
12 #include "gt/intel_gt.h"
13 #include "gt/intel_gt_mcr.h"
14 #include "gt/intel_gt_regs.h"
15 #include "gt/intel_rps.h"
16 #include "intel_guc_fw.h"
17 #include "intel_guc_print.h"
18 #include "i915_drv.h"
19 
20 static void guc_prepare_xfer(struct intel_gt *gt)
21 {
22 	struct intel_uncore *uncore = gt->uncore;
23 
24 	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
25 			 GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
26 			 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
27 			 GUC_ENABLE_MIA_CLOCK_GATING;
28 
29 	if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 55))
30 		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
31 			      GUC_ENABLE_MIA_CACHING;
32 
33 	/* Must program this register before loading the ucode with DMA */
34 	intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
35 
36 	if (IS_GEN9_LP(uncore->i915))
37 		intel_uncore_write(uncore, GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
38 	else
39 		intel_uncore_write(uncore, GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
40 
41 	if (GRAPHICS_VER(uncore->i915) == 9) {
42 		/* DOP Clock Gating Enable for GuC clocks */
43 		intel_uncore_rmw(uncore, GEN7_MISCCPCTL, 0,
44 				 GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
45 
46 		/* allows for 5us (in 10ns units) before GT can go to RC6 */
47 		intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
48 	}
49 
50 	/*
51 	 * Starting from IP 12.50 we need to enable the mirroring of GuC
52 	 * internal state to debug registers. This is always enabled on previous
53 	 * IPs.
54 	 */
55 	if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50))
56 		intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG);
57 }
58 
59 static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,
60 			     struct intel_uncore *uncore)
61 {
62 	u32 rsa[UOS_RSA_SCRATCH_COUNT];
63 	size_t copied;
64 	int i;
65 
66 	copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
67 	if (copied < sizeof(rsa))
68 		return -ENOMEM;
69 
70 	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
71 		intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
72 
73 	return 0;
74 }
75 
76 static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw,
77 			    struct intel_uncore *uncore)
78 {
79 	struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
80 
81 	intel_uncore_write(uncore, UOS_RSA_SCRATCH(0),
82 			   intel_guc_ggtt_offset(guc, guc_fw->rsa_data));
83 
84 	return 0;
85 }
86 
87 /* Copy RSA signature from the fw image to HW for verification */
88 static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
89 			struct intel_uncore *uncore)
90 {
91 	if (guc_fw->rsa_data)
92 		return guc_xfer_rsa_vma(guc_fw, uncore);
93 	else
94 		return guc_xfer_rsa_mmio(guc_fw, uncore);
95 }
96 
97 /*
98  * Read the GuC status register (GUC_STATUS) and store it in the
99  * specified location; then return a boolean indicating whether
100  * the value matches either completion or a known failure code.
101  *
102  * This is used for polling the GuC status in a wait_for()
103  * loop below.
104  */
105 static inline bool guc_load_done(struct intel_uncore *uncore, u32 *status, bool *success)
106 {
107 	u32 val = intel_uncore_read(uncore, GUC_STATUS);
108 	u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
109 	u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, val);
110 
111 	*status = val;
112 	switch (uk_val) {
113 	case INTEL_GUC_LOAD_STATUS_READY:
114 		*success = true;
115 		return true;
116 
117 	case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
118 	case INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
119 	case INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
120 	case INTEL_GUC_LOAD_STATUS_HWCONFIG_ERROR:
121 	case INTEL_GUC_LOAD_STATUS_DPC_ERROR:
122 	case INTEL_GUC_LOAD_STATUS_EXCEPTION:
123 	case INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID:
124 	case INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID:
125 	case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
126 	case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
127 		*success = false;
128 		return true;
129 	}
130 
131 	switch (br_val) {
132 	case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
133 	case INTEL_BOOTROM_STATUS_RSA_FAILED:
134 	case INTEL_BOOTROM_STATUS_PAVPC_FAILED:
135 	case INTEL_BOOTROM_STATUS_WOPCM_FAILED:
136 	case INTEL_BOOTROM_STATUS_LOADLOC_FAILED:
137 	case INTEL_BOOTROM_STATUS_JUMP_FAILED:
138 	case INTEL_BOOTROM_STATUS_RC6CTXCONFIG_FAILED:
139 	case INTEL_BOOTROM_STATUS_MPUMAP_INCORRECT:
140 	case INTEL_BOOTROM_STATUS_EXCEPTION:
141 	case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
142 		*success = false;
143 		return true;
144 	}
145 
146 	return false;
147 }
148 
/*
 * Number of one-second wait_for() passes guc_wait_ucode() makes before
 * giving up on the load.
 *
 * Debug builds get a longer limit so that problems can be detected and
 * analysed; release builds get a shorter one so that users don't wait
 * forever to find out there is a problem. Note that the only reason an end
 * user should hit the timeout is in case of extreme thermal throttling.
 * And a system that is that hot during boot is probably dead anyway!
 */
#define GUC_LOAD_RETRY_LIMIT	(IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) ? 20 : 3)
161 
162 static int guc_wait_ucode(struct intel_guc *guc)
163 {
164 	struct intel_gt *gt = guc_to_gt(guc);
165 	struct intel_uncore *uncore = gt->uncore;
166 	ktime_t before, after, delta;
167 	bool success;
168 	u32 status;
169 	int ret, count;
170 	u64 delta_ms;
171 	u32 before_freq;
172 
173 	/*
174 	 * Wait for the GuC to start up.
175 	 *
176 	 * Measurements indicate this should take no more than 20ms
177 	 * (assuming the GT clock is at maximum frequency). So, a
178 	 * timeout here indicates that the GuC has failed and is unusable.
179 	 * (Higher levels of the driver may decide to reset the GuC and
180 	 * attempt the ucode load again if this happens.)
181 	 *
182 	 * FIXME: There is a known (but exceedingly unlikely) race condition
183 	 * where the asynchronous frequency management code could reduce
184 	 * the GT clock while a GuC reload is in progress (during a full
185 	 * GT reset). A fix is in progress but there are complex locking
186 	 * issues to be resolved. In the meantime bump the timeout to
187 	 * 200ms. Even at slowest clock, this should be sufficient. And
188 	 * in the working case, a larger timeout makes no difference.
189 	 *
190 	 * IFWI updates have also been seen to cause sporadic failures due to
191 	 * the requested frequency not being granted and thus the firmware
192 	 * load is attempted at minimum frequency. That can lead to load times
193 	 * in the seconds range. However, there is a limit on how long an
194 	 * individual wait_for() can wait. So wrap it in a loop.
195 	 */
196 	before_freq = intel_rps_read_actual_frequency(&gt->rps);
197 	before = ktime_get();
198 	for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
199 		ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
200 		if (!ret || !success)
201 			break;
202 
203 		guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n",
204 			count, intel_rps_read_actual_frequency(&gt->rps), status,
205 			REG_FIELD_GET(GS_BOOTROM_MASK, status),
206 			REG_FIELD_GET(GS_UKERNEL_MASK, status));
207 	}
208 	after = ktime_get();
209 	delta = ktime_sub(after, before);
210 	delta_ms = ktime_to_ms(delta);
211 	if (ret || !success) {
212 		u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
213 		u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
214 
215 		guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
216 			 status, delta_ms, intel_rps_read_actual_frequency(&gt->rps), ret);
217 		guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
218 			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
219 			 bootrom, ukernel,
220 			 REG_FIELD_GET(GS_MIA_MASK, status),
221 			 REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
222 
223 		switch (bootrom) {
224 		case INTEL_BOOTROM_STATUS_NO_KEY_FOUND:
225 			guc_info(guc, "invalid key requested, header = 0x%08X\n",
226 				 intel_uncore_read(uncore, GUC_HEADER_INFO));
227 			ret = -ENOEXEC;
228 			break;
229 
230 		case INTEL_BOOTROM_STATUS_RSA_FAILED:
231 			guc_info(guc, "firmware signature verification failed\n");
232 			ret = -ENOEXEC;
233 			break;
234 
235 		case INTEL_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
236 			guc_info(guc, "firmware production part check failure\n");
237 			ret = -ENOEXEC;
238 			break;
239 		}
240 
241 		switch (ukernel) {
242 		case INTEL_GUC_LOAD_STATUS_EXCEPTION:
243 			guc_info(guc, "firmware exception. EIP: %#x\n",
244 				 intel_uncore_read(uncore, SOFT_SCRATCH(13)));
245 			ret = -ENXIO;
246 			break;
247 
248 		case INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
249 			guc_info(guc, "illegal register in save/restore workaround list\n");
250 			ret = -EPERM;
251 			break;
252 
253 		case INTEL_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
254 			guc_info(guc, "invalid w/a KLV entry\n");
255 			ret = -EINVAL;
256 			break;
257 
258 		case INTEL_GUC_LOAD_STATUS_HWCONFIG_START:
259 			guc_info(guc, "still extracting hwconfig table.\n");
260 			ret = -ETIMEDOUT;
261 			break;
262 		}
263 
264 		/* Uncommon/unexpected error, see earlier status code print for details */
265 		if (ret == 0)
266 			ret = -ENXIO;
267 	} else if (delta_ms > 200) {
268 		guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
269 			 delta_ms, status, count, ret);
270 		guc_warn(guc, "excessive init time: [freq = %dMHz -> %dMHz vs %dMHz, perf_limit_reasons = 0x%08X]\n",
271 			 before_freq, intel_rps_read_actual_frequency(&gt->rps),
272 			 intel_rps_get_requested_frequency(&gt->rps),
273 			 intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
274 	} else {
275 		guc_dbg(guc, "init took %lldms, freq = %dMHz -> %dMHz vs %dMHz, status = 0x%08X, count = %d, ret = %d\n",
276 			delta_ms, before_freq, intel_rps_read_actual_frequency(&gt->rps),
277 			intel_rps_get_requested_frequency(&gt->rps), status, count, ret);
278 	}
279 
280 	return ret;
281 }
282 
283 /**
284  * intel_guc_fw_upload() - load GuC uCode to device
285  * @guc: intel_guc structure
286  *
287  * Called from intel_uc_init_hw() during driver load, resume from sleep and
288  * after a GPU reset.
289  *
290  * The firmware image should have already been fetched into memory, so only
291  * check that fetch succeeded, and then transfer the image to the h/w.
292  *
293  * Return:	non-zero code on error
294  */
295 int intel_guc_fw_upload(struct intel_guc *guc)
296 {
297 	struct intel_gt *gt = guc_to_gt(guc);
298 	struct intel_uncore *uncore = gt->uncore;
299 	int ret;
300 
301 	guc_prepare_xfer(gt);
302 
303 	/*
304 	 * Note that GuC needs the CSS header plus uKernel code to be copied
305 	 * by the DMA engine in one operation, whereas the RSA signature is
306 	 * loaded separately, either by copying it to the UOS_RSA_SCRATCH
307 	 * register (if key size <= 256) or through a ggtt-pinned vma (if key
308 	 * size > 256). The RSA size and therefore the way we provide it to the
309 	 * HW is fixed for each platform and hard-coded in the bootrom.
310 	 */
311 	ret = guc_xfer_rsa(&guc->fw, uncore);
312 	if (ret)
313 		goto out;
314 
315 	/*
316 	 * Current uCode expects the code to be loaded at 8k; locations below
317 	 * this are used for the stack.
318 	 */
319 	ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
320 	if (ret)
321 		goto out;
322 
323 	ret = guc_wait_ucode(guc);
324 	if (ret)
325 		goto out;
326 
327 	intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_RUNNING);
328 	return 0;
329 
330 out:
331 	intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
332 	return ret;
333 }
334