xref: /linux/drivers/gpu/drm/xe/xe_gsc.c (revision 8cdcef1c2f82d207aa8b2a02298fbc17191c6261)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <drm/drm_managed.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "generated/xe_wa_oob.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

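/*
 * Copy the GSC FW image out of its bo and into the private allocation the GSC
 * will run from, zeroing out the space left over at the end.
 */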
static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}

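/*
 * Submit a GSC_FW_LOAD batch on the GSC queue, pointing the GSCCS at the
 * private allocation, and wait up to a second for the job to complete.
 */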
static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

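/*
 * Fill in the MKHI "get host compatibility version" request at wr_offset and
 * return the offset immediately past it.
 */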
static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}

#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
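/*
 * Ask the GSC FW which interface version it is compatible with. The request
 * and the reply share a temporary bo: the input message goes in the first 4K
 * page and the GSC writes its answer in the second one.
 */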
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_SYSTEM_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

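/* The GSC FW sets the INIT_COMPLETE bit in FWSTS1 once it is up and running */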
static bool gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
			      HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}

static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure the GSC
	 * stops using that memory when we release it back to the system on
	 * driver unload. Note that this is not a problem for the unload per
	 * se, because the GSC will not touch that memory unless there are
	 * requests for it coming from the driver; therefore, no accesses will
	 * happen while Xe is not loaded, but if we re-load the driver then the
	 * GSC might wake up and try to access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * the mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	xe_gt_dbg(gt, "GSC FW async load completed\n");

	return 0;
}

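/*
 * The GSC FW load is done from a worker so that the caller of
 * xe_gsc_load_start() doesn't have to wait for it; the outcome is recorded in
 * the uc_fw status (TRANSFERRED on success, LOAD_FAIL otherwise).
 */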
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);

	ret = gsc_upload(gsc);
	if (ret && ret != -EEXIST)
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
	else
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_device_mem_access_put(xe);
}

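/*
 * First-level init: the GSC uC only exists on the media GT, so mark it as not
 * supported anywhere else and otherwise let xe_uc_fw_init() fetch the blob.
 */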
int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
	 * all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will keep
	 * going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

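/* drmm release action: undo what xe_gsc_init_post_hwconfig() allocated */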
static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}

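/*
 * Second-level init, called once the hwconfig is available: allocate the
 * stolen-memory bo the GSC runs from, the kernel exec queue used to submit to
 * the GSCCS and the ordered workqueue that runs the load worker.
 */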
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

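/* Kick off the asynchronous GSC FW load, unless the FW is already running */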
void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	queue_work(gsc->wq, &gsc->work);
}

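/* Wait for a pending GSC load worker to finish */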
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to prepare for the reset, so we need to wait for that amount of
 * time.
 * After the reset is complete we then need to clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}