xref: /linux/drivers/gpu/drm/xe/xe_gsc.c (revision aae84bf1cd96889a7d80b6b50131f60aa63899d7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2023 Intel Corporation
4  */
5 
6 #include "xe_gsc.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "generated/xe_wa_oob.h"
11 #include "xe_bb.h"
12 #include "xe_bo.h"
13 #include "xe_device.h"
14 #include "xe_exec_queue.h"
15 #include "xe_gt.h"
16 #include "xe_gt_printk.h"
17 #include "xe_map.h"
18 #include "xe_mmio.h"
19 #include "xe_sched_job.h"
20 #include "xe_uc_fw.h"
21 #include "xe_wa.h"
22 #include "instructions/xe_gsc_commands.h"
23 #include "regs/xe_gsc_regs.h"
24 
25 static struct xe_gt *
26 gsc_to_gt(struct xe_gsc *gsc)
27 {
28 	return container_of(gsc, struct xe_gt, uc.gsc);
29 }
30 
31 static int memcpy_fw(struct xe_gsc *gsc)
32 {
33 	struct xe_gt *gt = gsc_to_gt(gsc);
34 	struct xe_device *xe = gt_to_xe(gt);
35 	u32 fw_size = gsc->fw.size;
36 	void *storage;
37 
38 	/*
39 	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
40 	 * a memcpy for now.
41 	 */
42 	storage = kmalloc(fw_size, GFP_KERNEL);
43 	if (!storage)
44 		return -ENOMEM;
45 
46 	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
47 	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
48 	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
49 
50 	kfree(storage);
51 
52 	return 0;
53 }
54 
55 static int emit_gsc_upload(struct xe_gsc *gsc)
56 {
57 	struct xe_gt *gt = gsc_to_gt(gsc);
58 	u64 offset = xe_bo_ggtt_addr(gsc->private);
59 	struct xe_bb *bb;
60 	struct xe_sched_job *job;
61 	struct dma_fence *fence;
62 	long timeout;
63 
64 	bb = xe_bb_new(gt, 4, false);
65 	if (IS_ERR(bb))
66 		return PTR_ERR(bb);
67 
68 	bb->cs[bb->len++] = GSC_FW_LOAD;
69 	bb->cs[bb->len++] = lower_32_bits(offset);
70 	bb->cs[bb->len++] = upper_32_bits(offset);
71 	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
72 
73 	job = xe_bb_create_job(gsc->q, bb);
74 	if (IS_ERR(job)) {
75 		xe_bb_free(bb, NULL);
76 		return PTR_ERR(job);
77 	}
78 
79 	xe_sched_job_arm(job);
80 	fence = dma_fence_get(&job->drm.s_fence->finished);
81 	xe_sched_job_push(job);
82 
83 	timeout = dma_fence_wait_timeout(fence, false, HZ);
84 	dma_fence_put(fence);
85 	xe_bb_free(bb, NULL);
86 	if (timeout < 0)
87 		return timeout;
88 	else if (!timeout)
89 		return -ETIME;
90 
91 	return 0;
92 }
93 
94 static int gsc_fw_is_loaded(struct xe_gt *gt)
95 {
96 	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
97 			      HECI1_FWSTS1_INIT_COMPLETE;
98 }
99 
100 static int gsc_fw_wait(struct xe_gt *gt)
101 {
102 	/*
103 	 * GSC load can take up to 250ms from the moment the instruction is
104 	 * executed by the GSCCS. To account for possible submission delays or
105 	 * other issues, we use a 500ms timeout in the wait here.
106 	 */
107 	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
108 			      HECI1_FWSTS1_INIT_COMPLETE,
109 			      HECI1_FWSTS1_INIT_COMPLETE,
110 			      500 * USEC_PER_MSEC, NULL, false);
111 }
112 
113 static int gsc_upload(struct xe_gsc *gsc)
114 {
115 	struct xe_gt *gt = gsc_to_gt(gsc);
116 	struct xe_device *xe = gt_to_xe(gt);
117 	int err;
118 
119 	/* we should only be here if the init step were successful */
120 	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);
121 
122 	if (gsc_fw_is_loaded(gt)) {
123 		xe_gt_err(gt, "GSC already loaded at upload time\n");
124 		return -EEXIST;
125 	}
126 
127 	err = memcpy_fw(gsc);
128 	if (err) {
129 		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
130 		return err;
131 	}
132 
133 	err = emit_gsc_upload(gsc);
134 	if (err) {
135 		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
136 		return err;
137 	}
138 
139 	err = gsc_fw_wait(gt);
140 	if (err) {
141 		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
142 		return err;
143 	}
144 
145 	xe_gt_dbg(gt, "GSC FW async load completed\n");
146 
147 	return 0;
148 }
149 
150 static void gsc_work(struct work_struct *work)
151 {
152 	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
153 	struct xe_gt *gt = gsc_to_gt(gsc);
154 	struct xe_device *xe = gt_to_xe(gt);
155 	int ret;
156 
157 	xe_device_mem_access_get(xe);
158 	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
159 
160 	ret = gsc_upload(gsc);
161 	if (ret && ret != -EEXIST)
162 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
163 	else
164 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
165 
166 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
167 	xe_device_mem_access_put(xe);
168 }
169 
170 int xe_gsc_init(struct xe_gsc *gsc)
171 {
172 	struct xe_gt *gt = gsc_to_gt(gsc);
173 	struct xe_tile *tile = gt_to_tile(gt);
174 	int ret;
175 
176 	gsc->fw.type = XE_UC_FW_TYPE_GSC;
177 	INIT_WORK(&gsc->work, gsc_work);
178 
179 	/* The GSC uC is only available on the media GT */
180 	if (tile->media_gt && (gt != tile->media_gt)) {
181 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
182 		return 0;
183 	}
184 
185 	/*
186 	 * Some platforms can have GuC but not GSC. That would cause
187 	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
188 	 * all firmware loading. So check for GSC being enabled before
189 	 * propagating the failure back up. That way the higher level will keep
190 	 * going and load GuC as appropriate.
191 	 */
192 	ret = xe_uc_fw_init(&gsc->fw);
193 	if (!xe_uc_fw_is_enabled(&gsc->fw))
194 		return 0;
195 	else if (ret)
196 		goto out;
197 
198 	return 0;
199 
200 out:
201 	xe_gt_err(gt, "GSC init failed with %d", ret);
202 	return ret;
203 }
204 
205 static void free_resources(struct drm_device *drm, void *arg)
206 {
207 	struct xe_gsc *gsc = arg;
208 
209 	if (gsc->wq) {
210 		destroy_workqueue(gsc->wq);
211 		gsc->wq = NULL;
212 	}
213 
214 	if (gsc->q) {
215 		xe_exec_queue_put(gsc->q);
216 		gsc->q = NULL;
217 	}
218 
219 	if (gsc->private) {
220 		xe_bo_unpin_map_no_vm(gsc->private);
221 		gsc->private = NULL;
222 	}
223 }
224 
225 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
226 {
227 	struct xe_gt *gt = gsc_to_gt(gsc);
228 	struct xe_tile *tile = gt_to_tile(gt);
229 	struct xe_device *xe = gt_to_xe(gt);
230 	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
231 	struct xe_exec_queue *q;
232 	struct workqueue_struct *wq;
233 	struct xe_bo *bo;
234 	int err;
235 
236 	if (!xe_uc_fw_is_available(&gsc->fw))
237 		return 0;
238 
239 	if (!hwe)
240 		return -ENODEV;
241 
242 	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
243 				  ttm_bo_type_kernel,
244 				  XE_BO_CREATE_STOLEN_BIT |
245 				  XE_BO_CREATE_GGTT_BIT);
246 	if (IS_ERR(bo))
247 		return PTR_ERR(bo);
248 
249 	q = xe_exec_queue_create(xe, NULL,
250 				 BIT(hwe->logical_instance), 1, hwe,
251 				 EXEC_QUEUE_FLAG_KERNEL |
252 				 EXEC_QUEUE_FLAG_PERMANENT);
253 	if (IS_ERR(q)) {
254 		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
255 		err = PTR_ERR(q);
256 		goto out_bo;
257 	}
258 
259 	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
260 	if (!wq) {
261 		err = -ENOMEM;
262 		goto out_q;
263 	}
264 
265 	gsc->private = bo;
266 	gsc->q = q;
267 	gsc->wq = wq;
268 
269 	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
270 	if (err)
271 		return err;
272 
273 	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);
274 
275 	return 0;
276 
277 out_q:
278 	xe_exec_queue_put(q);
279 out_bo:
280 	xe_bo_unpin_map_no_vm(bo);
281 	return err;
282 }
283 
284 void xe_gsc_load_start(struct xe_gsc *gsc)
285 {
286 	struct xe_gt *gt = gsc_to_gt(gsc);
287 
288 	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
289 		return;
290 
291 	/* GSC FW survives GT reset and D3Hot */
292 	if (gsc_fw_is_loaded(gt)) {
293 		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
294 		return;
295 	}
296 
297 	queue_work(gsc->wq, &gsc->work);
298 }
299 
300 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
301 {
302 	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
303 		flush_work(&gsc->work);
304 }
305 
306 /*
307  * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
308  * GSC engine reset by writing a notification bit in the GS1 register and then
309  * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to prepare for the reset, so we need to wait for that amount
311  * of time.
312  * After the reset is complete we need to then clear the GS1 register.
313  */
314 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
315 {
316 	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
317 	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
318 
319 	/* WA only applies if the GSC is loaded */
320 	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
321 		return;
322 
323 	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
324 
325 	if (prep) {
326 		/* make sure the reset bit is clear when writing the CSR reg */
327 		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
328 			      HECI_H_CSR_RST, HECI_H_CSR_IG);
329 		msleep(200);
330 	}
331 }
332