// SPDX-License-Identifier: MIT
/*
 * Copyright(c) 2024 Intel Corporation.
 */

#include "xe_pxp_submit.h"

#include <linux/delay.h>
#include <uapi/drm/xe_drm.h>

#include "xe_device_types.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_pxp_types.h"
#include "xe_sched_job.h"
#include "xe_vm.h"
#include "abi/gsc_command_header_abi.h"
#include "abi/gsc_pxp_commands_abi.h"
#include "instructions/xe_gsc_commands.h"
#include "instructions/xe_mfx_commands.h"
#include "instructions/xe_mi_commands.h"

/*
 * The VCS is used for kernel-owned GGTT submissions to issue key termination.
 * Terminations are serialized, so we only need a single queue and a single
 * batch.
 */
static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
{
	struct xe_gt *gt = pxp->gt;
	struct xe_device *xe = pxp->xe;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_hw_engine *hwe;
	struct xe_exec_queue *q;
	struct xe_bo *bo;
	int err;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
	if (!hwe)
		return -ENODEV;

	q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q))
		return PTR_ERR(q);

	/*
	 * Each termination is 16 DWORDS, so 4K is enough to contain a
	 * termination for each session.
	 */
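	/*
	 * Worked example of the sizing math in the comment above: 16 dwords
	 * * 4 bytes = 64 bytes per termination, so a 4K page has room for
	 * 4096 / 64 = 64 terminations, i.e. one per session.
	 */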
	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto out_queue;
	}

	pxp->vcs_exec.q = q;
	pxp->vcs_exec.bo = bo;

	return 0;

out_queue:
	xe_exec_queue_put(q);
	return err;
}

static void destroy_vcs_execution_resources(struct xe_pxp *pxp)
{
	if (pxp->vcs_exec.bo)
		xe_bo_unpin_map_no_vm(pxp->vcs_exec.bo);

	if (pxp->vcs_exec.q)
		xe_exec_queue_put(pxp->vcs_exec.q);
}

#define PXP_BB_SIZE		XE_PAGE_SIZE
static int allocate_gsc_client_resources(struct xe_gt *gt,
					 struct xe_pxp_gsc_client_resources *gsc_res,
					 size_t inout_size)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_bo *bo;
	struct xe_exec_queue *q;
	struct dma_fence *fence;
	long timeout;
	int err = 0;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);

	/* we shouldn't reach here if the GSC engine is not available */
	xe_assert(xe, hwe);

	/* PXP instructions must be issued from PPGTT */
	vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	/* We allocate a single object for the batch and the in/out memory */
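	/*
	 * Illustrative layout of that single object (offsets as mapped into
	 * gsc_res further down):
	 *
	 *   0               PXP_BB_SIZE       +inout_size       +2*inout_size
	 *   +---------------+-----------------+-----------------+
	 *   | batch buffer  | msg_in          | msg_out         |
	 *   +---------------+-----------------+-----------------+
	 */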
	xe_vm_lock(vm, false);
	bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
	xe_vm_unlock(vm);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto vm_out;
	}

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto bo_out;
	}

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout <= 0) {
		err = timeout ?: -ETIME;
		goto bo_out;
	}

	q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto bo_out;
	}

	gsc_res->vm = vm;
	gsc_res->bo = bo;
	gsc_res->inout_size = inout_size;
	gsc_res->batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
	gsc_res->msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE);
	gsc_res->msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE + inout_size);
	gsc_res->q = q;

	/* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
	gsc_res->host_session_handle = xe_gsc_create_host_session_id();

	return 0;

bo_out:
	xe_bo_unpin_map_no_vm(bo);
vm_out:
	xe_vm_close_and_put(vm);

	return err;
}

static void destroy_gsc_client_resources(struct xe_pxp_gsc_client_resources *gsc_res)
{
	if (!gsc_res->q)
		return;

	xe_exec_queue_put(gsc_res->q);
	xe_bo_unpin_map_no_vm(gsc_res->bo);
	xe_vm_close_and_put(gsc_res->vm);
}

/**
 * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
 * @pxp: the xe_pxp structure
 *
 * Allocates exec_queue objects for VCS and GSCCS submission. The GSCCS
 * submissions are done via PPGTT, so this function also allocates a VM and
 * maps the backing object into it.
 *
 * Returns 0 if the allocation and mapping are successful, an errno value
 * otherwise.
 */
int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
{
	int err;

	err = allocate_vcs_execution_resources(pxp);
	if (err)
		return err;

	/*
	 * PXP commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE),
	 * but we currently only support a subset of commands that are small
	 * (< 20 dwords), so a single page is enough for now.
	 */
	err = allocate_gsc_client_resources(pxp->gt, &pxp->gsc_res, XE_PAGE_SIZE);
	if (err)
		goto destroy_vcs_context;

	return 0;

destroy_vcs_context:
	destroy_vcs_execution_resources(pxp);
	return err;
}

void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
{
	destroy_gsc_client_resources(&pxp->gsc_res);
	destroy_vcs_execution_resources(pxp);
}

#define emit_cmd(xe_, map_, offset_, val_) \
	xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_)
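/*
 * Note that emit_cmd() takes the offset in dwords, e.g.
 * emit_cmd(xe, batch, 3, MI_NOOP) writes MI_NOOP at byte offset 12.
 */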

/* stall until prior PXP and MFX/HCP/HUC objects are completed */
#define MFX_WAIT_PXP (MFX_WAIT | \
		      MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \
		      MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG)
static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset)
{
	/* wait for cmds to go through */
	emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
	emit_cmd(xe, batch, offset++, 0);

	return offset;
}

static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch,
				      u32 offset, u32 idx)
{
	offset = pxp_emit_wait(xe, batch, offset);

	/* pxp off */
	emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);

	/* select session */
	emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx));
	emit_cmd(xe, batch, offset++, 0);

	offset = pxp_emit_wait(xe, batch, offset);

	/* pxp on */
	emit_cmd(xe, batch, offset++, MI_FLUSH_DW |
				      MI_FLUSH_DW_PROTECTED_MEM_EN |
				      MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX |
				      MI_FLUSH_IMM_DW);
	emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR |
				      MI_FLUSH_DW_USE_GTT);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);

	offset = pxp_emit_wait(xe, batch, offset);

	return offset;
}

static u32 pxp_emit_inline_termination(struct xe_device *xe,
				       struct iosys_map *batch, u32 offset)
{
	/* session inline termination */
	emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE);
	emit_cmd(xe, batch, offset++, 0);

	return offset;
}

static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch,
					u32 offset, u32 idx)
{
	offset = pxp_emit_session_selection(xe, batch, offset, idx);
	offset = pxp_emit_inline_termination(xe, batch, offset);

	return offset;
}
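
/*
 * For reference, a full termination batch as emitted by the helpers above
 * plus the trailing wait and MI_BATCH_BUFFER_END added by the submission
 * path below looks like:
 *
 *   MFX_WAIT_PXP
 *   MI_FLUSH_DW (pxp off)
 *   MI_SET_APPID (select session idx)
 *   MFX_WAIT_PXP
 *   MI_FLUSH_DW (pxp on, store to the PPHWSP invalidation scratch)
 *   MFX_WAIT_PXP
 *   CRYPTO_KEY_EXCHANGE (inline termination)
 *   MFX_WAIT_PXP
 *   MI_BATCH_BUFFER_END
 */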

/**
 * xe_pxp_submit_session_termination - submits a PXP inline termination
 * @pxp: the xe_pxp structure
 * @id: the session to terminate
 *
 * Emit an inline termination via the VCS engine to terminate a session.
 *
 * Returns 0 if the submission is successful, an errno value otherwise.
 */
int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id)
{
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;
	u32 offset = 0;
	u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo);

	offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id);
	offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset);
	emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END);

	job = xe_sched_job_create(pxp->vcs_exec.q, &addr);
	if (IS_ERR(job))
		return PTR_ERR(job);

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);

	dma_fence_put(fence);

	if (!timeout)
		return -ETIMEDOUT;
	else if (timeout < 0)
		return timeout;

	return 0;
}

static bool
is_fw_err_platform_config(u32 type)
{
	switch (type) {
	case PXP_STATUS_ERROR_API_VERSION:
	case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
	case PXP_STATUS_PLATFCONFIG_KF1_BAD:
	case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED:
		return true;
	default:
		break;
	}
	return false;
}

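/*
 * Note: fw_err_to_string() below returns NULL for status codes it doesn't
 * recognize; in this file it is only called for platform-config errors, all
 * of which map to a string.
 */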
static const char *
fw_err_to_string(u32 type)
{
	switch (type) {
	case PXP_STATUS_ERROR_API_VERSION:
		return "ERR_API_VERSION";
	case PXP_STATUS_NOT_READY:
		return "ERR_NOT_READY";
	case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
	case PXP_STATUS_PLATFCONFIG_KF1_BAD:
	case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED:
		return "ERR_PLATFORM_CONFIG";
	default:
		break;
	}
	return NULL;
}

static int pxp_pkt_submit(struct xe_exec_queue *q, u64 batch_addr)
{
	struct xe_gt *gt = q->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	xe_assert(xe, q->hwe->engine_id == XE_HW_ENGINE_GSCCS0);

	job = xe_sched_job_create(q, &batch_addr);
	if (IS_ERR(job))
		return PTR_ERR(job);

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

static void emit_pxp_heci_cmd(struct xe_device *xe, struct iosys_map *batch,
			      u64 addr_in, u32 size_in, u64 addr_out, u32 size_out)
{
	u32 len = 0;

	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, GSC_HECI_CMD_PKT);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_in));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_in));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_in);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_out));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_out));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_out);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 0);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, MI_BATCH_BUFFER_END);
}

#define GSC_PENDING_RETRY_MAXCOUNT 40
#define GSC_PENDING_RETRY_PAUSE_MS 50
static int gsccs_send_message(struct xe_pxp_gsc_client_resources *gsc_res,
			      void *msg_in, size_t msg_in_size,
			      void *msg_out, size_t msg_out_size_max)
{
	struct xe_device *xe = gsc_res->vm->xe;
	const size_t max_msg_size = gsc_res->inout_size - sizeof(struct intel_gsc_mtl_header);
	u32 wr_offset;
	u32 rd_offset;
	u32 reply_size;
	u32 min_reply_size = 0;
	int ret;
	int retry = GSC_PENDING_RETRY_MAXCOUNT;

	if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size)
		return -ENOSPC;

	wr_offset = xe_gsc_emit_header(xe, &gsc_res->msg_in, 0,
				       HECI_MEADDRESS_PXP,
				       gsc_res->host_session_handle,
				       msg_in_size);

	/* NOTE: zero-size packets are used for session cleanup */
	if (msg_in && msg_in_size) {
		xe_map_memcpy_to(xe, &gsc_res->msg_in, wr_offset,
				 msg_in, msg_in_size);
		min_reply_size = sizeof(struct pxp_cmd_header);
	}

	/* Make sure the reply header does not contain stale data */
	xe_gsc_poison_header(xe, &gsc_res->msg_out, 0);

	/*
	 * The BO is mapped at address 0 of the PPGTT, so no need to add its
	 * base offset when calculating the in/out addresses.
	 */
	emit_pxp_heci_cmd(xe, &gsc_res->batch, PXP_BB_SIZE,
			  wr_offset + msg_in_size, PXP_BB_SIZE + gsc_res->inout_size,
			  wr_offset + msg_out_size_max);
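
	/*
	 * Note: wr_offset, returned by xe_gsc_emit_header() above, equals the
	 * size of the GSC header, so the in/out sizes passed to
	 * emit_pxp_heci_cmd() cover header plus payload.
	 */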

	xe_device_wmb(xe);

	/*
	 * If the GSC needs to communicate with CSME to complete our request,
	 * it'll set the "pending" flag in the return header. In this scenario
	 * we're expected to wait 50ms to give the proxy code some time to
	 * handle the GSC<->CSME communication and then try again. Note that,
	 * although in most cases the 50ms window is enough, the proxy flow is
	 * not actually guaranteed to complete within that time period, so we
	 * might have to try multiple times, up to a worst case of 2 seconds
	 * (GSC_PENDING_RETRY_MAXCOUNT * GSC_PENDING_RETRY_PAUSE_MS = 40 * 50ms),
	 * after which the request is considered aborted.
	 */
	do {
		ret = pxp_pkt_submit(gsc_res->q, 0);
		if (ret)
			break;

		if (xe_gsc_check_and_update_pending(xe, &gsc_res->msg_in, 0,
						    &gsc_res->msg_out, 0)) {
			ret = -EAGAIN;
			msleep(GSC_PENDING_RETRY_PAUSE_MS);
		}
	} while (--retry && ret == -EAGAIN);

	if (ret) {
		drm_err(&xe->drm, "failed to submit GSC PXP message (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	ret = xe_gsc_read_out_header(xe, &gsc_res->msg_out, 0,
				     min_reply_size, &rd_offset);
	if (ret) {
		drm_err(&xe->drm, "invalid GSC reply for PXP (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	if (msg_out && min_reply_size) {
		reply_size = xe_map_rd_field(xe, &gsc_res->msg_out, rd_offset,
					     struct pxp_cmd_header, buffer_len);
		reply_size += sizeof(struct pxp_cmd_header);

		if (reply_size > msg_out_size_max) {
			drm_warn(&xe->drm, "PXP reply size overflow: %u (%zu)\n",
				 reply_size, msg_out_size_max);
			reply_size = msg_out_size_max;
		}

		xe_map_memcpy_from(xe, msg_out, &gsc_res->msg_out,
				   rd_offset, reply_size);
	}

	xe_gsc_poison_header(xe, &gsc_res->msg_in, 0);

	return ret;
}

/**
 * xe_pxp_submit_session_invalidation - submits a PXP GSC invalidation
 * @gsc_res: the pxp client resources
 * @id: the session to invalidate
 *
 * Submit a message to the GSC FW to notify it that a session has been
 * terminated and is therefore invalid.
 *
 * Returns 0 if the submission is successful, an errno value otherwise.
 */
int xe_pxp_submit_session_invalidation(struct xe_pxp_gsc_client_resources *gsc_res, u32 id)
{
	struct xe_device *xe = gsc_res->vm->xe;
	struct pxp43_inv_stream_key_in msg_in = {0};
	struct pxp43_inv_stream_key_out msg_out = {0};
	int ret = 0;

	/*
	 * Stream-key invalidation reuses the same version 4.2 input/output
	 * command format, but the firmware requires 4.3 API interaction
	 */
	msg_in.header.api_version = PXP_APIVER(4, 3);
	msg_in.header.command_id = PXP43_CMDID_INVALIDATE_STREAM_KEY;
	msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header);

	msg_in.header.stream_id = FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_VALID, 1);
	msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_APP_TYPE, 0);
	msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_ID, id);

	ret = gsccs_send_message(gsc_res, &msg_in, sizeof(msg_in),
				 &msg_out, sizeof(msg_out));
	if (ret) {
		drm_err(&xe->drm, "Failed to invalidate PXP stream-key %u (%pe)\n",
			id, ERR_PTR(ret));
	} else if (msg_out.header.status != 0) {
		ret = -EIO;

		if (is_fw_err_platform_config(msg_out.header.status))
			drm_info_once(&xe->drm,
				      "Failed to invalidate PXP stream-key %u: BIOS/SOC 0x%08x(%s)\n",
				      id, msg_out.header.status,
				      fw_err_to_string(msg_out.header.status));
		else
			drm_dbg(&xe->drm, "Failed to invalidate stream-key %u, s=0x%08x\n",
				id, msg_out.header.status);
	}

	return ret;
}