// SPDX-License-Identifier: MIT
/*
 * Copyright(c) 2024 Intel Corporation.
 */

#include "xe_pxp_submit.h"

#include <linux/delay.h>
#include <uapi/drm/xe_drm.h>

#include "abi/gsc_command_header_abi.h"
#include "abi/gsc_pxp_commands_abi.h"
#include "instructions/xe_gsc_commands.h"
#include "instructions/xe_mfx_commands.h"
#include "instructions/xe_mi_commands.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device_types.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_pxp_types.h"
#include "xe_sched_job.h"
#include "xe_vm.h"

/*
 * The VCS is used for kernel-owned GGTT submissions to issue key termination.
 * Terminations are serialized, so we only need a single queue and a single
 * batch.
 */
static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
{
	struct xe_gt *gt = pxp->gt;
	struct xe_device *xe = pxp->xe;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_hw_engine *hwe;
	struct xe_exec_queue *q;
	struct xe_bo *bo;
	int err;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
	if (!hwe)
		return -ENODEV;

	q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q))
		return PTR_ERR(q);

	/*
	 * Each termination is 16 DWORDS, so 4K is enough to contain a
	 * termination for each session.
	 */
	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto out_queue;
	}

	pxp->vcs_exec.q = q;
	pxp->vcs_exec.bo = bo;

	return 0;

out_queue:
	xe_exec_queue_put(q);
	return err;
}

static void destroy_vcs_execution_resources(struct xe_pxp *pxp)
{
	if (pxp->vcs_exec.bo)
		xe_bo_unpin_map_no_vm(pxp->vcs_exec.bo);

	if (pxp->vcs_exec.q)
		xe_exec_queue_put(pxp->vcs_exec.q);
}

#define PXP_BB_SIZE XE_PAGE_SIZE
static int allocate_gsc_client_resources(struct xe_gt *gt,
					 struct xe_pxp_gsc_client_resources *gsc_res,
					 size_t inout_size)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_bo *bo;
	struct xe_exec_queue *q;
	struct dma_fence *fence;
	long timeout;
	int err = 0;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);

	/* we shouldn't reach here if the GSC engine is not available */
	xe_assert(xe, hwe);

	/* PXP instructions must be issued from PPGTT */
	vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	/* We allocate a single object for the batch and the in/out memory */
	xe_vm_lock(vm, false);
	bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
	xe_vm_unlock(vm);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto vm_out;
	}

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto bo_out;
	}

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout <= 0) {
		err = timeout ?: -ETIME;
		goto bo_out;
	}

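	/*
	 * The queue is flagged as both kernel-owned and permanent: it is only
	 * ever used for PXP messages that the driver itself sends to the GSC
	 * FW and it stays alive until the PXP resources are destroyed.
	 */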
	q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto bo_out;
	}

	gsc_res->vm = vm;
	gsc_res->bo = bo;
	gsc_res->inout_size = inout_size;
	gsc_res->batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
	gsc_res->msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE);
	gsc_res->msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE + inout_size);
	gsc_res->q = q;

	/* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
	gsc_res->host_session_handle = xe_gsc_create_host_session_id();

	return 0;

bo_out:
	xe_bo_unpin_map_no_vm(bo);
vm_out:
	xe_vm_close_and_put(vm);

	return err;
}

static void destroy_gsc_client_resources(struct xe_pxp_gsc_client_resources *gsc_res)
{
	if (!gsc_res->q)
		return;

	xe_exec_queue_put(gsc_res->q);
	xe_bo_unpin_map_no_vm(gsc_res->bo);
	xe_vm_close_and_put(gsc_res->vm);
}

/**
 * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
 * @pxp: the xe_pxp structure
 *
 * Allocates the exec_queue objects for VCS and GSCCS submission. GSCCS
 * submissions are done via PPGTT, so this function also allocates a VM for
 * them and maps the BO into it.
 *
 * Returns 0 if the allocation and mapping are successful, an errno value
 * otherwise.
 */
int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
{
	int err;

	err = allocate_vcs_execution_resources(pxp);
	if (err)
		return err;

	/*
	 * PXP commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE),
	 * but we currently only support a subset of commands that are small
	 * (< 20 dwords), so a single page is enough for now.
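	 *
	 * Note that this makes the GSC BO three pages in total: one for the
	 * batch (PXP_BB_SIZE) and one each for the input and output messages.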
	 */
	err = allocate_gsc_client_resources(pxp->gt, &pxp->gsc_res, XE_PAGE_SIZE);
	if (err)
		goto destroy_vcs_context;

	return 0;

destroy_vcs_context:
	destroy_vcs_execution_resources(pxp);
	return err;
}

/**
 * xe_pxp_destroy_execution_resources - Destroy PXP submission objects
 * @pxp: the xe_pxp structure
 */
void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
{
	destroy_gsc_client_resources(&pxp->gsc_res);
	destroy_vcs_execution_resources(pxp);
}

#define emit_cmd(xe_, map_, offset_, val_) \
	xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_)

/* stall until prior PXP and MFX/HCP/HUC objects are completed */
#define MFX_WAIT_PXP (MFX_WAIT | \
		      MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \
		      MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG)
static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset)
{
	/* wait for cmds to go through */
	emit_cmd(xe, batch, offset++, MFX_WAIT_PXP);
	emit_cmd(xe, batch, offset++, 0);

	return offset;
}

static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch,
				      u32 offset, u32 idx)
{
	offset = pxp_emit_wait(xe, batch, offset);

	/* pxp off */
	emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);

	/* select session */
	emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx));
	emit_cmd(xe, batch, offset++, 0);

	offset = pxp_emit_wait(xe, batch, offset);

	/* pxp on */
	emit_cmd(xe, batch, offset++, MI_FLUSH_DW |
				      MI_FLUSH_DW_PROTECTED_MEM_EN |
				      MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX |
				      MI_FLUSH_IMM_DW);
	emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR |
				      MI_FLUSH_DW_USE_GTT);
	emit_cmd(xe, batch, offset++, 0);
	emit_cmd(xe, batch, offset++, 0);

	offset = pxp_emit_wait(xe, batch, offset);

	return offset;
}

static u32 pxp_emit_inline_termination(struct xe_device *xe,
				       struct iosys_map *batch, u32 offset)
{
	/* session inline termination */
	emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE);
	emit_cmd(xe, batch, offset++, 0);

	return offset;
}

static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch,
					u32 offset, u32 idx)
{
	offset = pxp_emit_session_selection(xe, batch, offset, idx);
	offset = pxp_emit_inline_termination(xe, batch, offset);

	return offset;
}

/**
 * xe_pxp_submit_session_termination - submits a PXP inline termination
 * @pxp: the xe_pxp structure
 * @id: the session to terminate
 *
 * Emit an inline termination via the VCS engine to terminate a session.
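 *
 * The submission is synchronous: the termination batch is emitted into the
 * pre-allocated GGTT BO and the function waits up to 1 second for the job
 * fence to signal before returning.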
 *
 * Returns 0 if the submission is successful, an errno value otherwise.
 */
int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id)
{
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;
	u32 offset = 0;
	u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo);

	offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id);
	offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset);
	emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END);

	job = xe_sched_job_create(pxp->vcs_exec.q, &addr);
	if (IS_ERR(job))
		return PTR_ERR(job);

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);

	dma_fence_put(fence);

	if (!timeout)
		return -ETIMEDOUT;
	else if (timeout < 0)
		return timeout;

	return 0;
}

static bool
is_fw_err_platform_config(u32 type)
{
	switch (type) {
	case PXP_STATUS_ERROR_API_VERSION:
	case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
	case PXP_STATUS_PLATFCONFIG_KF1_BAD:
	case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED:
		return true;
	default:
		break;
	}
	return false;
}

static const char *
fw_err_to_string(u32 type)
{
	switch (type) {
	case PXP_STATUS_ERROR_API_VERSION:
		return "ERR_API_VERSION";
	case PXP_STATUS_NOT_READY:
		return "ERR_NOT_READY";
	case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF:
	case PXP_STATUS_PLATFCONFIG_KF1_BAD:
	case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED:
		return "ERR_PLATFORM_CONFIG";
	default:
		break;
	}
	return NULL;
}

static int pxp_pkt_submit(struct xe_exec_queue *q, u64 batch_addr)
{
	struct xe_gt *gt = q->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	xe_assert(xe, q->hwe->engine_id == XE_HW_ENGINE_GSCCS0);

	job = xe_sched_job_create(q, &batch_addr);
	if (IS_ERR(job))
		return PTR_ERR(job);

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

static void emit_pxp_heci_cmd(struct xe_device *xe, struct iosys_map *batch,
			      u64 addr_in, u32 size_in, u64 addr_out, u32 size_out)
{
	u32 len = 0;

	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, GSC_HECI_CMD_PKT);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_in));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_in));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_in);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_out));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_out));
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_out);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 0);
	xe_map_wr(xe, batch, len++ * sizeof(u32), u32, MI_BATCH_BUFFER_END);
}

#define GSC_PENDING_RETRY_MAXCOUNT 40
#define GSC_PENDING_RETRY_PAUSE_MS 50
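/*
 * Send a PXP message to the GSC FW. The input message is copied into the
 * msg_in buffer behind a GSC MTL header, a HECI packet pointing to the
 * in/out buffers is submitted on the GSCCS and, once the FW stops reporting
 * the request as pending, the reply is copied out of msg_out for the caller.
 */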
static int gsccs_send_message(struct xe_pxp_gsc_client_resources *gsc_res,
			      void *msg_in, size_t msg_in_size,
			      void *msg_out, size_t msg_out_size_max)
{
	struct xe_device *xe = gsc_res->vm->xe;
	const size_t max_msg_size = gsc_res->inout_size - sizeof(struct intel_gsc_mtl_header);
	u32 wr_offset;
	u32 rd_offset;
	u32 reply_size;
	u32 min_reply_size = 0;
	int ret;
	int retry = GSC_PENDING_RETRY_MAXCOUNT;

	if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size)
		return -ENOSPC;

	wr_offset = xe_gsc_emit_header(xe, &gsc_res->msg_in, 0,
				       HECI_MEADDRESS_PXP,
				       gsc_res->host_session_handle,
				       msg_in_size);

	/* NOTE: zero-size packets are used for session cleanups */
	if (msg_in && msg_in_size) {
		xe_map_memcpy_to(xe, &gsc_res->msg_in, wr_offset,
				 msg_in, msg_in_size);
		min_reply_size = sizeof(struct pxp_cmd_header);
	}

	/* Make sure the reply header does not contain stale data */
	xe_gsc_poison_header(xe, &gsc_res->msg_out, 0);

	/*
	 * The BO is mapped at address 0 of the PPGTT, so no need to add its
	 * base offset when calculating the in/out addresses.
	 */
	emit_pxp_heci_cmd(xe, &gsc_res->batch, PXP_BB_SIZE,
			  wr_offset + msg_in_size, PXP_BB_SIZE + gsc_res->inout_size,
			  wr_offset + msg_out_size_max);

	xe_device_wmb(xe);

	/*
	 * If the GSC needs to communicate with CSME to complete our request,
	 * it'll set the "pending" flag in the return header. In this scenario
	 * we're expected to wait 50ms to give some time to the proxy code to
	 * handle the GSC<->CSME communication and then try again. Note that,
	 * although in most cases the 50ms window is enough, the proxy flow is
	 * not actually guaranteed to complete within that time period, so we
	 * might have to try multiple times, up to a worst case of 2 seconds,
	 * after which the request is considered aborted.
	 */
	do {
		ret = pxp_pkt_submit(gsc_res->q, 0);
		if (ret)
			break;

		if (xe_gsc_check_and_update_pending(xe, &gsc_res->msg_in, 0,
						    &gsc_res->msg_out, 0)) {
			ret = -EAGAIN;
			msleep(GSC_PENDING_RETRY_PAUSE_MS);
		}
	} while (--retry && ret == -EAGAIN);

	if (ret) {
		drm_err(&xe->drm, "failed to submit GSC PXP message (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	ret = xe_gsc_read_out_header(xe, &gsc_res->msg_out, 0,
				     min_reply_size, &rd_offset);
	if (ret) {
		drm_err(&xe->drm, "invalid GSC reply for PXP (%pe)\n", ERR_PTR(ret));
		return ret;
	}

	if (msg_out && min_reply_size) {
		reply_size = xe_map_rd_field(xe, &gsc_res->msg_out, rd_offset,
					     struct pxp_cmd_header, buffer_len);
		reply_size += sizeof(struct pxp_cmd_header);

		if (reply_size > msg_out_size_max) {
			drm_warn(&xe->drm, "PXP reply size overflow: %u (%zu)\n",
				 reply_size, msg_out_size_max);
			reply_size = msg_out_size_max;
		}

		xe_map_memcpy_from(xe, msg_out, &gsc_res->msg_out,
				   rd_offset, reply_size);
	}

	xe_gsc_poison_header(xe, &gsc_res->msg_in, 0);

	return ret;
}

/**
 * xe_pxp_submit_session_invalidation - submits a PXP GSC invalidation
 * @gsc_res: the pxp client resources
 * @id: the session to invalidate
 *
 * Submit a message to the GSC FW to notify it that a session has been
 * terminated and is therefore invalid.
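 *
 * If the FW reports a platform configuration problem (see
 * is_fw_err_platform_config()), the failure is logged only once at info
 * level; other non-zero FW statuses are logged at debug level.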
 *
 * Returns 0 if the submission is successful, an errno value otherwise.
 */
int xe_pxp_submit_session_invalidation(struct xe_pxp_gsc_client_resources *gsc_res, u32 id)
{
	struct xe_device *xe = gsc_res->vm->xe;
	struct pxp43_inv_stream_key_in msg_in = {0};
	struct pxp43_inv_stream_key_out msg_out = {0};
	int ret = 0;

	/*
	 * Stream key invalidation reuses the same version 4.2 input/output
	 * command format, but the firmware requires the 4.3 API interaction
	 * version.
	 */
	msg_in.header.api_version = PXP_APIVER(4, 3);
	msg_in.header.command_id = PXP43_CMDID_INVALIDATE_STREAM_KEY;
	msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header);

	msg_in.header.stream_id = FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_VALID, 1);
	msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_APP_TYPE, 0);
	msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_ID, id);

	ret = gsccs_send_message(gsc_res, &msg_in, sizeof(msg_in),
				 &msg_out, sizeof(msg_out));
	if (ret) {
		drm_err(&xe->drm, "Failed to invalidate PXP stream-key %u (%pe)\n",
			id, ERR_PTR(ret));
	} else if (msg_out.header.status != 0) {
		ret = -EIO;

		if (is_fw_err_platform_config(msg_out.header.status))
			drm_info_once(&xe->drm,
				      "Failed to invalidate PXP stream-key %u: BIOS/SOC 0x%08x(%s)\n",
				      id, msg_out.header.status,
				      fw_err_to_string(msg_out.header.status));
		else
			drm_dbg(&xe->drm, "Failed to invalidate stream-key %u, s=0x%08x\n",
				id, msg_out.header.status);
	}

	return ret;
}