1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023 Intel Corporation 4 */ 5 6 #include "xe_gsc.h" 7 8 #include <linux/delay.h> 9 10 #include <drm/drm_managed.h> 11 #include <drm/drm_print.h> 12 13 #include <generated/xe_wa_oob.h> 14 15 #include "abi/gsc_mkhi_commands_abi.h" 16 #include "xe_bb.h" 17 #include "xe_bo.h" 18 #include "xe_device.h" 19 #include "xe_exec_queue.h" 20 #include "xe_force_wake.h" 21 #include "xe_gsc_proxy.h" 22 #include "xe_gsc_submit.h" 23 #include "xe_gt.h" 24 #include "xe_gt_mcr.h" 25 #include "xe_gt_printk.h" 26 #include "xe_guc_pc.h" 27 #include "xe_huc.h" 28 #include "xe_map.h" 29 #include "xe_mmio.h" 30 #include "xe_pm.h" 31 #include "xe_sched_job.h" 32 #include "xe_uc_fw.h" 33 #include "xe_wa.h" 34 #include "instructions/xe_gsc_commands.h" 35 #include "regs/xe_gsc_regs.h" 36 #include "regs/xe_gt_regs.h" 37 38 static struct xe_gt * 39 gsc_to_gt(struct xe_gsc *gsc) 40 { 41 return container_of(gsc, struct xe_gt, uc.gsc); 42 } 43 44 static int memcpy_fw(struct xe_gsc *gsc) 45 { 46 struct xe_gt *gt = gsc_to_gt(gsc); 47 struct xe_device *xe = gt_to_xe(gt); 48 u32 fw_size = gsc->fw.size; 49 void *storage; 50 51 /* 52 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use 53 * a memcpy for now. 54 */ 55 storage = kmalloc(fw_size, GFP_KERNEL); 56 if (!storage) 57 return -ENOMEM; 58 59 xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); 60 xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); 61 xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); 62 63 kfree(storage); 64 65 return 0; 66 } 67 68 static int emit_gsc_upload(struct xe_gsc *gsc) 69 { 70 struct xe_gt *gt = gsc_to_gt(gsc); 71 u64 offset = xe_bo_ggtt_addr(gsc->private); 72 struct xe_bb *bb; 73 struct xe_sched_job *job; 74 struct dma_fence *fence; 75 long timeout; 76 77 bb = xe_bb_new(gt, 4, false); 78 if (IS_ERR(bb)) 79 return PTR_ERR(bb); 80 81 bb->cs[bb->len++] = GSC_FW_LOAD; 82 bb->cs[bb->len++] = lower_32_bits(offset); 83 bb->cs[bb->len++] = upper_32_bits(offset); 84 bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; 85 86 job = xe_bb_create_job(gsc->q, bb); 87 if (IS_ERR(job)) { 88 xe_bb_free(bb, NULL); 89 return PTR_ERR(job); 90 } 91 92 xe_sched_job_arm(job); 93 fence = dma_fence_get(&job->drm.s_fence->finished); 94 xe_sched_job_push(job); 95 96 timeout = dma_fence_wait_timeout(fence, false, HZ); 97 dma_fence_put(fence); 98 xe_bb_free(bb, NULL); 99 if (timeout < 0) 100 return timeout; 101 else if (!timeout) 102 return -ETIME; 103 104 return 0; 105 } 106 107 #define version_query_wr(xe_, map_, offset_, field_, val_) \ 108 xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_) 109 #define version_query_rd(xe_, map_, offset_, field_) \ 110 xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_) 111 112 static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset) 113 { 114 xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in)); 115 116 version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV); 117 version_query_wr(xe, map, wr_offset, header.command, 118 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION); 119 120 return wr_offset + sizeof(struct gsc_get_compatibility_version_in); 121 } 122 123 #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ 124 static int query_compatibility_version(struct xe_gsc *gsc) 125 { 126 struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; 127 struct xe_gt *gt = gsc_to_gt(gsc); 128 struct xe_tile *tile = gt_to_tile(gt); 129 struct xe_device *xe = gt_to_xe(gt); 130 struct xe_bo *bo; 131 u32 wr_offset; 132 u32 rd_offset; 133 u64 ggtt_offset; 134 int err; 135 136 bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, 137 ttm_bo_type_kernel, 138 XE_BO_FLAG_SYSTEM | 139 XE_BO_FLAG_GGTT); 140 if (IS_ERR(bo)) { 141 xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); 142 return PTR_ERR(bo); 143 } 144 145 ggtt_offset = xe_bo_ggtt_addr(bo); 146 147 wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, 148 sizeof(struct gsc_get_compatibility_version_in)); 149 wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); 150 151 err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, 152 ggtt_offset + GSC_VER_PKT_SZ, 153 GSC_VER_PKT_SZ); 154 if (err) { 155 xe_gt_err(gt, 156 "failed to submit GSC request for compatibility version: %d\n", 157 err); 158 goto out_bo; 159 } 160 161 err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, 162 sizeof(struct gsc_get_compatibility_version_out), 163 &rd_offset); 164 if (err) { 165 xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); 166 return err; 167 } 168 169 compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major); 170 compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); 171 compat->patch = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); 172 173 xe_gt_info(gt, "found GSC cv%u.%u.%u\n", compat->major, compat->minor, compat->patch); 174 175 out_bo: 176 xe_bo_unpin_map_no_vm(bo); 177 return err; 178 } 179 180 static int gsc_fw_is_loaded(struct xe_gt *gt) 181 { 182 return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & 183 HECI1_FWSTS1_INIT_COMPLETE; 184 } 185 186 static int gsc_fw_wait(struct xe_gt *gt) 187 { 188 /* 189 * GSC load can take up to 250ms from the moment the instruction is 190 * executed by the GSCCS. To account for possible submission delays or 191 * other issues, we use a 500ms timeout in the wait here. 192 */ 193 return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), 194 HECI1_FWSTS1_INIT_COMPLETE, 195 HECI1_FWSTS1_INIT_COMPLETE, 196 500 * USEC_PER_MSEC, NULL, false); 197 } 198 199 static int gsc_upload(struct xe_gsc *gsc) 200 { 201 struct xe_gt *gt = gsc_to_gt(gsc); 202 struct xe_device *xe = gt_to_xe(gt); 203 int err; 204 205 /* we should only be here if the init step were successful */ 206 xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); 207 208 if (gsc_fw_is_loaded(gt)) { 209 xe_gt_err(gt, "GSC already loaded at upload time\n"); 210 return -EEXIST; 211 } 212 213 err = memcpy_fw(gsc); 214 if (err) { 215 xe_gt_err(gt, "Failed to memcpy GSC FW\n"); 216 return err; 217 } 218 219 /* 220 * GSC is only killed by an FLR, so we need to trigger one on unload to 221 * make sure we stop it. This is because we assign a chunk of memory to 222 * the GSC as part of the FW load, so we need to make sure it stops 223 * using it when we release it to the system on driver unload. Note that 224 * this is not a problem of the unload per-se, because the GSC will not 225 * touch that memory unless there are requests for it coming from the 226 * driver; therefore, no accesses will happen while Xe is not loaded, 227 * but if we re-load the driver then the GSC might wake up and try to 228 * access that old memory location again. 229 * Given that an FLR is a very disruptive action (see the FLR function 230 * for details), we want to do it as the last action before releasing 231 * the access to the MMIO bar, which means we need to do it as part of 232 * mmio cleanup. 233 */ 234 xe->needs_flr_on_fini = true; 235 236 err = emit_gsc_upload(gsc); 237 if (err) { 238 xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); 239 return err; 240 } 241 242 err = gsc_fw_wait(gt); 243 if (err) { 244 xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); 245 return err; 246 } 247 248 err = query_compatibility_version(gsc); 249 if (err) 250 return err; 251 252 err = xe_uc_fw_check_version_requirements(&gsc->fw); 253 if (err) 254 return err; 255 256 return 0; 257 } 258 259 static int gsc_upload_and_init(struct xe_gsc *gsc) 260 { 261 struct xe_gt *gt = gsc_to_gt(gsc); 262 struct xe_tile *tile = gt_to_tile(gt); 263 int ret; 264 265 if (XE_WA(tile->primary_gt, 14018094691)) { 266 ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 267 268 /* 269 * If the forcewake fails we want to keep going, because the worst 270 * case outcome in failing to apply the WA is that PXP won't work, 271 * which is not fatal. We still throw a warning so the issue is 272 * seen if it happens. 273 */ 274 xe_gt_WARN_ON(tile->primary_gt, ret); 275 276 xe_gt_mcr_multicast_write(tile->primary_gt, 277 EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, 278 EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); 279 } 280 281 ret = gsc_upload(gsc); 282 283 if (XE_WA(tile->primary_gt, 14018094691)) 284 xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 285 286 if (ret) 287 return ret; 288 289 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 290 291 /* GSC load is done, restore expected GT frequencies */ 292 xe_gt_sanitize_freq(gt); 293 294 xe_gt_dbg(gt, "GSC FW async load completed\n"); 295 296 /* HuC auth failure is not fatal */ 297 if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) 298 xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); 299 300 ret = xe_gsc_proxy_start(gsc); 301 if (ret) 302 return ret; 303 304 xe_gt_dbg(gt, "GSC proxy init completed\n"); 305 306 return 0; 307 } 308 309 static int gsc_er_complete(struct xe_gt *gt) 310 { 311 u32 er_status; 312 313 if (!gsc_fw_is_loaded(gt)) 314 return 0; 315 316 /* 317 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the 318 * driver or the GuC hit the GDRST register, the CS is immediately reset 319 * and a success is reported, but the GSC shim keeps resetting in the 320 * background. While the shim reset is ongoing, the CS is able to accept 321 * new context submission, but any commands that require the shim will 322 * be stalled until the reset is completed. This means that we can keep 323 * submitting to the GSCCS as long as we make sure that the preemption 324 * timeout is big enough to cover any delay introduced by the reset. 325 * When the shim reset completes, a specific CS interrupt is triggered, 326 * in response to which we need to check the GSCI_TIMER_STATUS register 327 * to see if the reset was successful or not. 328 * Note that the GSCI_TIMER_STATUS register is not power save/restored, 329 * so it gets reset on MC6 entry. However, a reset failure stops MC6, 330 * so in that scenario we're always guaranteed to find the correct 331 * value. 332 */ 333 er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; 334 335 if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { 336 /* 337 * XXX: we should trigger an FLR here, but we don't have support 338 * for that yet. 339 */ 340 xe_gt_err(gt, "GSC ER timed out!\n"); 341 return -EIO; 342 } 343 344 return 0; 345 } 346 347 static void gsc_work(struct work_struct *work) 348 { 349 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); 350 struct xe_gt *gt = gsc_to_gt(gsc); 351 struct xe_device *xe = gt_to_xe(gt); 352 u32 actions; 353 int ret; 354 355 spin_lock_irq(&gsc->lock); 356 actions = gsc->work_actions; 357 gsc->work_actions = 0; 358 spin_unlock_irq(&gsc->lock); 359 360 xe_pm_runtime_get(xe); 361 xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); 362 363 if (actions & GSC_ACTION_ER_COMPLETE) { 364 ret = gsc_er_complete(gt); 365 if (ret) 366 goto out; 367 } 368 369 if (actions & GSC_ACTION_FW_LOAD) { 370 ret = gsc_upload_and_init(gsc); 371 if (ret && ret != -EEXIST) 372 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 373 else 374 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); 375 } 376 377 if (actions & GSC_ACTION_SW_PROXY) 378 xe_gsc_proxy_request_handler(gsc); 379 380 out: 381 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 382 xe_pm_runtime_put(xe); 383 } 384 385 void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) 386 { 387 struct xe_gt *gt = hwe->gt; 388 struct xe_gsc *gsc = >->uc.gsc; 389 390 if (unlikely(!intr_vec)) 391 return; 392 393 if (intr_vec & GSC_ER_COMPLETE) { 394 spin_lock(&gsc->lock); 395 gsc->work_actions |= GSC_ACTION_ER_COMPLETE; 396 spin_unlock(&gsc->lock); 397 398 queue_work(gsc->wq, &gsc->work); 399 } 400 } 401 402 int xe_gsc_init(struct xe_gsc *gsc) 403 { 404 struct xe_gt *gt = gsc_to_gt(gsc); 405 struct xe_tile *tile = gt_to_tile(gt); 406 int ret; 407 408 gsc->fw.type = XE_UC_FW_TYPE_GSC; 409 INIT_WORK(&gsc->work, gsc_work); 410 spin_lock_init(&gsc->lock); 411 412 /* The GSC uC is only available on the media GT */ 413 if (tile->media_gt && (gt != tile->media_gt)) { 414 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); 415 return 0; 416 } 417 418 /* 419 * Some platforms can have GuC but not GSC. That would cause 420 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort 421 * all firmware loading. So check for GSC being enabled before 422 * propagating the failure back up. That way the higher level will keep 423 * going and load GuC as appropriate. 424 */ 425 ret = xe_uc_fw_init(&gsc->fw); 426 if (!xe_uc_fw_is_enabled(&gsc->fw)) 427 return 0; 428 else if (ret) 429 goto out; 430 431 ret = xe_gsc_proxy_init(gsc); 432 if (ret && ret != -ENODEV) 433 goto out; 434 435 return 0; 436 437 out: 438 xe_gt_err(gt, "GSC init failed with %d", ret); 439 return ret; 440 } 441 442 static void free_resources(void *arg) 443 { 444 struct xe_gsc *gsc = arg; 445 446 if (gsc->wq) { 447 destroy_workqueue(gsc->wq); 448 gsc->wq = NULL; 449 } 450 451 if (gsc->q) { 452 xe_exec_queue_put(gsc->q); 453 gsc->q = NULL; 454 } 455 } 456 457 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) 458 { 459 struct xe_gt *gt = gsc_to_gt(gsc); 460 struct xe_tile *tile = gt_to_tile(gt); 461 struct xe_device *xe = gt_to_xe(gt); 462 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); 463 struct xe_exec_queue *q; 464 struct workqueue_struct *wq; 465 struct xe_bo *bo; 466 int err; 467 468 if (!xe_uc_fw_is_available(&gsc->fw)) 469 return 0; 470 471 if (!hwe) 472 return -ENODEV; 473 474 bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, 475 XE_BO_FLAG_STOLEN | 476 XE_BO_FLAG_GGTT); 477 if (IS_ERR(bo)) 478 return PTR_ERR(bo); 479 480 q = xe_exec_queue_create(xe, NULL, 481 BIT(hwe->logical_instance), 1, hwe, 482 EXEC_QUEUE_FLAG_KERNEL | 483 EXEC_QUEUE_FLAG_PERMANENT, 0); 484 if (IS_ERR(q)) { 485 xe_gt_err(gt, "Failed to create queue for GSC submission\n"); 486 err = PTR_ERR(q); 487 goto out_bo; 488 } 489 490 wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); 491 if (!wq) { 492 err = -ENOMEM; 493 goto out_q; 494 } 495 496 gsc->private = bo; 497 gsc->q = q; 498 gsc->wq = wq; 499 500 err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); 501 if (err) 502 return err; 503 504 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE); 505 506 return 0; 507 508 out_q: 509 xe_exec_queue_put(q); 510 out_bo: 511 xe_bo_unpin_map_no_vm(bo); 512 return err; 513 } 514 515 void xe_gsc_load_start(struct xe_gsc *gsc) 516 { 517 struct xe_gt *gt = gsc_to_gt(gsc); 518 struct xe_device *xe = gt_to_xe(gt); 519 520 if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) 521 return; 522 523 /* 524 * The GSC HW is only reset by driver FLR or D3cold entry. We don't 525 * support the former at runtime, while the latter is only supported on 526 * DGFX, for which we don't support GSC. Therefore, if GSC failed to 527 * load previously there is no need to try again because the HW is 528 * stuck in the error state. 529 */ 530 xe_assert(xe, !IS_DGFX(xe)); 531 if (xe_uc_fw_is_in_error_state(&gsc->fw)) 532 return; 533 534 /* GSC FW survives GT reset and D3Hot */ 535 if (gsc_fw_is_loaded(gt)) { 536 if (xe_gsc_proxy_init_done(gsc)) 537 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); 538 else 539 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 540 return; 541 } 542 543 spin_lock_irq(&gsc->lock); 544 gsc->work_actions |= GSC_ACTION_FW_LOAD; 545 spin_unlock_irq(&gsc->lock); 546 547 queue_work(gsc->wq, &gsc->work); 548 } 549 550 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) 551 { 552 if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) 553 flush_work(&gsc->work); 554 } 555 556 /** 557 * xe_gsc_remove() - Clean up the GSC structures before driver removal 558 * @gsc: the GSC uC 559 */ 560 void xe_gsc_remove(struct xe_gsc *gsc) 561 { 562 xe_gsc_proxy_remove(gsc); 563 } 564 565 /* 566 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a 567 * GSC engine reset by writing a notification bit in the GS1 register and then 568 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms 569 * for the FW to get prepare for the reset, so we need to wait for that amount 570 * of time. 571 * After the reset is complete we need to then clear the GS1 register. 572 */ 573 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) 574 { 575 u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; 576 u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP; 577 578 /* WA only applies if the GSC is loaded */ 579 if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) 580 return; 581 582 xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); 583 584 if (prep) { 585 /* make sure the reset bit is clear when writing the CSR reg */ 586 xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), 587 HECI_H_CSR_RST, HECI_H_CSR_IG); 588 msleep(200); 589 } 590 } 591 592 /** 593 * xe_gsc_print_info - print info about GSC FW status 594 * @gsc: the GSC structure 595 * @p: the printer to be used to print the info 596 */ 597 void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p) 598 { 599 struct xe_gt *gt = gsc_to_gt(gsc); 600 int err; 601 602 xe_uc_fw_print(&gsc->fw, p); 603 604 drm_printf(p, "\tfound security version %u\n", gsc->security_version); 605 606 if (!xe_uc_fw_is_enabled(&gsc->fw)) 607 return; 608 609 err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); 610 if (err) 611 return; 612 613 drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", 614 xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)), 615 xe_mmio_read32(gt, HECI_FWSTS2(MTL_GSC_HECI1_BASE)), 616 xe_mmio_read32(gt, HECI_FWSTS3(MTL_GSC_HECI1_BASE)), 617 xe_mmio_read32(gt, HECI_FWSTS4(MTL_GSC_HECI1_BASE)), 618 xe_mmio_read32(gt, HECI_FWSTS5(MTL_GSC_HECI1_BASE)), 619 xe_mmio_read32(gt, HECI_FWSTS6(MTL_GSC_HECI1_BASE))); 620 621 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 622 } 623