1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023 Intel Corporation 4 */ 5 6 #include "xe_gsc.h" 7 8 #include <linux/delay.h> 9 10 #include <drm/drm_managed.h> 11 12 #include <generated/xe_wa_oob.h> 13 14 #include "abi/gsc_mkhi_commands_abi.h" 15 #include "xe_bb.h" 16 #include "xe_bo.h" 17 #include "xe_device.h" 18 #include "xe_exec_queue.h" 19 #include "xe_force_wake.h" 20 #include "xe_gsc_proxy.h" 21 #include "xe_gsc_submit.h" 22 #include "xe_gt.h" 23 #include "xe_gt_mcr.h" 24 #include "xe_gt_printk.h" 25 #include "xe_guc_pc.h" 26 #include "xe_huc.h" 27 #include "xe_map.h" 28 #include "xe_mmio.h" 29 #include "xe_pm.h" 30 #include "xe_sched_job.h" 31 #include "xe_uc_fw.h" 32 #include "xe_wa.h" 33 #include "instructions/xe_gsc_commands.h" 34 #include "regs/xe_gsc_regs.h" 35 #include "regs/xe_gt_regs.h" 36 37 static struct xe_gt * 38 gsc_to_gt(struct xe_gsc *gsc) 39 { 40 return container_of(gsc, struct xe_gt, uc.gsc); 41 } 42 43 static int memcpy_fw(struct xe_gsc *gsc) 44 { 45 struct xe_gt *gt = gsc_to_gt(gsc); 46 struct xe_device *xe = gt_to_xe(gt); 47 u32 fw_size = gsc->fw.size; 48 void *storage; 49 50 /* 51 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use 52 * a memcpy for now. 53 */ 54 storage = kmalloc(fw_size, GFP_KERNEL); 55 if (!storage) 56 return -ENOMEM; 57 58 xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); 59 xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); 60 xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); 61 62 kfree(storage); 63 64 return 0; 65 } 66 67 static int emit_gsc_upload(struct xe_gsc *gsc) 68 { 69 struct xe_gt *gt = gsc_to_gt(gsc); 70 u64 offset = xe_bo_ggtt_addr(gsc->private); 71 struct xe_bb *bb; 72 struct xe_sched_job *job; 73 struct dma_fence *fence; 74 long timeout; 75 76 bb = xe_bb_new(gt, 4, false); 77 if (IS_ERR(bb)) 78 return PTR_ERR(bb); 79 80 bb->cs[bb->len++] = GSC_FW_LOAD; 81 bb->cs[bb->len++] = lower_32_bits(offset); 82 bb->cs[bb->len++] = upper_32_bits(offset); 83 bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; 84 85 job = xe_bb_create_job(gsc->q, bb); 86 if (IS_ERR(job)) { 87 xe_bb_free(bb, NULL); 88 return PTR_ERR(job); 89 } 90 91 xe_sched_job_arm(job); 92 fence = dma_fence_get(&job->drm.s_fence->finished); 93 xe_sched_job_push(job); 94 95 timeout = dma_fence_wait_timeout(fence, false, HZ); 96 dma_fence_put(fence); 97 xe_bb_free(bb, NULL); 98 if (timeout < 0) 99 return timeout; 100 else if (!timeout) 101 return -ETIME; 102 103 return 0; 104 } 105 106 #define version_query_wr(xe_, map_, offset_, field_, val_) \ 107 xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_) 108 #define version_query_rd(xe_, map_, offset_, field_) \ 109 xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_) 110 111 static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset) 112 { 113 xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in)); 114 115 version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV); 116 version_query_wr(xe, map, wr_offset, header.command, 117 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION); 118 119 return wr_offset + sizeof(struct gsc_get_compatibility_version_in); 120 } 121 122 #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ 123 static int query_compatibility_version(struct xe_gsc *gsc) 124 { 125 struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; 126 struct xe_gt *gt = gsc_to_gt(gsc); 127 struct xe_tile *tile = gt_to_tile(gt); 128 struct xe_device *xe = gt_to_xe(gt); 129 struct xe_bo *bo; 130 u32 wr_offset; 131 u32 rd_offset; 132 u64 ggtt_offset; 133 int err; 134 135 bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, 136 ttm_bo_type_kernel, 137 XE_BO_FLAG_SYSTEM | 138 XE_BO_FLAG_GGTT); 139 if (IS_ERR(bo)) { 140 xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); 141 return PTR_ERR(bo); 142 } 143 144 ggtt_offset = xe_bo_ggtt_addr(bo); 145 146 wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, 147 sizeof(struct gsc_get_compatibility_version_in)); 148 wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); 149 150 err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, 151 ggtt_offset + GSC_VER_PKT_SZ, 152 GSC_VER_PKT_SZ); 153 if (err) { 154 xe_gt_err(gt, 155 "failed to submit GSC request for compatibility version: %d\n", 156 err); 157 goto out_bo; 158 } 159 160 err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, 161 sizeof(struct gsc_get_compatibility_version_out), 162 &rd_offset); 163 if (err) { 164 xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); 165 return err; 166 } 167 168 compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); 169 compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); 170 171 xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); 172 173 out_bo: 174 xe_bo_unpin_map_no_vm(bo); 175 return err; 176 } 177 178 static int gsc_fw_is_loaded(struct xe_gt *gt) 179 { 180 return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & 181 HECI1_FWSTS1_INIT_COMPLETE; 182 } 183 184 static int gsc_fw_wait(struct xe_gt *gt) 185 { 186 /* 187 * GSC load can take up to 250ms from the moment the instruction is 188 * executed by the GSCCS. To account for possible submission delays or 189 * other issues, we use a 500ms timeout in the wait here. 190 */ 191 return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), 192 HECI1_FWSTS1_INIT_COMPLETE, 193 HECI1_FWSTS1_INIT_COMPLETE, 194 500 * USEC_PER_MSEC, NULL, false); 195 } 196 197 static int gsc_upload(struct xe_gsc *gsc) 198 { 199 struct xe_gt *gt = gsc_to_gt(gsc); 200 struct xe_device *xe = gt_to_xe(gt); 201 int err; 202 203 /* we should only be here if the init step were successful */ 204 xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); 205 206 if (gsc_fw_is_loaded(gt)) { 207 xe_gt_err(gt, "GSC already loaded at upload time\n"); 208 return -EEXIST; 209 } 210 211 err = memcpy_fw(gsc); 212 if (err) { 213 xe_gt_err(gt, "Failed to memcpy GSC FW\n"); 214 return err; 215 } 216 217 /* 218 * GSC is only killed by an FLR, so we need to trigger one on unload to 219 * make sure we stop it. This is because we assign a chunk of memory to 220 * the GSC as part of the FW load, so we need to make sure it stops 221 * using it when we release it to the system on driver unload. Note that 222 * this is not a problem of the unload per-se, because the GSC will not 223 * touch that memory unless there are requests for it coming from the 224 * driver; therefore, no accesses will happen while Xe is not loaded, 225 * but if we re-load the driver then the GSC might wake up and try to 226 * access that old memory location again. 227 * Given that an FLR is a very disruptive action (see the FLR function 228 * for details), we want to do it as the last action before releasing 229 * the access to the MMIO bar, which means we need to do it as part of 230 * mmio cleanup. 231 */ 232 xe->needs_flr_on_fini = true; 233 234 err = emit_gsc_upload(gsc); 235 if (err) { 236 xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); 237 return err; 238 } 239 240 err = gsc_fw_wait(gt); 241 if (err) { 242 xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); 243 return err; 244 } 245 246 err = query_compatibility_version(gsc); 247 if (err) 248 return err; 249 250 err = xe_uc_fw_check_version_requirements(&gsc->fw); 251 if (err) 252 return err; 253 254 return 0; 255 } 256 257 static int gsc_upload_and_init(struct xe_gsc *gsc) 258 { 259 struct xe_gt *gt = gsc_to_gt(gsc); 260 struct xe_tile *tile = gt_to_tile(gt); 261 int ret; 262 263 if (XE_WA(tile->primary_gt, 14018094691)) { 264 ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 265 266 /* 267 * If the forcewake fails we want to keep going, because the worst 268 * case outcome in failing to apply the WA is that PXP won't work, 269 * which is not fatal. We still throw a warning so the issue is 270 * seen if it happens. 271 */ 272 xe_gt_WARN_ON(tile->primary_gt, ret); 273 274 xe_gt_mcr_multicast_write(tile->primary_gt, 275 EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, 276 EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); 277 } 278 279 ret = gsc_upload(gsc); 280 281 if (XE_WA(tile->primary_gt, 14018094691)) 282 xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 283 284 if (ret) 285 return ret; 286 287 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 288 289 /* GSC load is done, restore expected GT frequencies */ 290 xe_gt_sanitize_freq(gt); 291 292 xe_gt_dbg(gt, "GSC FW async load completed\n"); 293 294 /* HuC auth failure is not fatal */ 295 if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) 296 xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); 297 298 ret = xe_gsc_proxy_start(gsc); 299 if (ret) 300 return ret; 301 302 xe_gt_dbg(gt, "GSC proxy init completed\n"); 303 304 return 0; 305 } 306 307 static int gsc_er_complete(struct xe_gt *gt) 308 { 309 u32 er_status; 310 311 if (!gsc_fw_is_loaded(gt)) 312 return 0; 313 314 /* 315 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the 316 * driver or the GuC hit the GDRST register, the CS is immediately reset 317 * and a success is reported, but the GSC shim keeps resetting in the 318 * background. While the shim reset is ongoing, the CS is able to accept 319 * new context submission, but any commands that require the shim will 320 * be stalled until the reset is completed. This means that we can keep 321 * submitting to the GSCCS as long as we make sure that the preemption 322 * timeout is big enough to cover any delay introduced by the reset. 323 * When the shim reset completes, a specific CS interrupt is triggered, 324 * in response to which we need to check the GSCI_TIMER_STATUS register 325 * to see if the reset was successful or not. 326 * Note that the GSCI_TIMER_STATUS register is not power save/restored, 327 * so it gets reset on MC6 entry. However, a reset failure stops MC6, 328 * so in that scenario we're always guaranteed to find the correct 329 * value. 330 */ 331 er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; 332 333 if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { 334 /* 335 * XXX: we should trigger an FLR here, but we don't have support 336 * for that yet. 337 */ 338 xe_gt_err(gt, "GSC ER timed out!\n"); 339 return -EIO; 340 } 341 342 return 0; 343 } 344 345 static void gsc_work(struct work_struct *work) 346 { 347 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); 348 struct xe_gt *gt = gsc_to_gt(gsc); 349 struct xe_device *xe = gt_to_xe(gt); 350 u32 actions; 351 int ret; 352 353 spin_lock_irq(&gsc->lock); 354 actions = gsc->work_actions; 355 gsc->work_actions = 0; 356 spin_unlock_irq(&gsc->lock); 357 358 xe_pm_runtime_get(xe); 359 xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); 360 361 if (actions & GSC_ACTION_ER_COMPLETE) { 362 ret = gsc_er_complete(gt); 363 if (ret) 364 goto out; 365 } 366 367 if (actions & GSC_ACTION_FW_LOAD) { 368 ret = gsc_upload_and_init(gsc); 369 if (ret && ret != -EEXIST) 370 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 371 else 372 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); 373 } 374 375 if (actions & GSC_ACTION_SW_PROXY) 376 xe_gsc_proxy_request_handler(gsc); 377 378 out: 379 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 380 xe_pm_runtime_put(xe); 381 } 382 383 void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) 384 { 385 struct xe_gt *gt = hwe->gt; 386 struct xe_gsc *gsc = >->uc.gsc; 387 388 if (unlikely(!intr_vec)) 389 return; 390 391 if (intr_vec & GSC_ER_COMPLETE) { 392 spin_lock(&gsc->lock); 393 gsc->work_actions |= GSC_ACTION_ER_COMPLETE; 394 spin_unlock(&gsc->lock); 395 396 queue_work(gsc->wq, &gsc->work); 397 } 398 } 399 400 int xe_gsc_init(struct xe_gsc *gsc) 401 { 402 struct xe_gt *gt = gsc_to_gt(gsc); 403 struct xe_tile *tile = gt_to_tile(gt); 404 int ret; 405 406 gsc->fw.type = XE_UC_FW_TYPE_GSC; 407 INIT_WORK(&gsc->work, gsc_work); 408 spin_lock_init(&gsc->lock); 409 410 /* The GSC uC is only available on the media GT */ 411 if (tile->media_gt && (gt != tile->media_gt)) { 412 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); 413 return 0; 414 } 415 416 /* 417 * Some platforms can have GuC but not GSC. That would cause 418 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort 419 * all firmware loading. So check for GSC being enabled before 420 * propagating the failure back up. That way the higher level will keep 421 * going and load GuC as appropriate. 422 */ 423 ret = xe_uc_fw_init(&gsc->fw); 424 if (!xe_uc_fw_is_enabled(&gsc->fw)) 425 return 0; 426 else if (ret) 427 goto out; 428 429 ret = xe_gsc_proxy_init(gsc); 430 if (ret && ret != -ENODEV) 431 goto out; 432 433 return 0; 434 435 out: 436 xe_gt_err(gt, "GSC init failed with %d", ret); 437 return ret; 438 } 439 440 static void free_resources(void *arg) 441 { 442 struct xe_gsc *gsc = arg; 443 444 if (gsc->wq) { 445 destroy_workqueue(gsc->wq); 446 gsc->wq = NULL; 447 } 448 449 if (gsc->q) { 450 xe_exec_queue_put(gsc->q); 451 gsc->q = NULL; 452 } 453 454 if (gsc->private) { 455 xe_bo_unpin_map_no_vm(gsc->private); 456 gsc->private = NULL; 457 } 458 } 459 460 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) 461 { 462 struct xe_gt *gt = gsc_to_gt(gsc); 463 struct xe_tile *tile = gt_to_tile(gt); 464 struct xe_device *xe = gt_to_xe(gt); 465 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); 466 struct xe_exec_queue *q; 467 struct workqueue_struct *wq; 468 struct xe_bo *bo; 469 int err; 470 471 if (!xe_uc_fw_is_available(&gsc->fw)) 472 return 0; 473 474 if (!hwe) 475 return -ENODEV; 476 477 bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, 478 ttm_bo_type_kernel, 479 XE_BO_FLAG_STOLEN | 480 XE_BO_FLAG_GGTT); 481 if (IS_ERR(bo)) 482 return PTR_ERR(bo); 483 484 q = xe_exec_queue_create(xe, NULL, 485 BIT(hwe->logical_instance), 1, hwe, 486 EXEC_QUEUE_FLAG_KERNEL | 487 EXEC_QUEUE_FLAG_PERMANENT, 0); 488 if (IS_ERR(q)) { 489 xe_gt_err(gt, "Failed to create queue for GSC submission\n"); 490 err = PTR_ERR(q); 491 goto out_bo; 492 } 493 494 wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); 495 if (!wq) { 496 err = -ENOMEM; 497 goto out_q; 498 } 499 500 gsc->private = bo; 501 gsc->q = q; 502 gsc->wq = wq; 503 504 err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); 505 if (err) 506 return err; 507 508 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE); 509 510 return 0; 511 512 out_q: 513 xe_exec_queue_put(q); 514 out_bo: 515 xe_bo_unpin_map_no_vm(bo); 516 return err; 517 } 518 519 void xe_gsc_load_start(struct xe_gsc *gsc) 520 { 521 struct xe_gt *gt = gsc_to_gt(gsc); 522 struct xe_device *xe = gt_to_xe(gt); 523 524 if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) 525 return; 526 527 /* 528 * The GSC HW is only reset by driver FLR or D3cold entry. We don't 529 * support the former at runtime, while the latter is only supported on 530 * DGFX, for which we don't support GSC. Therefore, if GSC failed to 531 * load previously there is no need to try again because the HW is 532 * stuck in the error state. 533 */ 534 xe_assert(xe, !IS_DGFX(xe)); 535 if (xe_uc_fw_is_in_error_state(&gsc->fw)) 536 return; 537 538 /* GSC FW survives GT reset and D3Hot */ 539 if (gsc_fw_is_loaded(gt)) { 540 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 541 return; 542 } 543 544 spin_lock_irq(&gsc->lock); 545 gsc->work_actions |= GSC_ACTION_FW_LOAD; 546 spin_unlock_irq(&gsc->lock); 547 548 queue_work(gsc->wq, &gsc->work); 549 } 550 551 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) 552 { 553 if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) 554 flush_work(&gsc->work); 555 } 556 557 /** 558 * xe_gsc_remove() - Clean up the GSC structures before driver removal 559 * @gsc: the GSC uC 560 */ 561 void xe_gsc_remove(struct xe_gsc *gsc) 562 { 563 xe_gsc_proxy_remove(gsc); 564 } 565 566 /* 567 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a 568 * GSC engine reset by writing a notification bit in the GS1 register and then 569 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms 570 * for the FW to get prepare for the reset, so we need to wait for that amount 571 * of time. 572 * After the reset is complete we need to then clear the GS1 register. 573 */ 574 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) 575 { 576 u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; 577 u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP; 578 579 /* WA only applies if the GSC is loaded */ 580 if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) 581 return; 582 583 xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); 584 585 if (prep) { 586 /* make sure the reset bit is clear when writing the CSR reg */ 587 xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), 588 HECI_H_CSR_RST, HECI_H_CSR_IG); 589 msleep(200); 590 } 591 } 592