// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"
#include "regs/xe_gt_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}

static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}

#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
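/*
 * The compatibility version query uses a single GGTT-mapped buffer: a GSC
 * HECI packet header plus an MKHI "get compatibility version" request are
 * written in the first GSC_VER_PKT_SZ bytes, the packet is submitted via the
 * GSCCS kernel queue, and the GSC reply is then parsed from the second
 * GSC_VER_PKT_SZ bytes of the same buffer.
 */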
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM |
				  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static int gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
	       HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}
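/*
 * Main firmware load sequence: copy the GSC FW image into the private
 * stolen-memory buffer, submit a GSC_FW_LOAD instruction pointing at it,
 * then poll FWSTS1 until the GSC reports init complete. Once the FW is
 * running, its compatibility version is queried and checked against the
 * driver requirements.
 */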
static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure it stops
	 * using it when we release it to the system on driver unload. Note that
	 * this is not a problem of the unload per-se, because the GSC will not
	 * touch that memory unless there are requests for it coming from the
	 * driver; therefore, no accesses will happen while Xe is not loaded,
	 * but if we re-load the driver then the GSC might wake up and try to
	 * access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	return 0;
}

static int gsc_upload_and_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	if (XE_WA(gt, 14018094691)) {
		ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);

		/*
		 * If the forcewake fails we want to keep going, because the worst
		 * case outcome in failing to apply the WA is that PXP won't work,
		 * which is not fatal. We still throw a warning so the issue is
		 * seen if it happens.
		 */
		xe_gt_WARN_ON(tile->primary_gt, ret);

		xe_gt_mcr_multicast_write(tile->primary_gt,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
	}

	ret = gsc_upload(gsc);

	if (XE_WA(gt, 14018094691))
		xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);

	if (ret)
		return ret;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
	xe_gt_dbg(gt, "GSC FW async load completed\n");

	/* HuC auth failure is not fatal */
	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);

	ret = xe_gsc_proxy_start(gsc);
	if (ret)
		return ret;

	xe_gt_dbg(gt, "GSC proxy init completed\n");

	return 0;
}

static int gsc_er_complete(struct xe_gt *gt)
{
	u32 er_status;

	if (!gsc_fw_is_loaded(gt))
		return 0;

	/*
	 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the
	 * driver or the GuC hit the GDRST register, the CS is immediately reset
	 * and a success is reported, but the GSC shim keeps resetting in the
	 * background. While the shim reset is ongoing, the CS is able to accept
	 * new context submission, but any commands that require the shim will
	 * be stalled until the reset is completed. This means that we can keep
	 * submitting to the GSCCS as long as we make sure that the preemption
	 * timeout is big enough to cover any delay introduced by the reset.
	 * When the shim reset completes, a specific CS interrupt is triggered,
	 * in response to which we need to check the GSCI_TIMER_STATUS register
	 * to see if the reset was successful or not.
	 * Note that the GSCI_TIMER_STATUS register is not power save/restored,
	 * so it gets reset on MC6 entry. However, a reset failure stops MC6,
	 * so in that scenario we're always guaranteed to find the correct
	 * value.
	 */
	er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;

	if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
		/*
		 * XXX: we should trigger an FLR here, but we don't have support
		 * for that yet.
		 */
		xe_gt_err(gt, "GSC ER timed out!\n");
		return -EIO;
	}

	return 0;
}
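/*
 * All GSC operations that need to sleep (firmware load, proxy request
 * handling, engine reset completion checks) are funneled through this
 * worker, which runs with a runtime PM reference and the GSC forcewake
 * domain held.
 */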
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 actions;
	int ret;

	spin_lock_irq(&gsc->lock);
	actions = gsc->work_actions;
	gsc->work_actions = 0;
	spin_unlock_irq(&gsc->lock);

	xe_pm_runtime_get(xe);
	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));

	if (actions & GSC_ACTION_ER_COMPLETE) {
		ret = gsc_er_complete(gt);
		if (ret)
			goto out;
	}

	if (actions & GSC_ACTION_FW_LOAD) {
		ret = gsc_upload_and_init(gsc);
		if (ret && ret != -EEXIST)
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
		else
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
	}

	if (actions & GSC_ACTION_SW_PROXY)
		xe_gsc_proxy_request_handler(gsc);

out:
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_pm_runtime_put(xe);
}

void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_gsc *gsc = &gt->uc.gsc;

	if (unlikely(!intr_vec))
		return;

	if (intr_vec & GSC_ER_COMPLETE) {
		spin_lock(&gsc->lock);
		gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
		spin_unlock(&gsc->lock);

		queue_work(gsc->wq, &gsc->work);
	}
}
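/**
 * xe_gsc_init() - Initialize the GSC uC
 * @gsc: the GSC uC
 *
 * Initializes the GSC worker and lock, the firmware state and the proxy
 * component. The GSC uC is only available on the media GT, so on any other
 * GT the firmware is simply marked as not supported. A proxy init failure
 * with -ENODEV is not treated as fatal.
 *
 * Return: 0 if successful or if the GSC is not supported, negative error
 * code otherwise.
 */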
int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);
	spin_lock_init(&gsc->lock);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
	 * all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will keep
	 * going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	ret = xe_gsc_proxy_init(gsc);
	if (ret && ret != -ENODEV)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}

int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_STOLEN |
				  XE_BO_FLAG_GGTT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}
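/**
 * xe_gsc_load_start() - Kick off the asynchronous GSC firmware load
 * @gsc: the GSC uC
 *
 * The actual upload is performed from the GSC worker, since it requires
 * submitting to the GSCCS and waiting for the firmware to boot. If the GSC
 * is already loaded (the FW survives GT reset and D3Hot), only the firmware
 * status is updated.
 */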
void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	spin_lock_irq(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_FW_LOAD;
	spin_unlock_irq(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}

void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/**
 * xe_gsc_remove() - Clean up the GSC structures before driver removal
 * @gsc: the GSC uC
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to get prepared for the reset, so we need to wait for that amount
 * of time.
 * After the reset is complete we need to then clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}