1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023 Intel Corporation 4 */ 5 6 #include "xe_gsc.h" 7 8 #include <linux/delay.h> 9 10 #include <drm/drm_managed.h> 11 12 #include <generated/xe_wa_oob.h> 13 14 #include "abi/gsc_mkhi_commands_abi.h" 15 #include "xe_bb.h" 16 #include "xe_bo.h" 17 #include "xe_device.h" 18 #include "xe_exec_queue.h" 19 #include "xe_force_wake.h" 20 #include "xe_gsc_proxy.h" 21 #include "xe_gsc_submit.h" 22 #include "xe_gt.h" 23 #include "xe_gt_mcr.h" 24 #include "xe_gt_printk.h" 25 #include "xe_huc.h" 26 #include "xe_map.h" 27 #include "xe_mmio.h" 28 #include "xe_pm.h" 29 #include "xe_sched_job.h" 30 #include "xe_uc_fw.h" 31 #include "xe_wa.h" 32 #include "instructions/xe_gsc_commands.h" 33 #include "regs/xe_gsc_regs.h" 34 #include "regs/xe_gt_regs.h" 35 36 static struct xe_gt * 37 gsc_to_gt(struct xe_gsc *gsc) 38 { 39 return container_of(gsc, struct xe_gt, uc.gsc); 40 } 41 42 static int memcpy_fw(struct xe_gsc *gsc) 43 { 44 struct xe_gt *gt = gsc_to_gt(gsc); 45 struct xe_device *xe = gt_to_xe(gt); 46 u32 fw_size = gsc->fw.size; 47 void *storage; 48 49 /* 50 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use 51 * a memcpy for now. 52 */ 53 storage = kmalloc(fw_size, GFP_KERNEL); 54 if (!storage) 55 return -ENOMEM; 56 57 xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); 58 xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); 59 xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); 60 61 kfree(storage); 62 63 return 0; 64 } 65 66 static int emit_gsc_upload(struct xe_gsc *gsc) 67 { 68 struct xe_gt *gt = gsc_to_gt(gsc); 69 u64 offset = xe_bo_ggtt_addr(gsc->private); 70 struct xe_bb *bb; 71 struct xe_sched_job *job; 72 struct dma_fence *fence; 73 long timeout; 74 75 bb = xe_bb_new(gt, 4, false); 76 if (IS_ERR(bb)) 77 return PTR_ERR(bb); 78 79 bb->cs[bb->len++] = GSC_FW_LOAD; 80 bb->cs[bb->len++] = lower_32_bits(offset); 81 bb->cs[bb->len++] = upper_32_bits(offset); 82 bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; 83 84 job = xe_bb_create_job(gsc->q, bb); 85 if (IS_ERR(job)) { 86 xe_bb_free(bb, NULL); 87 return PTR_ERR(job); 88 } 89 90 xe_sched_job_arm(job); 91 fence = dma_fence_get(&job->drm.s_fence->finished); 92 xe_sched_job_push(job); 93 94 timeout = dma_fence_wait_timeout(fence, false, HZ); 95 dma_fence_put(fence); 96 xe_bb_free(bb, NULL); 97 if (timeout < 0) 98 return timeout; 99 else if (!timeout) 100 return -ETIME; 101 102 return 0; 103 } 104 105 #define version_query_wr(xe_, map_, offset_, field_, val_) \ 106 xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_) 107 #define version_query_rd(xe_, map_, offset_, field_) \ 108 xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_) 109 110 static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset) 111 { 112 xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in)); 113 114 version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV); 115 version_query_wr(xe, map, wr_offset, header.command, 116 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION); 117 118 return wr_offset + sizeof(struct gsc_get_compatibility_version_in); 119 } 120 121 #define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ 122 static int query_compatibility_version(struct xe_gsc *gsc) 123 { 124 struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; 125 struct xe_gt *gt = gsc_to_gt(gsc); 126 struct xe_tile *tile = gt_to_tile(gt); 127 struct xe_device *xe = gt_to_xe(gt); 128 struct xe_bo *bo; 129 u32 wr_offset; 130 u32 rd_offset; 131 u64 ggtt_offset; 132 int err; 133 134 bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, 135 ttm_bo_type_kernel, 136 XE_BO_FLAG_SYSTEM | 137 XE_BO_FLAG_GGTT); 138 if (IS_ERR(bo)) { 139 xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); 140 return PTR_ERR(bo); 141 } 142 143 ggtt_offset = xe_bo_ggtt_addr(bo); 144 145 wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, 146 sizeof(struct gsc_get_compatibility_version_in)); 147 wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); 148 149 err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, 150 ggtt_offset + GSC_VER_PKT_SZ, 151 GSC_VER_PKT_SZ); 152 if (err) { 153 xe_gt_err(gt, 154 "failed to submit GSC request for compatibility version: %d\n", 155 err); 156 goto out_bo; 157 } 158 159 err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, 160 sizeof(struct gsc_get_compatibility_version_out), 161 &rd_offset); 162 if (err) { 163 xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); 164 return err; 165 } 166 167 compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); 168 compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); 169 170 xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); 171 172 out_bo: 173 xe_bo_unpin_map_no_vm(bo); 174 return err; 175 } 176 177 static int gsc_fw_is_loaded(struct xe_gt *gt) 178 { 179 return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & 180 HECI1_FWSTS1_INIT_COMPLETE; 181 } 182 183 static int gsc_fw_wait(struct xe_gt *gt) 184 { 185 /* 186 * GSC load can take up to 250ms from the moment the instruction is 187 * executed by the GSCCS. To account for possible submission delays or 188 * other issues, we use a 500ms timeout in the wait here. 189 */ 190 return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), 191 HECI1_FWSTS1_INIT_COMPLETE, 192 HECI1_FWSTS1_INIT_COMPLETE, 193 500 * USEC_PER_MSEC, NULL, false); 194 } 195 196 static int gsc_upload(struct xe_gsc *gsc) 197 { 198 struct xe_gt *gt = gsc_to_gt(gsc); 199 struct xe_device *xe = gt_to_xe(gt); 200 int err; 201 202 /* we should only be here if the init step were successful */ 203 xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); 204 205 if (gsc_fw_is_loaded(gt)) { 206 xe_gt_err(gt, "GSC already loaded at upload time\n"); 207 return -EEXIST; 208 } 209 210 err = memcpy_fw(gsc); 211 if (err) { 212 xe_gt_err(gt, "Failed to memcpy GSC FW\n"); 213 return err; 214 } 215 216 /* 217 * GSC is only killed by an FLR, so we need to trigger one on unload to 218 * make sure we stop it. This is because we assign a chunk of memory to 219 * the GSC as part of the FW load, so we need to make sure it stops 220 * using it when we release it to the system on driver unload. Note that 221 * this is not a problem of the unload per-se, because the GSC will not 222 * touch that memory unless there are requests for it coming from the 223 * driver; therefore, no accesses will happen while Xe is not loaded, 224 * but if we re-load the driver then the GSC might wake up and try to 225 * access that old memory location again. 226 * Given that an FLR is a very disruptive action (see the FLR function 227 * for details), we want to do it as the last action before releasing 228 * the access to the MMIO bar, which means we need to do it as part of 229 * mmio cleanup. 230 */ 231 xe->needs_flr_on_fini = true; 232 233 err = emit_gsc_upload(gsc); 234 if (err) { 235 xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); 236 return err; 237 } 238 239 err = gsc_fw_wait(gt); 240 if (err) { 241 xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); 242 return err; 243 } 244 245 err = query_compatibility_version(gsc); 246 if (err) 247 return err; 248 249 err = xe_uc_fw_check_version_requirements(&gsc->fw); 250 if (err) 251 return err; 252 253 return 0; 254 } 255 256 static int gsc_upload_and_init(struct xe_gsc *gsc) 257 { 258 struct xe_gt *gt = gsc_to_gt(gsc); 259 struct xe_tile *tile = gt_to_tile(gt); 260 int ret; 261 262 if (XE_WA(gt, 14018094691)) { 263 ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 264 265 /* 266 * If the forcewake fails we want to keep going, because the worst 267 * case outcome in failing to apply the WA is that PXP won't work, 268 * which is not fatal. We still throw a warning so the issue is 269 * seen if it happens. 270 */ 271 xe_gt_WARN_ON(tile->primary_gt, ret); 272 273 xe_gt_mcr_multicast_write(tile->primary_gt, 274 EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, 275 EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); 276 } 277 278 ret = gsc_upload(gsc); 279 280 if (XE_WA(gt, 14018094691)) 281 xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); 282 283 if (ret) 284 return ret; 285 286 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 287 xe_gt_dbg(gt, "GSC FW async load completed\n"); 288 289 /* HuC auth failure is not fatal */ 290 if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) 291 xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); 292 293 ret = xe_gsc_proxy_start(gsc); 294 if (ret) 295 return ret; 296 297 xe_gt_dbg(gt, "GSC proxy init completed\n"); 298 299 return 0; 300 } 301 302 static int gsc_er_complete(struct xe_gt *gt) 303 { 304 u32 er_status; 305 306 if (!gsc_fw_is_loaded(gt)) 307 return 0; 308 309 /* 310 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the 311 * driver or the GuC hit the GDRST register, the CS is immediately reset 312 * and a success is reported, but the GSC shim keeps resetting in the 313 * background. While the shim reset is ongoing, the CS is able to accept 314 * new context submission, but any commands that require the shim will 315 * be stalled until the reset is completed. This means that we can keep 316 * submitting to the GSCCS as long as we make sure that the preemption 317 * timeout is big enough to cover any delay introduced by the reset. 318 * When the shim reset completes, a specific CS interrupt is triggered, 319 * in response to which we need to check the GSCI_TIMER_STATUS register 320 * to see if the reset was successful or not. 321 * Note that the GSCI_TIMER_STATUS register is not power save/restored, 322 * so it gets reset on MC6 entry. However, a reset failure stops MC6, 323 * so in that scenario we're always guaranteed to find the correct 324 * value. 325 */ 326 er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; 327 328 if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { 329 /* 330 * XXX: we should trigger an FLR here, but we don't have support 331 * for that yet. 332 */ 333 xe_gt_err(gt, "GSC ER timed out!\n"); 334 return -EIO; 335 } 336 337 return 0; 338 } 339 340 static void gsc_work(struct work_struct *work) 341 { 342 struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); 343 struct xe_gt *gt = gsc_to_gt(gsc); 344 struct xe_device *xe = gt_to_xe(gt); 345 u32 actions; 346 int ret; 347 348 spin_lock_irq(&gsc->lock); 349 actions = gsc->work_actions; 350 gsc->work_actions = 0; 351 spin_unlock_irq(&gsc->lock); 352 353 xe_pm_runtime_get(xe); 354 xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); 355 356 if (actions & GSC_ACTION_ER_COMPLETE) { 357 ret = gsc_er_complete(gt); 358 if (ret) 359 goto out; 360 } 361 362 if (actions & GSC_ACTION_FW_LOAD) { 363 ret = gsc_upload_and_init(gsc); 364 if (ret && ret != -EEXIST) 365 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 366 else 367 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); 368 } 369 370 if (actions & GSC_ACTION_SW_PROXY) 371 xe_gsc_proxy_request_handler(gsc); 372 373 out: 374 xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); 375 xe_pm_runtime_put(xe); 376 } 377 378 void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) 379 { 380 struct xe_gt *gt = hwe->gt; 381 struct xe_gsc *gsc = >->uc.gsc; 382 383 if (unlikely(!intr_vec)) 384 return; 385 386 if (intr_vec & GSC_ER_COMPLETE) { 387 spin_lock(&gsc->lock); 388 gsc->work_actions |= GSC_ACTION_ER_COMPLETE; 389 spin_unlock(&gsc->lock); 390 391 queue_work(gsc->wq, &gsc->work); 392 } 393 } 394 395 int xe_gsc_init(struct xe_gsc *gsc) 396 { 397 struct xe_gt *gt = gsc_to_gt(gsc); 398 struct xe_tile *tile = gt_to_tile(gt); 399 int ret; 400 401 gsc->fw.type = XE_UC_FW_TYPE_GSC; 402 INIT_WORK(&gsc->work, gsc_work); 403 spin_lock_init(&gsc->lock); 404 405 /* The GSC uC is only available on the media GT */ 406 if (tile->media_gt && (gt != tile->media_gt)) { 407 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); 408 return 0; 409 } 410 411 /* 412 * Some platforms can have GuC but not GSC. That would cause 413 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort 414 * all firmware loading. So check for GSC being enabled before 415 * propagating the failure back up. That way the higher level will keep 416 * going and load GuC as appropriate. 417 */ 418 ret = xe_uc_fw_init(&gsc->fw); 419 if (!xe_uc_fw_is_enabled(&gsc->fw)) 420 return 0; 421 else if (ret) 422 goto out; 423 424 ret = xe_gsc_proxy_init(gsc); 425 if (ret && ret != -ENODEV) 426 goto out; 427 428 return 0; 429 430 out: 431 xe_gt_err(gt, "GSC init failed with %d", ret); 432 return ret; 433 } 434 435 static void free_resources(struct drm_device *drm, void *arg) 436 { 437 struct xe_gsc *gsc = arg; 438 439 if (gsc->wq) { 440 destroy_workqueue(gsc->wq); 441 gsc->wq = NULL; 442 } 443 444 if (gsc->q) { 445 xe_exec_queue_put(gsc->q); 446 gsc->q = NULL; 447 } 448 449 if (gsc->private) { 450 xe_bo_unpin_map_no_vm(gsc->private); 451 gsc->private = NULL; 452 } 453 } 454 455 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) 456 { 457 struct xe_gt *gt = gsc_to_gt(gsc); 458 struct xe_tile *tile = gt_to_tile(gt); 459 struct xe_device *xe = gt_to_xe(gt); 460 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); 461 struct xe_exec_queue *q; 462 struct workqueue_struct *wq; 463 struct xe_bo *bo; 464 int err; 465 466 if (!xe_uc_fw_is_available(&gsc->fw)) 467 return 0; 468 469 if (!hwe) 470 return -ENODEV; 471 472 bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, 473 ttm_bo_type_kernel, 474 XE_BO_FLAG_STOLEN | 475 XE_BO_FLAG_GGTT); 476 if (IS_ERR(bo)) 477 return PTR_ERR(bo); 478 479 q = xe_exec_queue_create(xe, NULL, 480 BIT(hwe->logical_instance), 1, hwe, 481 EXEC_QUEUE_FLAG_KERNEL | 482 EXEC_QUEUE_FLAG_PERMANENT, 0); 483 if (IS_ERR(q)) { 484 xe_gt_err(gt, "Failed to create queue for GSC submission\n"); 485 err = PTR_ERR(q); 486 goto out_bo; 487 } 488 489 wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); 490 if (!wq) { 491 err = -ENOMEM; 492 goto out_q; 493 } 494 495 gsc->private = bo; 496 gsc->q = q; 497 gsc->wq = wq; 498 499 err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); 500 if (err) 501 return err; 502 503 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE); 504 505 return 0; 506 507 out_q: 508 xe_exec_queue_put(q); 509 out_bo: 510 xe_bo_unpin_map_no_vm(bo); 511 return err; 512 } 513 514 void xe_gsc_load_start(struct xe_gsc *gsc) 515 { 516 struct xe_gt *gt = gsc_to_gt(gsc); 517 518 if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) 519 return; 520 521 /* GSC FW survives GT reset and D3Hot */ 522 if (gsc_fw_is_loaded(gt)) { 523 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 524 return; 525 } 526 527 spin_lock_irq(&gsc->lock); 528 gsc->work_actions |= GSC_ACTION_FW_LOAD; 529 spin_unlock_irq(&gsc->lock); 530 531 queue_work(gsc->wq, &gsc->work); 532 } 533 534 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) 535 { 536 if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) 537 flush_work(&gsc->work); 538 } 539 540 /** 541 * xe_gsc_remove() - Clean up the GSC structures before driver removal 542 * @gsc: the GSC uC 543 */ 544 void xe_gsc_remove(struct xe_gsc *gsc) 545 { 546 xe_gsc_proxy_remove(gsc); 547 } 548 549 /* 550 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a 551 * GSC engine reset by writing a notification bit in the GS1 register and then 552 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms 553 * for the FW to get prepare for the reset, so we need to wait for that amount 554 * of time. 555 * After the reset is complete we need to then clear the GS1 register. 556 */ 557 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) 558 { 559 u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; 560 u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP; 561 562 /* WA only applies if the GSC is loaded */ 563 if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) 564 return; 565 566 xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); 567 568 if (prep) { 569 /* make sure the reset bit is clear when writing the CSR reg */ 570 xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), 571 HECI_H_CSR_RST, HECI_H_CSR_IG); 572 msleep(200); 573 } 574 } 575