// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}

static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}

#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */

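/*
 * The compatibility version is queried by sending a MKHI
 * GET_HOST_COMPATIBILITY_VERSION command to the GSC. The request and the
 * reply share a single BO, with the input packet starting at offset 0 and
 * the output packet starting GSC_VER_PKT_SZ bytes after it.
 */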
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_SYSTEM_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static int gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
	       HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}

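/*
 * The FW image is copied into the private stolen-memory allocation and the
 * GSC is then pointed at it via a GSC_FW_LOAD submission on the GSCCS. Once
 * the FWSTS1 init-complete bit is set, the compatibility version is queried
 * and checked against the driver requirements.
 */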
static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure it stops
	 * using it when we release it to the system on driver unload. Note
	 * that this is not a problem of the unload per se, because the GSC
	 * will not touch that memory unless there are requests for it coming
	 * from the driver; therefore, no accesses will happen while Xe is not
	 * loaded, but if we re-load the driver then the GSC might wake up and
	 * try to access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	return 0;
}

static int gsc_upload_and_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	int ret;

	ret = gsc_upload(gsc);
	if (ret)
		return ret;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
	xe_gt_dbg(gt, "GSC FW async load completed\n");

	/* HuC auth failure is not fatal */
	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);

	ret = xe_gsc_proxy_start(gsc);
	if (ret)
		return ret;

	xe_gt_dbg(gt, "GSC proxy init completed\n");

	return 0;
}

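/*
 * Worker for the deferred GSC actions: both the initial FW load and the
 * handling of proxy requests from the GSC FW are flagged in
 * gsc->work_actions and processed here.
 */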
static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 actions;
	int ret;

	spin_lock_irq(&gsc->lock);
	actions = gsc->work_actions;
	gsc->work_actions = 0;
	spin_unlock_irq(&gsc->lock);

	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);

	if (actions & GSC_ACTION_FW_LOAD) {
		ret = gsc_upload_and_init(gsc);
		if (ret && ret != -EEXIST)
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
		else
			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
	}

	if (actions & GSC_ACTION_SW_PROXY)
		xe_gsc_proxy_request_handler(gsc);

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_device_mem_access_put(xe);
}

int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);
	spin_lock_init(&gsc->lock);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and
	 * abort all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will
	 * keep going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	ret = xe_gsc_proxy_init(gsc);
	if (ret && ret != -ENODEV)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d", ret);
	return ret;
}

static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}

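/**
 * xe_gsc_init_post_hwconfig() - Allocate the resources needed for GSC submission
 * @gsc: the GSC uC
 *
 * Allocates the GSC private memory in stolen, the kernel exec queue used to
 * submit to the GSCCS and the ordered workqueue used for the deferred GSC
 * work, then marks the GSC FW as loadable.
 *
 * Return: 0 on success, negative error code otherwise.
 */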
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	spin_lock_irq(&gsc->lock);
	gsc->work_actions |= GSC_ACTION_FW_LOAD;
	spin_unlock_irq(&gsc->lock);

	queue_work(gsc->wq, &gsc->work);
}

void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/**
 * xe_gsc_remove() - Clean up the GSC structures before driver removal
 * @gsc: the GSC uC
 */
void xe_gsc_remove(struct xe_gsc *gsc)
{
	xe_gsc_proxy_remove(gsc);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to get prepared for the reset, so we need to wait for that amount
 * of time.
 * After the reset is complete we then need to clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}