// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <drm/drm_managed.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "generated/xe_wa_oob.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_huc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}

static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}
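/*
 * The compatibility version query below is a regular MKHI message to the GSC:
 * xe_gsc_emit_header() writes the HECI packet header addressed to the MKHI
 * client (HECI_MEADDRESS_MKHI) and emit_version_query_msg() appends the
 * command payload. A single BO is used for the whole exchange: the first
 * GSC_VER_PKT_SZ bytes hold the request and the second GSC_VER_PKT_SZ bytes
 * receive the reply, which xe_gsc_read_out_header() validates before the
 * version fields are read out.
 */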
#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;

	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_SYSTEM_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static bool gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
	       HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}
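/*
 * Load sequence: stage the released FW blob into the GSC private memory
 * (memcpy_fw), ask the GSCCS to boot the GSC from it (emit_gsc_upload) and
 * poll FWSTS1 until the GSC declares init complete (gsc_fw_wait). Once the
 * FW is running, its compatibility version is queried and checked against
 * what the driver requires.
 */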
static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure it stops
	 * using it when we release it to the system on driver unload. Note
	 * that this is not a problem of the unload per se, because the GSC
	 * will not touch that memory unless there are requests for it coming
	 * from the driver; therefore, no accesses will happen while Xe is not
	 * loaded, but if we re-load the driver then the GSC might wake up and
	 * try to access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	xe_gt_dbg(gt, "GSC FW async load completed\n");

	return 0;
}

static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);

	ret = gsc_upload(gsc);
	if (ret && ret != -EEXIST) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
		goto out;
	}

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);

	/* HuC auth failure is not fatal */
	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);

out:
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_device_mem_access_put(xe);
}
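/*
 * GSC init is split across the driver probe flow: xe_gsc_init() runs early
 * and only fetches the FW blob, xe_gsc_init_post_hwconfig() allocates the
 * private memory and the submission queue, and xe_gsc_load_start() finally
 * queues gsc_work() to perform the load. The load itself runs from a worker
 * because the GSC can take hundreds of ms to boot, with
 * xe_gsc_wait_for_worker_completion() available for paths that need to sync
 * against it.
 */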
int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and
	 * abort all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will
	 * keep going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}

int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	queue_work(gsc->wq, &gsc->work);
}

void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to get prepared for the reset, so we need to wait for that
 * amount of time.
 * After the reset is complete we need to then clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}