// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "xe_gsc.h"

#include <drm/drm_managed.h>

#include "abi/gsc_mkhi_commands_abi.h"
#include "generated/xe_wa_oob.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_sched_job.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
#include "instructions/xe_gsc_commands.h"
#include "regs/xe_gsc_regs.h"

static struct xe_gt *
gsc_to_gt(struct xe_gsc *gsc)
{
	return container_of(gsc, struct xe_gt, uc.gsc);
}

static int memcpy_fw(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	u32 fw_size = gsc->fw.size;
	void *storage;

	/*
	 * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use
	 * a memcpy for now.
	 */
	storage = kmalloc(fw_size, GFP_KERNEL);
	if (!storage)
		return -ENOMEM;

	xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
	xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
	xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);

	kfree(storage);

	return 0;
}

static int emit_gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	u64 offset = xe_bo_ggtt_addr(gsc->private);
	struct xe_bb *bb;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb->cs[bb->len++] = GSC_FW_LOAD;
	bb->cs[bb->len++] = lower_32_bits(offset);
	bb->cs[bb->len++] = upper_32_bits(offset);
	bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;

	job = xe_bb_create_job(gsc->q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

#define version_query_wr(xe_, map_, offset_, field_, val_) \
	xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_)
#define version_query_rd(xe_, map_, offset_, field_) \
	xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_)

static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset)
{
	xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in));

	version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV);
	version_query_wr(xe, map, wr_offset, header.command,
			 MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION);

	return wr_offset + sizeof(struct gsc_get_compatibility_version_in);
}

#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */
static int query_compatibility_version(struct xe_gsc *gsc)
{
	struct xe_uc_fw_version *compat = &gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_bo *bo;
	u32 wr_offset;
	u32 rd_offset;
	u64 ggtt_offset;
	int err;
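	/*
	 * Descriptive note on the layout used below: a single BO holds both
	 * halves of the exchange, with the request packet written at offset 0
	 * and the GSC reply read back from the second GSC_VER_PKT_SZ half. The
	 * BO is mapped in the GGTT so the GSC can access it.
	 */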
	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_SYSTEM_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo)) {
		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
		return PTR_ERR(bo);
	}

	ggtt_offset = xe_bo_ggtt_addr(bo);

	wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0,
				       sizeof(struct gsc_get_compatibility_version_in));
	wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset);

	err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset,
				       ggtt_offset + GSC_VER_PKT_SZ,
				       GSC_VER_PKT_SZ);
	if (err) {
		xe_gt_err(gt,
			  "failed to submit GSC request for compatibility version: %d\n",
			  err);
		goto out_bo;
	}

	err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ,
				     sizeof(struct gsc_get_compatibility_version_out),
				     &rd_offset);
	if (err) {
		xe_gt_err(gt, "invalid GSC reply for version query (err=%d)\n", err);
		goto out_bo;
	}

	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major);
	compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor);

	xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor);

out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

static int gsc_fw_is_loaded(struct xe_gt *gt)
{
	return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) &
	       HECI1_FWSTS1_INIT_COMPLETE;
}

static int gsc_fw_wait(struct xe_gt *gt)
{
	/*
	 * GSC load can take up to 250ms from the moment the instruction is
	 * executed by the GSCCS. To account for possible submission delays or
	 * other issues, we use a 500ms timeout in the wait here.
	 */
	return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE),
			      HECI1_FWSTS1_INIT_COMPLETE,
			      HECI1_FWSTS1_INIT_COMPLETE,
			      500 * USEC_PER_MSEC, NULL, false);
}

static int gsc_upload(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int err;

	/* we should only be here if the init step was successful */
	xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q);

	if (gsc_fw_is_loaded(gt)) {
		xe_gt_err(gt, "GSC already loaded at upload time\n");
		return -EEXIST;
	}

	err = memcpy_fw(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to memcpy GSC FW\n");
		return err;
	}

	/*
	 * GSC is only killed by an FLR, so we need to trigger one on unload to
	 * make sure we stop it. This is because we assign a chunk of memory to
	 * the GSC as part of the FW load, so we need to make sure it stops
	 * using it when we release it to the system on driver unload. Note that
	 * this is not a problem of the unload per se, because the GSC will not
	 * touch that memory unless there are requests for it coming from the
	 * driver; therefore, no accesses will happen while Xe is not loaded,
	 * but if we re-load the driver then the GSC might wake up and try to
	 * access that old memory location again.
	 * Given that an FLR is a very disruptive action (see the FLR function
	 * for details), we want to do it as the last action before releasing
	 * the access to the MMIO bar, which means we need to do it as part of
	 * mmio cleanup.
	 */
	xe->needs_flr_on_fini = true;

	err = emit_gsc_upload(gsc);
	if (err) {
		xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = gsc_fw_wait(gt);
	if (err) {
		xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err));
		return err;
	}

	err = query_compatibility_version(gsc);
	if (err)
		return err;

	err = xe_uc_fw_check_version_requirements(&gsc->fw);
	if (err)
		return err;

	xe_gt_dbg(gt, "GSC FW async load completed\n");

	return 0;
}

static void gsc_work(struct work_struct *work)
{
	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	xe_device_mem_access_get(xe);
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);

	ret = gsc_upload(gsc);
	if (ret && ret != -EEXIST)
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
	else
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
	xe_device_mem_access_put(xe);
}

int xe_gsc_init(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	int ret;

	gsc->fw.type = XE_UC_FW_TYPE_GSC;
	INIT_WORK(&gsc->work, gsc_work);

	/* The GSC uC is only available on the media GT */
	if (tile->media_gt && (gt != tile->media_gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
		return 0;
	}

	/*
	 * Some platforms can have GuC but not GSC. That would cause
	 * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort
	 * all firmware loading. So check for GSC being enabled before
	 * propagating the failure back up. That way the higher level will keep
	 * going and load GuC as appropriate.
	 */
	ret = xe_uc_fw_init(&gsc->fw);
	if (!xe_uc_fw_is_enabled(&gsc->fw))
		return 0;
	else if (ret)
		goto out;

	return 0;

out:
	xe_gt_err(gt, "GSC init failed with %d\n", ret);
	return ret;
}

static void free_resources(struct drm_device *drm, void *arg)
{
	struct xe_gsc *gsc = arg;

	if (gsc->wq) {
		destroy_workqueue(gsc->wq);
		gsc->wq = NULL;
	}

	if (gsc->q) {
		xe_exec_queue_put(gsc->q);
		gsc->q = NULL;
	}

	if (gsc->private) {
		xe_bo_unpin_map_no_vm(gsc->private);
		gsc->private = NULL;
	}
}

int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);
	struct xe_exec_queue *q;
	struct workqueue_struct *wq;
	struct xe_bo *bo;
	int err;

	if (!xe_uc_fw_is_available(&gsc->fw))
		return 0;

	if (!hwe)
		return -ENODEV;

	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	q = xe_exec_queue_create(xe, NULL,
				 BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT);
	if (IS_ERR(q)) {
		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
		err = PTR_ERR(q);
		goto out_bo;
	}

	wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
	if (!wq) {
		err = -ENOMEM;
		goto out_q;
	}

	gsc->private = bo;
	gsc->q = q;
	gsc->wq = wq;

	err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
	if (err)
		return err;

	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE);

	return 0;

out_q:
	xe_exec_queue_put(q);
out_bo:
	xe_bo_unpin_map_no_vm(bo);
	return err;
}

void xe_gsc_load_start(struct xe_gsc *gsc)
{
	struct xe_gt *gt = gsc_to_gt(gsc);

	if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q)
		return;

	/* GSC FW survives GT reset and D3Hot */
	if (gsc_fw_is_loaded(gt)) {
		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
		return;
	}

	queue_work(gsc->wq, &gsc->work);
}

void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
{
	if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq)
		flush_work(&gsc->work);
}

/*
 * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
 * GSC engine reset by writing a notification bit in the GS1 register and then
 * triggering an interrupt to GSC; from the interrupt it will take up to 200ms
 * for the FW to get prepared for the reset, so we need to wait for that amount
 * of time.
 * After the reset is complete we need to then clear the GS1 register.
 */
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
{
	u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0;
	u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;

	/* WA only applies if the GSC is loaded */
	if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
		return;

	xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);

	if (prep) {
		/* make sure the reset bit is clear when writing the CSR reg */
		xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE),
			      HECI_H_CSR_RST, HECI_H_CSR_IG);
		msleep(200);
	}
}