1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023-2024 Intel Corporation 4 */ 5 6 #include <linux/bitfield.h> 7 #include <linux/bsearch.h> 8 9 #include <drm/drm_managed.h> 10 #include <drm/drm_print.h> 11 12 #include "abi/guc_actions_sriov_abi.h" 13 #include "abi/guc_communication_mmio_abi.h" 14 #include "abi/guc_klvs_abi.h" 15 #include "abi/guc_relay_actions_abi.h" 16 #include "regs/xe_gt_regs.h" 17 #include "regs/xe_gtt_defs.h" 18 19 #include "xe_assert.h" 20 #include "xe_device.h" 21 #include "xe_ggtt.h" 22 #include "xe_gt_sriov_printk.h" 23 #include "xe_gt_sriov_vf.h" 24 #include "xe_gt_sriov_vf_types.h" 25 #include "xe_guc.h" 26 #include "xe_guc_ct.h" 27 #include "xe_guc_hxg_helpers.h" 28 #include "xe_guc_relay.h" 29 #include "xe_guc_submit.h" 30 #include "xe_irq.h" 31 #include "xe_lrc.h" 32 #include "xe_memirq.h" 33 #include "xe_mmio.h" 34 #include "xe_pm.h" 35 #include "xe_sriov.h" 36 #include "xe_sriov_vf.h" 37 #include "xe_sriov_vf_ccs.h" 38 #include "xe_tile_sriov_vf.h" 39 #include "xe_tlb_inval.h" 40 #include "xe_uc_fw.h" 41 #include "xe_wopcm.h" 42 43 #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) 44 45 static int guc_action_vf_reset(struct xe_guc *guc) 46 { 47 u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { 48 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 49 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 50 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_VF_RESET), 51 }; 52 int ret; 53 54 ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); 55 56 return ret > 0 ? -EPROTO : ret; 57 } 58 59 #define GUC_RESET_VF_STATE_RETRY_MAX 10 60 static int vf_reset_guc_state(struct xe_gt *gt) 61 { 62 unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX; 63 struct xe_guc *guc = >->uc.guc; 64 int err; 65 66 do { 67 err = guc_action_vf_reset(guc); 68 if (!err || err != -ETIMEDOUT) 69 break; 70 } while (--retry); 71 72 if (unlikely(err)) 73 xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); 74 return err; 75 } 76 77 /** 78 * xe_gt_sriov_vf_reset - Reset GuC VF internal state. 79 * @gt: the &xe_gt 80 * 81 * It requires functional `GuC MMIO based communication`_. 82 * 83 * Return: 0 on success or a negative error code on failure. 84 */ 85 int xe_gt_sriov_vf_reset(struct xe_gt *gt) 86 { 87 if (!xe_device_uc_enabled(gt_to_xe(gt))) 88 return -ENODEV; 89 90 return vf_reset_guc_state(gt); 91 } 92 93 static int guc_action_match_version(struct xe_guc *guc, 94 struct xe_uc_fw_version *wanted, 95 struct xe_uc_fw_version *found) 96 { 97 u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { 98 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 99 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 100 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 101 GUC_ACTION_VF2GUC_MATCH_VERSION), 102 FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) | 103 FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) | 104 FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor), 105 }; 106 u32 response[GUC_MAX_MMIO_MSG_LEN]; 107 int ret; 108 109 BUILD_BUG_ON(VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN > GUC_MAX_MMIO_MSG_LEN); 110 111 ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); 112 if (unlikely(ret < 0)) 113 return ret; 114 115 if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) 116 return -EPROTO; 117 118 memset(found, 0, sizeof(struct xe_uc_fw_version)); 119 found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); 120 found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); 121 found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); 122 found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); 123 124 return 0; 125 } 126 127 static int guc_action_match_version_any(struct xe_guc *guc, 128 struct xe_uc_fw_version *found) 129 { 130 struct xe_uc_fw_version wanted = { 131 .branch = GUC_VERSION_BRANCH_ANY, 132 .major = GUC_VERSION_MAJOR_ANY, 133 .minor = GUC_VERSION_MINOR_ANY, 134 .patch = 0 135 }; 136 137 return guc_action_match_version(guc, &wanted, found); 138 } 139 140 static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) 141 { 142 struct xe_device *xe = gt_to_xe(gt); 143 144 memset(ver, 0, sizeof(struct xe_uc_fw_version)); 145 146 switch (xe->info.platform) { 147 case XE_TIGERLAKE ... XE_PVC: 148 /* 1.1 this is current baseline for Xe driver */ 149 ver->branch = 0; 150 ver->major = 1; 151 ver->minor = 1; 152 break; 153 default: 154 /* 1.2 has support for the GMD_ID KLV */ 155 ver->branch = 0; 156 ver->major = 1; 157 ver->minor = 2; 158 break; 159 } 160 } 161 162 static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver) 163 { 164 /* for now it's the same as minimum */ 165 return vf_minimum_guc_version(gt, ver); 166 } 167 168 static int vf_handshake_with_guc(struct xe_gt *gt) 169 { 170 struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; 171 struct xe_uc_fw_version wanted = {0}; 172 struct xe_guc *guc = >->uc.guc; 173 bool old = false; 174 int err; 175 176 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 177 178 /* select wanted version - prefer previous (if any) */ 179 if (guc_version->major || guc_version->minor) { 180 wanted = *guc_version; 181 old = true; 182 } else { 183 vf_wanted_guc_version(gt, &wanted); 184 xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); 185 186 /* First time we handshake, so record the minimum wanted */ 187 gt->sriov.vf.wanted_guc_version = wanted; 188 } 189 190 err = guc_action_match_version(guc, &wanted, guc_version); 191 if (unlikely(err)) 192 goto fail; 193 194 if (old) { 195 /* we don't support interface version change */ 196 if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) { 197 xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", 198 guc_version->branch, guc_version->major, 199 guc_version->minor, guc_version->patch); 200 xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", 201 wanted.branch, wanted.major, 202 wanted.minor, wanted.patch); 203 err = -EREMCHG; 204 goto fail; 205 } else { 206 /* version is unchanged, no need to re-verify it */ 207 return 0; 208 } 209 } 210 211 /* illegal */ 212 if (guc_version->major > wanted.major) { 213 err = -EPROTO; 214 goto unsupported; 215 } 216 217 /* there's no fallback on major version. */ 218 if (guc_version->major != wanted.major) { 219 err = -ENOPKG; 220 goto unsupported; 221 } 222 223 /* check against minimum version supported by us */ 224 vf_minimum_guc_version(gt, &wanted); 225 xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY); 226 if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) { 227 err = -ENOKEY; 228 goto unsupported; 229 } 230 231 xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", 232 guc_version->branch, guc_version->major, 233 guc_version->minor, guc_version->patch); 234 235 return 0; 236 237 unsupported: 238 xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", 239 guc_version->branch, guc_version->major, 240 guc_version->minor, guc_version->patch, 241 ERR_PTR(err)); 242 fail: 243 xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", 244 wanted.major, wanted.minor, ERR_PTR(err)); 245 246 /* try again with *any* just to query which version is supported */ 247 if (!guc_action_match_version_any(guc, &wanted)) 248 xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", 249 wanted.branch, wanted.major, wanted.minor, wanted.patch); 250 return err; 251 } 252 253 /** 254 * xe_gt_sriov_vf_bootstrap - Query and setup GuC ABI interface version. 255 * @gt: the &xe_gt 256 * 257 * This function is for VF use only. 258 * It requires functional `GuC MMIO based communication`_. 259 * 260 * Return: 0 on success or a negative error code on failure. 261 */ 262 int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) 263 { 264 int err; 265 266 if (!xe_device_uc_enabled(gt_to_xe(gt))) 267 return -ENODEV; 268 269 err = vf_reset_guc_state(gt); 270 if (unlikely(err)) 271 return err; 272 273 err = vf_handshake_with_guc(gt); 274 if (unlikely(err)) 275 return err; 276 277 return 0; 278 } 279 280 /** 281 * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions 282 * @gt: the &xe_gt 283 * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version 284 * @found: pointer to the xe_uc_fw_version to be filled with the found version 285 * 286 * This function is for VF use only and it can only be used after successful 287 * version handshake with the GuC. 288 */ 289 void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, 290 struct xe_uc_fw_version *wanted, 291 struct xe_uc_fw_version *found) 292 { 293 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 294 xe_gt_assert(gt, gt->sriov.vf.guc_version.major); 295 296 if (wanted) 297 *wanted = gt->sriov.vf.wanted_guc_version; 298 299 if (found) 300 *found = gt->sriov.vf.guc_version; 301 } 302 303 static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) 304 { 305 u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { 306 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 307 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 308 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), 309 }; 310 int ret; 311 312 ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); 313 314 return ret > 0 ? -EPROTO : ret; 315 } 316 317 /** 318 * vf_notify_resfix_done - Notify GuC about resource fixups apply completed. 319 * @gt: the &xe_gt struct instance linked to target GuC 320 * 321 * Returns: 0 if the operation completed successfully, or a negative error 322 * code otherwise. 323 */ 324 static int vf_notify_resfix_done(struct xe_gt *gt) 325 { 326 struct xe_guc *guc = >->uc.guc; 327 int err; 328 329 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 330 331 err = guc_action_vf_notify_resfix_done(guc); 332 if (unlikely(err)) 333 xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", 334 ERR_PTR(err)); 335 else 336 xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); 337 338 return err; 339 } 340 341 static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, 342 u32 *value, u32 value_len) 343 { 344 u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = { 345 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 346 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 347 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 348 GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV), 349 FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key), 350 }; 351 u32 response[GUC_MAX_MMIO_MSG_LEN]; 352 u32 length; 353 int ret; 354 355 BUILD_BUG_ON(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN > GUC_MAX_MMIO_MSG_LEN); 356 ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); 357 if (unlikely(ret < 0)) 358 return ret; 359 360 if (unlikely(FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ, response[0]))) 361 return -EPROTO; 362 363 length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]); 364 if (unlikely(length > value_len)) 365 return -EOVERFLOW; 366 if (unlikely(length < value_len)) 367 return -ENODATA; 368 369 switch (value_len) { 370 default: 371 xe_gt_WARN_ON(guc_to_gt(guc), value_len > 3); 372 fallthrough; 373 case 3: 374 value[2] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96, response[3]); 375 fallthrough; 376 case 2: 377 value[1] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64, response[2]); 378 fallthrough; 379 case 1: 380 value[0] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32, response[1]); 381 fallthrough; 382 case 0: 383 break; 384 } 385 386 return 0; 387 } 388 389 static int guc_action_query_single_klv32(struct xe_guc *guc, u32 key, u32 *value32) 390 { 391 return guc_action_query_single_klv(guc, key, value32, hxg_sizeof(u32)); 392 } 393 394 static int guc_action_query_single_klv64(struct xe_guc *guc, u32 key, u64 *value64) 395 { 396 u32 value[2]; 397 int err; 398 399 err = guc_action_query_single_klv(guc, key, value, hxg_sizeof(value)); 400 if (unlikely(err)) 401 return err; 402 403 *value64 = make_u64_from_u32(value[1], value[0]); 404 return 0; 405 } 406 407 static bool has_gmdid(struct xe_device *xe) 408 { 409 return GRAPHICS_VERx100(xe) >= 1270; 410 } 411 412 /** 413 * xe_gt_sriov_vf_gmdid - Query GMDID over MMIO. 414 * @gt: the &xe_gt 415 * 416 * This function is for VF use only. 417 * 418 * Return: value of GMDID KLV on success or 0 on failure. 419 */ 420 u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt) 421 { 422 const char *type = xe_gt_is_media_type(gt) ? "media" : "graphics"; 423 struct xe_guc *guc = >->uc.guc; 424 u32 value; 425 int err; 426 427 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 428 xe_gt_assert(gt, !GRAPHICS_VERx100(gt_to_xe(gt)) || has_gmdid(gt_to_xe(gt))); 429 xe_gt_assert(gt, gt->sriov.vf.guc_version.major > 1 || gt->sriov.vf.guc_version.minor >= 2); 430 431 err = guc_action_query_single_klv32(guc, GUC_KLV_GLOBAL_CFG_GMD_ID_KEY, &value); 432 if (unlikely(err)) { 433 xe_gt_sriov_err(gt, "Failed to obtain %s GMDID (%pe)\n", 434 type, ERR_PTR(err)); 435 return 0; 436 } 437 438 xe_gt_sriov_dbg(gt, "%s GMDID = %#x\n", type, value); 439 return value; 440 } 441 442 static int vf_get_ggtt_info(struct xe_gt *gt) 443 { 444 struct xe_tile *tile = gt_to_tile(gt); 445 struct xe_ggtt *ggtt = tile->mem.ggtt; 446 struct xe_guc *guc = >->uc.guc; 447 u64 start, size, ggtt_size; 448 s64 shift; 449 int err; 450 451 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 452 453 guard(mutex)(&ggtt->lock); 454 455 err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); 456 if (unlikely(err)) 457 return err; 458 459 err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_SIZE_KEY, &size); 460 if (unlikely(err)) 461 return err; 462 463 if (!size) 464 return -ENODATA; 465 466 ggtt_size = xe_tile_sriov_vf_ggtt(tile); 467 if (ggtt_size && ggtt_size != size) { 468 xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", 469 size / SZ_1K, ggtt_size / SZ_1K); 470 return -EREMCHG; 471 } 472 473 xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", 474 start, start + size - 1, size / SZ_1K); 475 476 shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile); 477 xe_tile_sriov_vf_ggtt_base_store(tile, start); 478 xe_tile_sriov_vf_ggtt_store(tile, size); 479 480 if (shift && shift != start) { 481 xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n", 482 shift, start); 483 xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); 484 } 485 486 if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { 487 WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); 488 smp_wmb(); /* Ensure above write visible before wake */ 489 wake_up_all(>->sriov.vf.migration.wq); 490 } 491 492 return 0; 493 } 494 495 static int vf_get_lmem_info(struct xe_gt *gt) 496 { 497 struct xe_tile *tile = gt_to_tile(gt); 498 struct xe_guc *guc = >->uc.guc; 499 char size_str[10]; 500 u64 size, lmem_size; 501 int err; 502 503 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 504 505 err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, &size); 506 if (unlikely(err)) 507 return err; 508 509 lmem_size = xe_tile_sriov_vf_lmem(tile); 510 if (lmem_size && lmem_size != size) { 511 xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n", 512 size / SZ_1M, lmem_size / SZ_1M); 513 return -EREMCHG; 514 } 515 516 string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str)); 517 xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str); 518 519 xe_tile_sriov_vf_lmem_store(tile, size); 520 521 return size ? 0 : -ENODATA; 522 } 523 524 static int vf_get_submission_cfg(struct xe_gt *gt) 525 { 526 struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; 527 struct xe_guc *guc = >->uc.guc; 528 u32 num_ctxs, num_dbs; 529 int err; 530 531 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 532 533 err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY, &num_ctxs); 534 if (unlikely(err)) 535 return err; 536 537 err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY, &num_dbs); 538 if (unlikely(err)) 539 return err; 540 541 if (config->num_ctxs && config->num_ctxs != num_ctxs) { 542 xe_gt_sriov_err(gt, "Unexpected CTXs reassignment: %u != %u\n", 543 num_ctxs, config->num_ctxs); 544 return -EREMCHG; 545 } 546 if (config->num_dbs && config->num_dbs != num_dbs) { 547 xe_gt_sriov_err(gt, "Unexpected DBs reassignment: %u != %u\n", 548 num_dbs, config->num_dbs); 549 return -EREMCHG; 550 } 551 552 xe_gt_sriov_dbg_verbose(gt, "CTXs %u DBs %u\n", num_ctxs, num_dbs); 553 554 config->num_ctxs = num_ctxs; 555 config->num_dbs = num_dbs; 556 557 return config->num_ctxs ? 0 : -ENODATA; 558 } 559 560 static void vf_cache_gmdid(struct xe_gt *gt) 561 { 562 xe_gt_assert(gt, has_gmdid(gt_to_xe(gt))); 563 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 564 565 gt->sriov.vf.runtime.gmdid = xe_gt_sriov_vf_gmdid(gt); 566 } 567 568 /** 569 * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. 570 * @gt: the &xe_gt 571 * 572 * This function is for VF use only. This function may shift the GGTT and is 573 * performed under GGTT lock, making this step visible to all GTs that share a 574 * GGTT. 575 * 576 * Return: 0 on success or a negative error code on failure. 577 */ 578 int xe_gt_sriov_vf_query_config(struct xe_gt *gt) 579 { 580 struct xe_device *xe = gt_to_xe(gt); 581 int err; 582 583 err = vf_get_ggtt_info(gt); 584 if (unlikely(err)) 585 return err; 586 587 if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { 588 err = vf_get_lmem_info(gt); 589 if (unlikely(err)) 590 return err; 591 } 592 593 err = vf_get_submission_cfg(gt); 594 if (unlikely(err)) 595 return err; 596 597 if (has_gmdid(xe)) 598 vf_cache_gmdid(gt); 599 600 return 0; 601 } 602 603 /** 604 * xe_gt_sriov_vf_guc_ids - VF GuC context IDs configuration. 605 * @gt: the &xe_gt 606 * 607 * This function is for VF use only. 608 * 609 * Return: number of GuC context IDs assigned to VF. 610 */ 611 u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt) 612 { 613 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 614 xe_gt_assert(gt, gt->sriov.vf.guc_version.major); 615 xe_gt_assert(gt, gt->sriov.vf.self_config.num_ctxs); 616 617 return gt->sriov.vf.self_config.num_ctxs; 618 } 619 620 static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) 621 { 622 u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = { 623 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 624 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 625 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VF2PF_HANDSHAKE), 626 FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, *major) | 627 FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, *minor), 628 }; 629 u32 response[VF2PF_HANDSHAKE_RESPONSE_MSG_LEN]; 630 int ret; 631 632 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 633 634 ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, 635 request, ARRAY_SIZE(request), 636 response, ARRAY_SIZE(response)); 637 if (unlikely(ret < 0)) 638 return ret; 639 640 if (unlikely(ret != VF2PF_HANDSHAKE_RESPONSE_MSG_LEN)) 641 return -EPROTO; 642 643 if (unlikely(FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ, response[0]))) 644 return -EPROTO; 645 646 *major = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, response[1]); 647 *minor = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, response[1]); 648 649 return 0; 650 } 651 652 static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) 653 { 654 xe_assert(xe, IS_SRIOV_VF(xe)); 655 656 xe->sriov.vf.pf_version.major = major; 657 xe->sriov.vf.pf_version.minor = minor; 658 } 659 660 static void vf_disconnect_pf(struct xe_device *xe) 661 { 662 vf_connect_pf(xe, 0, 0); 663 } 664 665 static int vf_handshake_with_pf(struct xe_gt *gt) 666 { 667 struct xe_device *xe = gt_to_xe(gt); 668 u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; 669 u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; 670 u32 major = major_wanted, minor = minor_wanted; 671 int err; 672 673 err = relay_action_handshake(gt, &major, &minor); 674 if (unlikely(err)) 675 goto failed; 676 677 if (!major && !minor) { 678 err = -ENODATA; 679 goto failed; 680 } 681 682 xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); 683 vf_connect_pf(xe, major, minor); 684 return 0; 685 686 failed: 687 xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", 688 major, minor, ERR_PTR(err)); 689 vf_disconnect_pf(xe); 690 return err; 691 } 692 693 /** 694 * xe_gt_sriov_vf_connect - Establish connection with the PF driver. 695 * @gt: the &xe_gt 696 * 697 * This function is for VF use only. 698 * 699 * Return: 0 on success or a negative error code on failure. 700 */ 701 int xe_gt_sriov_vf_connect(struct xe_gt *gt) 702 { 703 int err; 704 705 err = vf_handshake_with_pf(gt); 706 if (unlikely(err)) 707 goto failed; 708 709 return 0; 710 711 failed: 712 xe_gt_sriov_err(gt, "Failed to get version info (%pe)\n", ERR_PTR(err)); 713 return err; 714 } 715 716 /** 717 * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs. 718 * @gt: the &xe_gt struct instance 719 */ 720 static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) 721 { 722 struct xe_hw_engine *hwe; 723 enum xe_hw_engine_id id; 724 725 for_each_hw_engine(hwe, gt, id) 726 xe_default_lrc_update_memirq_regs_with_address(hwe); 727 } 728 729 static void vf_start_migration_recovery(struct xe_gt *gt) 730 { 731 bool started; 732 733 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 734 735 spin_lock(>->sriov.vf.migration.lock); 736 737 if (!gt->sriov.vf.migration.recovery_queued || 738 !gt->sriov.vf.migration.recovery_teardown) { 739 gt->sriov.vf.migration.recovery_queued = true; 740 WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); 741 WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true); 742 smp_wmb(); /* Ensure above writes visable before wake */ 743 744 xe_guc_ct_wake_waiters(>->uc.guc.ct); 745 746 started = queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); 747 xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ? 748 "scheduled" : "already in progress"); 749 } 750 751 spin_unlock(>->sriov.vf.migration.lock); 752 } 753 754 /** 755 * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, 756 * or just mark that a GuC is ready for it. 757 * @gt: the &xe_gt struct instance linked to target GuC 758 * 759 * This function shall be called only by VF. 760 */ 761 void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) 762 { 763 struct xe_device *xe = gt_to_xe(gt); 764 765 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 766 xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt)); 767 768 if (!xe_sriov_vf_migration_supported(xe)) { 769 xe_gt_sriov_err(gt, "migration not supported\n"); 770 return; 771 } 772 773 xe_gt_sriov_info(gt, "ready for recovery after migration\n"); 774 vf_start_migration_recovery(gt); 775 } 776 777 static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) 778 { 779 struct xe_device *xe = gt_to_xe(gt); 780 781 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 782 783 return major == xe->sriov.vf.pf_version.major && 784 minor <= xe->sriov.vf.pf_version.minor; 785 } 786 787 static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) 788 { 789 struct vf_runtime_reg *regs = gt->sriov.vf.runtime.regs; 790 unsigned int regs_size = round_up(num_regs, 4); 791 struct xe_device *xe = gt_to_xe(gt); 792 793 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 794 795 if (regs) { 796 if (num_regs <= gt->sriov.vf.runtime.regs_size) { 797 memset(regs, 0, num_regs * sizeof(*regs)); 798 gt->sriov.vf.runtime.num_regs = num_regs; 799 return 0; 800 } 801 802 drmm_kfree(&xe->drm, regs); 803 gt->sriov.vf.runtime.regs = NULL; 804 gt->sriov.vf.runtime.num_regs = 0; 805 gt->sriov.vf.runtime.regs_size = 0; 806 } 807 808 regs = drmm_kcalloc(&xe->drm, regs_size, sizeof(*regs), GFP_KERNEL); 809 if (unlikely(!regs)) 810 return -ENOMEM; 811 812 gt->sriov.vf.runtime.regs = regs; 813 gt->sriov.vf.runtime.num_regs = num_regs; 814 gt->sriov.vf.runtime.regs_size = regs_size; 815 return 0; 816 } 817 818 static int vf_query_runtime_info(struct xe_gt *gt) 819 { 820 u32 request[VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN]; 821 u32 response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 32]; /* up to 16 regs */ 822 u32 limit = (ARRAY_SIZE(response) - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; 823 u32 count, remaining, num, i; 824 u32 start = 0; 825 int ret; 826 827 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 828 xe_gt_assert(gt, limit); 829 830 /* this is part of the 1.0 PF/VF ABI */ 831 if (!vf_is_negotiated(gt, 1, 0)) 832 return -ENOPKG; 833 834 request[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 835 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 836 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, 837 GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME) | 838 FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, limit); 839 840 repeat: 841 request[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, start); 842 ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, 843 request, ARRAY_SIZE(request), 844 response, ARRAY_SIZE(response)); 845 if (unlikely(ret < 0)) 846 goto failed; 847 848 if (unlikely(ret < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) { 849 ret = -EPROTO; 850 goto failed; 851 } 852 if (unlikely((ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) % 2)) { 853 ret = -EPROTO; 854 goto failed; 855 } 856 857 num = (ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; 858 count = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, response[0]); 859 remaining = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, response[1]); 860 861 xe_gt_sriov_dbg_verbose(gt, "count=%u num=%u ret=%d start=%u remaining=%u\n", 862 count, num, ret, start, remaining); 863 864 if (unlikely(count != num)) { 865 ret = -EPROTO; 866 goto failed; 867 } 868 869 if (start == 0) { 870 ret = vf_prepare_runtime_info(gt, num + remaining); 871 if (unlikely(ret < 0)) 872 goto failed; 873 } else if (unlikely(start + num > gt->sriov.vf.runtime.num_regs)) { 874 ret = -EPROTO; 875 goto failed; 876 } 877 878 for (i = 0; i < num; ++i) { 879 struct vf_runtime_reg *reg = >->sriov.vf.runtime.regs[start + i]; 880 881 reg->offset = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i]; 882 reg->value = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i + 1]; 883 } 884 885 if (remaining) { 886 start += num; 887 goto repeat; 888 } 889 890 return 0; 891 892 failed: 893 vf_prepare_runtime_info(gt, 0); 894 return ret; 895 } 896 897 static void vf_show_runtime_info(struct xe_gt *gt) 898 { 899 struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; 900 unsigned int size = gt->sriov.vf.runtime.num_regs; 901 902 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 903 904 for (; size--; vf_regs++) 905 xe_gt_sriov_dbg(gt, "runtime(%#x) = %#x\n", 906 vf_regs->offset, vf_regs->value); 907 } 908 909 /** 910 * xe_gt_sriov_vf_query_runtime - Query SR-IOV runtime data. 911 * @gt: the &xe_gt 912 * 913 * This function is for VF use only. 914 * 915 * Return: 0 on success or a negative error code on failure. 916 */ 917 int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt) 918 { 919 int err; 920 921 err = vf_query_runtime_info(gt); 922 if (unlikely(err)) 923 goto failed; 924 925 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) 926 vf_show_runtime_info(gt); 927 928 return 0; 929 930 failed: 931 xe_gt_sriov_err(gt, "Failed to get runtime info (%pe)\n", 932 ERR_PTR(err)); 933 return err; 934 } 935 936 static int vf_runtime_reg_cmp(const void *a, const void *b) 937 { 938 const struct vf_runtime_reg *ra = a; 939 const struct vf_runtime_reg *rb = b; 940 941 return (int)ra->offset - (int)rb->offset; 942 } 943 944 static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) 945 { 946 struct xe_gt_sriov_vf_runtime *runtime = >->sriov.vf.runtime; 947 struct vf_runtime_reg key = { .offset = addr }; 948 949 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 950 951 return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), 952 vf_runtime_reg_cmp); 953 } 954 955 /** 956 * xe_gt_sriov_vf_read32 - Get a register value from the runtime data. 957 * @gt: the &xe_gt 958 * @reg: the register to read 959 * 960 * This function is for VF use only. 961 * This function shall be called after VF has connected to PF. 962 * This function is dedicated for registers that VFs can't read directly. 963 * 964 * Return: register value obtained from the PF or 0 if not found. 965 */ 966 u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) 967 { 968 u32 addr = xe_mmio_adjusted_addr(>->mmio, reg.addr); 969 struct vf_runtime_reg *rr; 970 971 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 972 xe_gt_assert(gt, !reg.vf); 973 974 if (reg.addr == GMD_ID.addr) { 975 xe_gt_sriov_dbg_verbose(gt, "gmdid(%#x) = %#x\n", 976 addr, gt->sriov.vf.runtime.gmdid); 977 return gt->sriov.vf.runtime.gmdid; 978 } 979 980 rr = vf_lookup_reg(gt, addr); 981 if (!rr) { 982 xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG), 983 "VF is trying to read an inaccessible register %#x+%#x\n", 984 reg.addr, addr - reg.addr); 985 return 0; 986 } 987 988 xe_gt_sriov_dbg_verbose(gt, "runtime[%#x] = %#x\n", addr, rr->value); 989 return rr->value; 990 } 991 992 /** 993 * xe_gt_sriov_vf_write32 - Handle a write to an inaccessible register. 994 * @gt: the &xe_gt 995 * @reg: the register to write 996 * @val: value to write 997 * 998 * This function is for VF use only. 999 * Currently it will trigger a WARN if running on debug build. 1000 */ 1001 void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) 1002 { 1003 u32 addr = xe_mmio_adjusted_addr(>->mmio, reg.addr); 1004 1005 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1006 xe_gt_assert(gt, !reg.vf); 1007 1008 /* 1009 * In the future, we may want to handle selected writes to inaccessible 1010 * registers in some custom way, but for now let's just log a warning 1011 * about such attempt, as likely we might be doing something wrong. 1012 */ 1013 xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG), 1014 "VF is trying to write %#x to an inaccessible register %#x+%#x\n", 1015 val, reg.addr, addr - reg.addr); 1016 } 1017 1018 /** 1019 * xe_gt_sriov_vf_print_config - Print VF self config. 1020 * @gt: the &xe_gt 1021 * @p: the &drm_printer 1022 * 1023 * This function is for VF use only. 1024 */ 1025 void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) 1026 { 1027 struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; 1028 struct xe_device *xe = gt_to_xe(gt); 1029 u64 lmem_size; 1030 char buf[10]; 1031 1032 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1033 1034 if (xe_gt_is_main_type(gt)) { 1035 u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt)); 1036 u64 ggtt_base = xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt)); 1037 1038 drm_printf(p, "GGTT range:\t%#llx-%#llx\n", 1039 ggtt_base, ggtt_base + ggtt_size - 1); 1040 string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 1041 drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf); 1042 1043 if (IS_DGFX(xe)) { 1044 lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt)); 1045 string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 1046 drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf); 1047 } 1048 } 1049 1050 drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); 1051 drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); 1052 } 1053 1054 /** 1055 * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. 1056 * @gt: the &xe_gt 1057 * @p: the &drm_printer 1058 * 1059 * This function is for VF use only. 1060 */ 1061 void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) 1062 { 1063 struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; 1064 unsigned int size = gt->sriov.vf.runtime.num_regs; 1065 1066 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1067 1068 for (; size--; vf_regs++) 1069 drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); 1070 } 1071 1072 /** 1073 * xe_gt_sriov_vf_print_version - Print VF ABI versions. 1074 * @gt: the &xe_gt 1075 * @p: the &drm_printer 1076 * 1077 * This function is for VF use only. 1078 */ 1079 void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) 1080 { 1081 struct xe_device *xe = gt_to_xe(gt); 1082 struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; 1083 struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; 1084 struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; 1085 struct xe_uc_fw_version ver; 1086 1087 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1088 1089 drm_printf(p, "GuC ABI:\n"); 1090 1091 vf_minimum_guc_version(gt, &ver); 1092 drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor); 1093 1094 drm_printf(p, "\twanted:\t%u.%u.%u.*\n", 1095 wanted->branch, wanted->major, wanted->minor); 1096 1097 drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", 1098 guc_version->branch, guc_version->major, 1099 guc_version->minor, guc_version->patch); 1100 1101 drm_printf(p, "PF ABI:\n"); 1102 1103 drm_printf(p, "\tbase:\t%u.%u\n", 1104 GUC_RELAY_VERSION_BASE_MAJOR, GUC_RELAY_VERSION_BASE_MINOR); 1105 drm_printf(p, "\twanted:\t%u.%u\n", 1106 GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); 1107 drm_printf(p, "\thandshake:\t%u.%u\n", 1108 pf_version->major, pf_version->minor); 1109 } 1110 1111 static bool vf_post_migration_shutdown(struct xe_gt *gt) 1112 { 1113 struct xe_device *xe = gt_to_xe(gt); 1114 1115 /* 1116 * On platforms where CCS must be restored by the primary GT, the media 1117 * GT's VF post-migration recovery must run afterward. Detect this case 1118 * and re-queue the media GT's restore work item if necessary. 1119 */ 1120 if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) { 1121 struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; 1122 1123 if (xe_gt_sriov_vf_recovery_pending(primary_gt)) 1124 return true; 1125 } 1126 1127 spin_lock_irq(>->sriov.vf.migration.lock); 1128 gt->sriov.vf.migration.recovery_queued = false; 1129 spin_unlock_irq(>->sriov.vf.migration.lock); 1130 1131 xe_guc_ct_flush_and_stop(>->uc.guc.ct); 1132 xe_guc_submit_pause(>->uc.guc); 1133 xe_tlb_inval_reset(>->tlb_inval); 1134 1135 return false; 1136 } 1137 1138 static size_t post_migration_scratch_size(struct xe_device *xe) 1139 { 1140 return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); 1141 } 1142 1143 static int vf_post_migration_fixups(struct xe_gt *gt) 1144 { 1145 void *buf = gt->sriov.vf.migration.scratch; 1146 int err; 1147 1148 /* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */ 1149 err = xe_gt_sriov_vf_query_config(gt); 1150 if (err) 1151 return err; 1152 1153 if (xe_gt_is_main_type(gt)) 1154 xe_sriov_vf_ccs_rebase(gt_to_xe(gt)); 1155 1156 xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); 1157 err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); 1158 if (err) 1159 return err; 1160 1161 return 0; 1162 } 1163 1164 static void vf_post_migration_rearm(struct xe_gt *gt) 1165 { 1166 xe_guc_ct_restart(>->uc.guc.ct); 1167 xe_guc_submit_unpause_prepare(>->uc.guc); 1168 } 1169 1170 static void vf_post_migration_kickstart(struct xe_gt *gt) 1171 { 1172 xe_guc_submit_unpause(>->uc.guc); 1173 } 1174 1175 static void vf_post_migration_abort(struct xe_gt *gt) 1176 { 1177 spin_lock_irq(>->sriov.vf.migration.lock); 1178 WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); 1179 WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); 1180 spin_unlock_irq(>->sriov.vf.migration.lock); 1181 1182 wake_up_all(>->sriov.vf.migration.wq); 1183 1184 xe_guc_submit_pause_abort(>->uc.guc); 1185 } 1186 1187 static int vf_post_migration_notify_resfix_done(struct xe_gt *gt) 1188 { 1189 bool skip_resfix = false; 1190 1191 spin_lock_irq(>->sriov.vf.migration.lock); 1192 if (gt->sriov.vf.migration.recovery_queued) { 1193 skip_resfix = true; 1194 xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n"); 1195 } else { 1196 WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false); 1197 } 1198 spin_unlock_irq(>->sriov.vf.migration.lock); 1199 1200 if (skip_resfix) 1201 return -EAGAIN; 1202 1203 /* 1204 * Make sure interrupts on the new HW are properly set. The GuC IRQ 1205 * must be working at this point, since the recovery did started, 1206 * but the rest was not enabled using the procedure from spec. 1207 */ 1208 xe_irq_resume(gt_to_xe(gt)); 1209 1210 return vf_notify_resfix_done(gt); 1211 } 1212 1213 static void vf_post_migration_recovery(struct xe_gt *gt) 1214 { 1215 struct xe_device *xe = gt_to_xe(gt); 1216 int err; 1217 bool retry; 1218 1219 xe_gt_sriov_dbg(gt, "migration recovery in progress\n"); 1220 1221 xe_pm_runtime_get(xe); 1222 retry = vf_post_migration_shutdown(gt); 1223 if (retry) 1224 goto queue; 1225 1226 if (!xe_sriov_vf_migration_supported(xe)) { 1227 xe_gt_sriov_err(gt, "migration is not supported\n"); 1228 err = -ENOTRECOVERABLE; 1229 goto fail; 1230 } 1231 1232 err = vf_post_migration_fixups(gt); 1233 if (err) 1234 goto fail; 1235 1236 vf_post_migration_rearm(gt); 1237 1238 err = vf_post_migration_notify_resfix_done(gt); 1239 if (err && err != -EAGAIN) 1240 goto fail; 1241 1242 vf_post_migration_kickstart(gt); 1243 1244 xe_pm_runtime_put(xe); 1245 xe_gt_sriov_notice(gt, "migration recovery ended\n"); 1246 return; 1247 fail: 1248 vf_post_migration_abort(gt); 1249 xe_pm_runtime_put(xe); 1250 xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err)); 1251 xe_device_declare_wedged(xe); 1252 return; 1253 1254 queue: 1255 xe_gt_sriov_info(gt, "Re-queuing migration recovery\n"); 1256 queue_work(gt->ordered_wq, >->sriov.vf.migration.worker); 1257 xe_pm_runtime_put(xe); 1258 } 1259 1260 static void migration_worker_func(struct work_struct *w) 1261 { 1262 struct xe_gt *gt = container_of(w, struct xe_gt, 1263 sriov.vf.migration.worker); 1264 1265 vf_post_migration_recovery(gt); 1266 } 1267 1268 static void vf_migration_fini(void *arg) 1269 { 1270 struct xe_gt *gt = arg; 1271 1272 spin_lock_irq(>->sriov.vf.migration.lock); 1273 gt->sriov.vf.migration.recovery_teardown = true; 1274 spin_unlock_irq(>->sriov.vf.migration.lock); 1275 1276 cancel_work_sync(>->sriov.vf.migration.worker); 1277 } 1278 1279 /** 1280 * xe_gt_sriov_vf_init_early() - GT VF init early 1281 * @gt: the &xe_gt 1282 * 1283 * Return 0 on success, errno on failure 1284 */ 1285 int xe_gt_sriov_vf_init_early(struct xe_gt *gt) 1286 { 1287 void *buf; 1288 1289 if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) 1290 return 0; 1291 1292 buf = drmm_kmalloc(>_to_xe(gt)->drm, 1293 post_migration_scratch_size(gt_to_xe(gt)), 1294 GFP_KERNEL); 1295 if (!buf) 1296 return -ENOMEM; 1297 1298 gt->sriov.vf.migration.scratch = buf; 1299 spin_lock_init(>->sriov.vf.migration.lock); 1300 INIT_WORK(>->sriov.vf.migration.worker, migration_worker_func); 1301 init_waitqueue_head(>->sriov.vf.migration.wq); 1302 1303 return 0; 1304 } 1305 1306 /** 1307 * xe_gt_sriov_vf_init() - GT VF init 1308 * @gt: the &xe_gt 1309 * 1310 * Return 0 on success, errno on failure 1311 */ 1312 int xe_gt_sriov_vf_init(struct xe_gt *gt) 1313 { 1314 if (!xe_sriov_vf_migration_supported(gt_to_xe(gt))) 1315 return 0; 1316 1317 /* 1318 * We want to tear down the VF post-migration early during driver 1319 * unload; therefore, we add this finalization action later during 1320 * driver load. 1321 */ 1322 return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, 1323 vf_migration_fini, gt); 1324 } 1325 1326 /** 1327 * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending 1328 * @gt: the &xe_gt 1329 * 1330 * The return value of this function must be immediately visible upon vCPU 1331 * unhalt and must persist until RESFIX_DONE is issued. This guarantee is 1332 * currently implemented only for platforms that support memirq. If non-memirq 1333 * platforms begin to support VF migration, this function will need to be 1334 * updated accordingly. 1335 * 1336 * Return: True if VF post migration recovery is pending, False otherwise 1337 */ 1338 bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt) 1339 { 1340 struct xe_memirq *memirq = >_to_tile(gt)->memirq; 1341 1342 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1343 1344 /* early detection until recovery starts */ 1345 if (xe_device_uses_memirq(gt_to_xe(gt)) && 1346 xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc)) 1347 return true; 1348 1349 return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress); 1350 } 1351 1352 static bool vf_valid_ggtt(struct xe_gt *gt) 1353 { 1354 struct xe_memirq *memirq = >_to_tile(gt)->memirq; 1355 bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) && 1356 xe_memirq_guc_sw_int_0_irq_pending(memirq, >->uc.guc); 1357 1358 xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 1359 1360 if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes)) 1361 return false; 1362 1363 return true; 1364 } 1365 1366 /** 1367 * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses 1368 * @gt: the &xe_gt 1369 */ 1370 void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) 1371 { 1372 int ret; 1373 1374 if (!IS_SRIOV_VF(gt_to_xe(gt)) || 1375 !xe_sriov_vf_migration_supported(gt_to_xe(gt))) 1376 return; 1377 1378 ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq, 1379 vf_valid_ggtt(gt), 1380 HZ * 5); 1381 xe_gt_WARN_ON(gt, !ret); 1382 } 1383