// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_sriov_abi.h"

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc_ct.h"
#include "xe_sriov.h"
#include "xe_sriov_pf_service.h"
#include "xe_tile.h"

static const char *control_cmd_to_string(u32 cmd)
{
	switch (cmd) {
	case GUC_PF_TRIGGER_VF_PAUSE:
		return "PAUSE";
	case GUC_PF_TRIGGER_VF_RESUME:
		return "RESUME";
	case GUC_PF_TRIGGER_VF_STOP:
		return "STOP";
	case GUC_PF_TRIGGER_VF_FLR_START:
		return "FLR_START";
	case GUC_PF_TRIGGER_VF_FLR_FINISH:
		return "FLR_FINISH";
	default:
		return "<unknown>";
	}
}

static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret;

	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
	/* this action carries no response payload: any positive length is a protocol error */
	return ret > 0 ? -EPROTO : ret;
}

static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
	int err;

	xe_gt_assert(gt, vfid != PFID);
	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
				vfid, control_cmd_to_string(cmd));

	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
	if (unlikely(err))
		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
	return err;
}

static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}

static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}

static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}

static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}

static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}

/**
 * DOC: The VF state machine
 *
 * The simplified VF state machine could be presented as::
 *
 *               pause--------------------------o
 *              /                               |
 *             /                                v
 *     (READY)<------------------resume-----(PAUSED)
 *        ^  \                      /          /
 *        |   \                    /          /
 *        |    stop---->(STOPPED)<----stop   /
 *        |            /                    /
 *        |           /                    /
 *        o--------<-----flr              /
 *         \                             /
 *          o------<--------------------flr
 *
 * Where:
 *
 * * READY - represents a state in which VF is fully operable
 * * PAUSED - represents a state in which VF activity is temporarily suspended
 * * STOPPED - represents a state in which VF activity is definitively halted
 * * pause - represents a request to temporarily suspend VF activity
 * * resume - represents a request to resume VF activity
 * * stop - represents a request to definitively halt VF activity
 * * flr - represents a request to perform VF FLR to restore VF activity
 *
 * However, each state transition requires additional steps that involve
 * communication with GuC that might fail or be interrupted by other requests::
 *
 *                       .................................WIP....
 *                       :                                      :
 *              pause--------------------->PAUSE_WIP----------------------------o
 *             /         :              /           \           :               |
 *            /          :   o----<---stop          flr--o      :               |
 *           /           :   |           \          /    |      :               V
 *     (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
 *        ^  \  \        :   |                  |         :             /      /
 *        |   \  \       :   |                  |         :            /      /
 *        |    \  \      :   |                  |         :           /      /
 *        |     \  \     :   o----<----------------------+--<-------stop    /
 *        |      \  \    :   |                  |         :                /
 *        |       \  \   :   V                  |         :               /
 *        |        \  stop----->STOP_WIP---------flr--->-----o           /
 *        |         \    :   |                  |         :             /
 *        |          \   :   |                  V         :            /
 *        |           flr--------+----->----------------->FLR_WIP<-----flr
 *        |              :       |             /             ^   :
 *        |              :       |            /              |   :
 *        o--------<-------:----+-----<----------------o     |   :
 *                       :       |                           |   :
 *                       :....|...........................|.....:
 *                            |                           |
 *                            V                           |
 *                        (STOPPED)--------------------flr
 *
 * For details about each internal WIP state machine see:
 *
 * * `The VF PAUSE state machine`_
 * * `The VF RESUME state machine`_
 * * `The VF STOP state machine`_
 * * `The VF FLR state machine`_
 */
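
/*
 * A minimal usage sketch (illustrative only, not built): how a hypothetical
 * PF-side caller could walk the transitions shown above using the public
 * entry points defined later in this file. VF1 being provisioned and the
 * error handling policy are assumptions of this example.
 */
#if 0
static int example_vf_lifecycle(struct xe_gt *gt)
{
	unsigned int vfid = 1;	/* assumed: VF1 exists and is READY */
	int err;

	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);	/* READY -> PAUSED */
	if (err)
		return err;

	err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);	/* PAUSED -> READY */
	if (err)
		return err;

	err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);		/* READY -> STOPPED */
	if (err)
		return err;

	/* per the diagram, only FLR brings a STOPPED VF back to READY */
	return xe_gt_sriov_pf_control_trigger_flr(gt, vfid);
}
#endif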

#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X
	CASE2STR(WIP);
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_SAVE_GUC);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
#undef CASE2STR
	default: return "?";
	}
}
#endif

static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
		return HZ / 2;
	case XE_GT_SRIOV_STATE_FLR_WIP:
	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
		return 5 * HZ;
	default:
		return HZ;
	}
}

static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	return &gt->sriov.pf.vfs[vfid].control;
}

static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return &cs->state;
}

static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	return test_bit(bit, pf_peek_vf_state(gt, vfid));
}

static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long state = *pf_peek_vf_state(gt, vfid);
	enum xe_gt_sriov_control_bits bit;

	if (state) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
					vfid, state, state ? " bits " : "",
					(int)BITS_PER_LONG, &state);
		for_each_set_bit(bit, &state, BITS_PER_LONG)
			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
						vfid, control_bit_to_string(bit), bit);
	} else {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
	}
}

static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	bool result = pf_check_vf_state(gt, vfid, bit);

	if (unlikely(!result))
		pf_dump_vf_state(gt, vfid);

	return result;
}

static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
				   enum xe_gt_sriov_control_bits bit)
{
	bool result = !pf_check_vf_state(gt, vfid, bit);

	if (unlikely(!result))
		pf_dump_vf_state(gt, vfid);

	return result;
}

static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
					vfid, control_bit_to_string(bit), bit);
		return true;
	}
	return false;
}

static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
			     enum xe_gt_sriov_control_bits bit)
{
	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
					vfid, control_bit_to_string(bit), bit);
		return true;
	}
	return false;
}

static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (pf_exit_vf_state(gt, vfid, bit))
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
					vfid, control_bit_to_string(bit), bit,
					__builtin_return_address(0));
}
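
/*
 * A standalone sketch (illustrative only, not built) of the pattern used by
 * pf_enter_vf_state()/pf_exit_vf_state() above: test_and_set_bit() and
 * test_and_clear_bit() are atomic read-modify-write operations, so exactly
 * one of any number of concurrent callers observes the transition and acts
 * on it; everyone else sees 'false' and backs off.
 */
#if 0
static unsigned long example_state;

static bool example_enter(int bit)
{
	/* only the caller that actually flips the bit gets 'true' */
	return !test_and_set_bit(bit, &example_state);
}

static bool example_exit(int bit)
{
	/* likewise, only one concurrent caller wins the clear */
	return test_and_clear_bit(bit, &example_state);
}
#endif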

static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
				vfid, __builtin_return_address(0));
		pf_dump_vf_state(gt, vfid);
	}
}

static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
				vfid, __builtin_return_address(0));

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
}

#define pf_enter_vf_state_machine_bug(gt, vfid)	({	\
	pf_enter_vf_mismatch((gt), (vfid));		\
})

static void pf_queue_control_worker(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
}

static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

	spin_lock(&pfc->lock);
	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
	spin_unlock(&pfc->lock);

	pf_queue_control_worker(gt);
}

static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);

static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		reinit_completion(&cs->done);
		return true;
	}
	return false;
}

static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		pf_exit_vf_flr_wip(gt, vfid);
		pf_exit_vf_stop_wip(gt, vfid);
		pf_exit_vf_pause_wip(gt, vfid);
		pf_exit_vf_resume_wip(gt, vfid);

		complete_all(&cs->done);
	}
}

static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
}

static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}
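
/*
 * The WIP tracking above is what lets a blocking caller wait for the
 * asynchronous worker: entering WIP re-arms a completion, leaving WIP
 * completes it. A minimal standalone sketch of that pattern (illustrative
 * only, not built):
 */
#if 0
static DECLARE_COMPLETION(example_done);

static void example_begin(void)
{
	reinit_completion(&example_done);	/* arm before kicking the worker */
}

static void example_finish(void)
{
	complete_all(&example_done);		/* wake every waiter, even late ones */
}

static int example_wait(unsigned long timeout)
{
	return wait_for_completion_timeout(&example_done, timeout) ? 0 : -ETIMEDOUT;
}
#endif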

/**
 * DOC: The VF PAUSE state machine
 *
 * The VF PAUSE state machine looks like::
 *
 *     (READY,RESUMED)<-------------<---------------------o---------o
 *        |                                                \         \
 *      pause                                               \         \
 *        |                                                  \         \
 *     ....V...........................PAUSE_WIP........      \         \
 *     :    \                                          :       o         \
 *     :     \   o------<-----busy                     :       |          \
 *     :      \ /            /                         :       |           |
 *     :     PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED)   |
 *     :       |             \                         :       |           |
 *     :     acked            rejected---->----------o--->(MISMATCH)      /
 *     :       |                                       :                  /
 *     :       v                                       :                 /
 *     :     PAUSE_WAIT_GUC                            :                /
 *     :       |                                       :               /
 *     :      done                                     :              /
 *     :       |                                       :             /
 *     :       v                                       :            /
 *     :     PAUSE_GUC_DONE                            o-----restart
 *     :       |                                       :
 *     :       |        o---<--busy                    :
 *     :       v       /     /                         :
 *     :     PAUSE_SAVE_GUC                            :
 *     :        /                                      :
 *     :       /                                       :
 *     :....o..............o...............o...........:
 *          |              |               |
 *      completed         flr             stop
 *          |              |               |
 *          V         .....V.....    ......V.....
 *       (PAUSED)     : FLR_WIP :    : STOP_WIP :
 *                    :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
	}
}

static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}

static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}

static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		return false;

	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
	if (err) {
		/* retry if busy */
		if (err == -EBUSY) {
			pf_enter_vf_pause_save_guc(gt, vfid);
			return true;
		}
		/* give up on error */
		if (err == -EIO)
			pf_enter_vf_mismatch(gt, vfid);
	}

	pf_enter_vf_pause_completed(gt, vfid);
	return true;
}

static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		return false;

	pf_enter_vf_pause_save_guc(gt, vfid);
	return true;
}

static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		pf_queue_vf(gt, vfid);
}

static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}

static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		return false;

	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
	pf_enter_pause_wait_guc(gt, vfid);

	err = pf_send_vf_pause(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
		pf_exit_pause_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_pause_send_pause(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_pause_rejected(gt, vfid);
		else
			pf_enter_vf_pause_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}
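
/*
 * Note the retry policy used above: when the H2G channel is busy (-EBUSY),
 * the send state is simply re-entered, which re-queues the VF for the
 * worker, so the send is retried later without blocking. A generic sketch
 * of this re-queue-on-busy idiom (illustrative only, not built):
 */
#if 0
static void example_process_one(struct work_struct *w);
static DECLARE_WORK(example_work, example_process_one);

static int example_try_send(void)
{
	return 0;	/* placeholder: a real send could also return -EBUSY */
}

static void example_process_one(struct work_struct *w)
{
	/* on a busy channel, punt back to the workqueue instead of blocking */
	if (example_try_send() == -EBUSY)
		schedule_work(&example_work);
}
#endif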

static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_pause_send_pause(gt, vfid);
		return true;
	}

	return false;
}

/**
 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
		return -EPERM;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_pause_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF RESUME state machine
 *
 * The VF RESUME state machine looks like::
 *
 *     (PAUSED)<-----------------<------------------------o
 *        |                                                \
 *      resume                                              \
 *        |                                                  \
 *     ....V............................RESUME_WIP......      \
 *     :    \                                          :       o
 *     :     \    o-------<-----busy                   :       |
 *     :      \  /             /                       :       |
 *     :     RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
 *     :          /           \                        :       |
 *     :      acked            rejected---->---------o--->(MISMATCH)
 *     :        /                                      :
 *     :....o..............o...............o.....o.....:
 *          |              |               |      \
 *      completed         flr             stop     restart-->(READY)
 *          |              |               |
 *          V         .....V.....    ......V.....
 *     (RESUMED)      : FLR_WIP :    : STOP_WIP :
 *                    :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}

static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}

static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}

static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		return false;

	err = pf_send_vf_resume(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_resume_send_resume(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_resume_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_resume_failed(gt, vfid);
	else
		pf_enter_vf_resume_completed(gt, vfid);
	return true;
}

static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_resume_send_resume(gt, vfid);
		return true;
	}

	return false;
}

/**
 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
	int err;

	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
		return -EPERM;
	}

	if (!pf_enter_vf_resume_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
		xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF STOP state machine
 *
 * The VF STOP state machine looks like::
 *
 *     (READY,PAUSED,RESUMED)<-------<--------------------o
 *        |                                                \
 *       stop                                               \
 *        |                                                  \
 *     ....V..............................STOP_WIP......
      \
 *     :    \                                          :       o
 *     :     \   o----<----busy                        :       |
 *     :      \ /       /                              :       |
 *     :     STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
 *     :      /             \                          :       |
 *     :   acked             rejected-------->--------o--->(MISMATCH)
 *     :    /                                          :
 *     :....o..............o...............o...........:
 *          |              |               |
 *      completed         flr           restart
 *          |              |               |
 *          V         .....V.....          V
 *     (STOPPED)      : FLR_WIP :       (READY)
 *                    :.........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
}

static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}

static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}

static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		return false;

	err = pf_send_vf_stop(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_stop_send_stop(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_stop_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_stop_failed(gt, vfid);
	else
		pf_enter_vf_stop_completed(gt, vfid);
	return true;
}

static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_stop_send_stop(gt, vfid);
		return true;
	}
	return false;
}
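
/*
 * A hypothetical caller of the public entry point below (illustrative only,
 * not built), showing one way to map its return codes: -ESTALE means the VF
 * was already stopped, which such a caller may choose to treat as success.
 */
#if 0
static int example_ensure_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	int err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);

	return err == -ESTALE ? 0 : err;
}
#endif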

/**
 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_stop_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF FLR state machine
 *
 * The VF FLR state machine looks like::
 *
 *     (READY,PAUSED,STOPPED)<------------<--------------o
 *        |                                               \
 *       flr                                               \
 *        |                                                 \
 *     ....V..........................FLR_WIP...........     \
 *     :    \                                       :          \
 *     :     \   o----<----busy                     :           |
 *     :      \ /        /                          :           |
 *     :     FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
 *     :       |             \                      :            |        |
 *     :     acked            rejected----->-----------o--->(MISMATCH)    |
 *     :       |                                    :          ^          |
 *     :       v                                    :          |          |
 *     :     FLR_WAIT_GUC                           :          |          |
 *     :       |                                    :          |          |
 *     :      done                                  :          |          |
 *     :       |                                    :          |          |
 *     :       v                                    :          |          |
 *     :     FLR_GUC_DONE                           :          |          |
 *     :       |                                    :          |          |
 *     :     FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
 *     :       |                                    :          |          |
 *     :     FLR_RESET_DATA                         :          |          |
 *     :       |                                    :          |          |
 *     :     FLR_RESET_MMIO                         :          |          |
 *     :       |                                    :          |          |
 *     :       |    o----<----busy                  :          |          |
 *     :       |/        /                          :          |          |
 *     :     FLR_SEND_FINISH----failed--->-----------o--------+-----------o
 *     :      /            \                        :          |
 *     :   acked            rejected----->-----------o--------o
 *     :    /                                       :
 *     :....o..............................o...........:
 *          |                              |
 *      completed                       restart
 *          |                             /
 *          V                            /
 *     (READY)<----------<------------o
 *
 * For the full state machine view, see `The VF state machine`_.
 */
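
/*
 * As the diagram shows, a failed FLR leaves the VF in (FLR_FAILED) rather
 * than (READY). A hypothetical recovery sketch (illustrative only, not
 * built) built on the public entry point defined further below, assuming
 * vfid names a valid VF:
 */
#if 0
static int example_recover_vf(struct xe_gt *gt, unsigned int vfid)
{
	int err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);

	if (err)	/* -EIO if the FLR ended in FLR_FAILED, -ETIMEDOUT if stuck */
		xe_gt_sriov_err(gt, "VF%u recovery failed (%pe)\n",
				vfid, ERR_PTR(err));
	return err;
}
#endif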

static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
		return;
	}

	pf_enter_vf_wip(gt, vfid);
	pf_enter_vf_flr_send_start(gt, vfid);
}

static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
	}
}

static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}

static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}

static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
		return false;

	err = pf_send_vf_flr_finish(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_flr_send_finish(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_flr_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_flr_failed(gt, vfid);
	else
		pf_enter_vf_flr_completed(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		return false;

	xe_gt_sriov_pf_sanitize_hw(gt, vfid);

	pf_enter_vf_flr_send_finish(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		return false;

	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);

	xe_gt_sriov_pf_monitor_flr(gt, vfid);

	pf_enter_vf_flr_reset_mmio(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		return false;

	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
	if (err)
		pf_enter_vf_flr_failed(gt, vfid);
	else
		pf_enter_vf_flr_reset_data(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}

static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		return false;

	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
	pf_enter_vf_flr_wait_guc(gt, vfid);

	err = pf_send_vf_flr_start(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect FLR_DONE from GuC */
		pf_exit_vf_flr_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_flr_send_start(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_flr_rejected(gt, vfid);
		else
			pf_enter_vf_flr_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}

static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		return false;

	pf_enter_vf_flr_reset_config(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		pf_queue_vf(gt, vfid);
}

/**
 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
	int err;

	pf_enter_vf_flr_wip(gt, vfid);

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		return -EIO;

	return 0;
}

/**
 * DOC: The VF FLR Flow with GuC
 *
 * The VF FLR flow includes several steps::
 *
 *              PF                        GUC             PCI
 *     ========================================================
 *              |                          |               |
 *     (1)      |                         [ ] <----- FLR --|
 *              |                         [ ]              :
 *     (2)     [ ] <-------- NOTIFY FLR --[ ]
 *             [ ]                         |
 *     (3)     [ ]                         |
 *             [ ]                         |
 *             [ ]-- START FLR ---------> [ ]
 *              |                         [ ]
 *     (4)      |                         [ ]
 *              |                         [ ]
 *             [ ] <--------- FLR DONE -- [ ]
 *             [ ]                         |
 *     (5)     [ ]                         |
 *             [ ]                         |
 *             [ ]-- FINISH FLR --------> [ ]
 *              |                          |
 *
 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
 * * Step 2a: on some platforms G2H is only received from root GuC
 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
 * * Step 3a: on some platforms PF must send H2G to all other GuCs
 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
 */
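
/*
 * For reference, the FLR notification in step (2) arrives as the fixed-layout
 * G2H event that xe_gt_sriov_pf_control_process_guc2pf() decodes below. A
 * sketch (illustrative only, not built) of how such a "VF1 FLR" message would
 * be laid out, mirroring the FIELD_PREP() style used by
 * guc_action_vf_control_cmd() at the top of this file:
 */
#if 0
static void example_show_flr_notify(struct xe_gt *gt)
{
	u32 msg[GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
		FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, GUC_ACTION_GUC2PF_VF_STATE_NOTIFY),
		FIELD_PREP(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, 1),
		FIELD_PREP(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, GUC_PF_NOTIFY_VF_FLR),
	};

	/* would be routed to pf_handle_vf_flr(gt, 1) by the handler below */
	xe_gt_sriov_pf_control_process_guc2pf(gt, msg, ARRAY_SIZE(msg));
}
#endif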

static bool needs_dispatch_flr(struct xe_device *xe)
{
	return xe->info.platform == XE_PVC;
}

static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt *gtit;
	unsigned int gtid;

	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

	if (needs_dispatch_flr(xe)) {
		for_each_gt(gtit, xe, gtid)
			pf_enter_vf_flr_wip(gtit, vfid);
	} else {
		pf_enter_vf_flr_wip(gt, vfid);
	}
}

static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_flr_guc_done(gt, vfid);
}

static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_pause_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_pause_guc_done(gt, vfid);
}

static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
		return -EPROTO;

	switch (eventid) {
	case GUC_PF_NOTIFY_VF_FLR:
		pf_handle_vf_flr(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FLR_DONE:
		pf_handle_vf_flr_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
		pf_handle_vf_pause_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}

static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
	switch (eventid) {
	case GUC_PF_NOTIFY_VF_ENABLE:
		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
					str_enabled_disabled(true),
					str_enabled_disabled(false));
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}

/**
 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
 * @gt: the &xe_gt
 * @msg: the G2H message
 * @len: the length of the G2H message
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
{
	u32 vfid;
	u32 eventid;

	xe_gt_assert(gt, len);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);

	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
		return -EPROTO;

	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
		return -EPFNOSUPPORT;

	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
		return -EPROTO;

	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);

	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
}
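
/*
 * The worker below recovers the VFID from a queued control-state node by
 * pointer arithmetic: the node is embedded in per-VF metadata stored in a
 * flat array, so subtracting the array base from the container yields the
 * index. A generic standalone sketch of that pattern (illustrative only,
 * not built):
 */
#if 0
struct example_meta {
	int payload;
	struct list_head link;
};

static struct example_meta example_array[8];

static unsigned int example_index(struct list_head *link)
{
	struct example_meta *m = container_of(link, struct example_meta, link);

	return m - example_array;	/* pointer difference == array index */
}
#endif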

static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_flr_send_start(gt, vfid))
		return true;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
		return false;
	}

	if (pf_exit_vf_flr_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_config(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_data(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
		return true;

	if (pf_exit_vf_flr_send_finish(gt, vfid))
		return true;

	if (pf_exit_vf_stop_send_stop(gt, vfid))
		return true;

	if (pf_exit_vf_pause_send_pause(gt, vfid))
		return true;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
		return true;
	}

	if (pf_exit_vf_pause_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_pause_save_guc(gt, vfid))
		return true;

	if (pf_exit_vf_resume_send_resume(gt, vfid))
		return true;

	return false;
}

static unsigned int pf_control_state_index(struct xe_gt *gt,
					   struct xe_gt_sriov_control_state *cs)
{
	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}

static void pf_worker_find_work(struct xe_gt *gt)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
	struct xe_gt_sriov_control_state *cs;
	unsigned int vfid;
	bool empty;
	bool more;

	spin_lock(&pfc->lock);
	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
	if (cs)
		list_del_init(&cs->link);
	empty = list_empty(&pfc->list);
	spin_unlock(&pfc->lock);

	if (!cs)
		return;

	/* VF metadata structures are indexed by the VFID */
	vfid = pf_control_state_index(gt, cs);
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	more = pf_process_vf_state_machine(gt, vfid);
	if (more)
		pf_queue_vf(gt, vfid);
	else if (!empty)
		pf_queue_control_worker(gt);
}

static void control_worker_func(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	pf_worker_find_work(gt);
}

static void pf_stop_worker(struct xe_gt *gt)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	cancel_work_sync(&gt->sriov.pf.control.worker);
}

static void control_fini_action(struct drm_device *dev, void *data)
{
	struct xe_gt *gt = data;

	pf_stop_worker(gt);
}

/**
 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
 * @gt: the &xe_gt
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int n, totalvfs;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (n = 0; n <= totalvfs; n++) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);

		init_completion(&cs->done);
		INIT_LIST_HEAD(&cs->link);
	}

	spin_lock_init(&gt->sriov.pf.control.lock);
	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);

	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
}

/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int n, totalvfs;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (n = 1; n <= totalvfs; n++)
		pf_enter_vf_ready(gt, n);
}
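
/*
 * Lifecycle sketch (illustrative only, not built): how a hypothetical probe
 * and GT-reset path would be expected to use the two entry points above,
 * assuming gt is a PF-mode GT.
 */
#if 0
static int example_probe_gt(struct xe_gt *gt)
{
	/* once at probe: sets up the list/lock/worker, registers drmm cleanup */
	return xe_gt_sriov_pf_control_init(gt);
}

static void example_gt_reset(struct xe_gt *gt)
{
	/* after every GT reset: cancel the worker, force all VFs back to READY */
	xe_gt_sriov_pf_control_restart(gt);
}
#endif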