1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2023-2024 Intel Corporation 4 */ 5 6 #include <drm/drm_managed.h> 7 8 #include "abi/guc_actions_sriov_abi.h" 9 10 #include "xe_device.h" 11 #include "xe_gt.h" 12 #include "xe_gt_sriov_pf.h" 13 #include "xe_gt_sriov_pf_config.h" 14 #include "xe_gt_sriov_pf_control.h" 15 #include "xe_gt_sriov_pf_helpers.h" 16 #include "xe_gt_sriov_pf_migration.h" 17 #include "xe_gt_sriov_pf_monitor.h" 18 #include "xe_gt_sriov_printk.h" 19 #include "xe_guc_ct.h" 20 #include "xe_sriov.h" 21 #include "xe_sriov_packet.h" 22 #include "xe_sriov_packet_types.h" 23 #include "xe_sriov_pf_control.h" 24 #include "xe_sriov_pf_migration.h" 25 #include "xe_sriov_pf_service.h" 26 #include "xe_tile.h" 27 28 static const char *control_cmd_to_string(u32 cmd) 29 { 30 switch (cmd) { 31 case GUC_PF_TRIGGER_VF_PAUSE: 32 return "PAUSE"; 33 case GUC_PF_TRIGGER_VF_RESUME: 34 return "RESUME"; 35 case GUC_PF_TRIGGER_VF_STOP: 36 return "STOP"; 37 case GUC_PF_TRIGGER_VF_FLR_START: 38 return "FLR_START"; 39 case GUC_PF_TRIGGER_VF_FLR_FINISH: 40 return "FLR_FINISH"; 41 default: 42 return "<unknown>"; 43 } 44 } 45 46 static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd) 47 { 48 u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = { 49 FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 50 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 51 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL), 52 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid), 53 FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd), 54 }; 55 int ret; 56 57 ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); 58 return ret > 0 ? -EPROTO : ret; 59 } 60 61 static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd) 62 { 63 int err; 64 65 xe_gt_assert(gt, vfid != PFID); 66 xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n", 67 vfid, control_cmd_to_string(cmd)); 68 69 err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd); 70 if (unlikely(err)) 71 xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n", 72 vfid, control_cmd_to_string(cmd), ERR_PTR(err)); 73 return err; 74 } 75 76 static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid) 77 { 78 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE); 79 } 80 81 static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid) 82 { 83 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME); 84 } 85 86 static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid) 87 { 88 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP); 89 } 90 91 static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid) 92 { 93 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START); 94 } 95 96 static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid) 97 { 98 return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH); 99 } 100 101 /** 102 * DOC: The VF state machine 103 * 104 * The simplified VF state machine could be presented as:: 105 * 106 * pause--------------------------o 107 * / | 108 * / v 109 * (READY)<------------------resume-----(PAUSED) 110 * ^ \ / / 111 * | \ / / 112 * | stop---->(STOPPED)<----stop / 113 * | / / 114 * | / / 115 * o--------<-----flr / 116 * \ / 117 * o------<--------------------flr 118 * 119 * Where: 120 * 121 * * READY - represents a state in which VF is fully operable 122 * * PAUSED - represents a state in which VF activity is temporarily suspended 123 * * STOPPED - represents a state in which VF activity is definitely halted 124 * * pause - represents a request to temporarily suspend VF activity 125 * * resume - represents a request to resume VF activity 126 * * stop - represents a request to definitely halt VF activity 127 * * flr - represents a request to perform VF FLR to restore VF activity 128 * 129 * However, each state transition requires additional steps that involves 130 * communication with GuC that might fail or be interrupted by other requests:: 131 * 132 * .................................WIP.... 133 * : : 134 * pause--------------------->PAUSE_WIP----------------------------o 135 * / : / \ : | 136 * / : o----<---stop flr--o : | 137 * / : | \ / | : V 138 * (READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED) 139 * ^ \ \ : | | : / / 140 * | \ \ : | | : / / 141 * | \ \ : | | : / / 142 * | \ \ : o----<----------------------+--<-------stop / 143 * | \ \ : | | : / 144 * | \ \ : V | : / 145 * | \ stop----->STOP_WIP---------flr--->-----o : / 146 * | \ : | | : / 147 * | \ : | V : / 148 * | flr--------+----->----------------->FLR_WIP<-----flr 149 * | : | / ^ : 150 * | : | / | : 151 * o--------<-------:----+-----<----------------o | : 152 * : | | : 153 * :....|...........................|.....: 154 * | | 155 * V | 156 * (STOPPED)--------------------flr 157 * 158 * For details about each internal WIP state machine see: 159 * 160 * * `The VF PAUSE state machine`_ 161 * * `The VF RESUME state machine`_ 162 * * `The VF STOP state machine`_ 163 * * `The VF FLR state machine`_ 164 */ 165 166 #ifdef CONFIG_DRM_XE_DEBUG_SRIOV 167 static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) 168 { 169 switch (bit) { 170 #define CASE2STR(_X) \ 171 case XE_GT_SRIOV_STATE_##_X: return #_X 172 CASE2STR(WIP); 173 CASE2STR(FLR_WIP); 174 CASE2STR(FLR_SEND_START); 175 CASE2STR(FLR_WAIT_GUC); 176 CASE2STR(FLR_GUC_DONE); 177 CASE2STR(FLR_SYNC); 178 CASE2STR(FLR_RESET_CONFIG); 179 CASE2STR(FLR_RESET_DATA); 180 CASE2STR(FLR_RESET_MMIO); 181 CASE2STR(FLR_SEND_FINISH); 182 CASE2STR(FLR_FAILED); 183 CASE2STR(PAUSE_WIP); 184 CASE2STR(PAUSE_SEND_PAUSE); 185 CASE2STR(PAUSE_WAIT_GUC); 186 CASE2STR(PAUSE_GUC_DONE); 187 CASE2STR(PAUSE_FAILED); 188 CASE2STR(PAUSED); 189 CASE2STR(SAVE_WIP); 190 CASE2STR(SAVE_PROCESS_DATA); 191 CASE2STR(SAVE_WAIT_DATA); 192 CASE2STR(SAVE_DATA_DONE); 193 CASE2STR(SAVE_FAILED); 194 CASE2STR(SAVED); 195 CASE2STR(RESTORE_WIP); 196 CASE2STR(RESTORE_PROCESS_DATA); 197 CASE2STR(RESTORE_WAIT_DATA); 198 CASE2STR(RESTORE_DATA_DONE); 199 CASE2STR(RESTORE_FAILED); 200 CASE2STR(RESTORED); 201 CASE2STR(RESUME_WIP); 202 CASE2STR(RESUME_SEND_RESUME); 203 CASE2STR(RESUME_FAILED); 204 CASE2STR(RESUMED); 205 CASE2STR(STOP_WIP); 206 CASE2STR(STOP_SEND_STOP); 207 CASE2STR(STOP_FAILED); 208 CASE2STR(STOPPED); 209 CASE2STR(MISMATCH); 210 #undef CASE2STR 211 default: return "?"; 212 } 213 } 214 #endif 215 216 static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit) 217 { 218 switch (bit) { 219 case XE_GT_SRIOV_STATE_FLR_WAIT_GUC: 220 case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: 221 return HZ / 2; 222 case XE_GT_SRIOV_STATE_FLR_WIP: 223 case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: 224 return 5 * HZ; 225 case XE_GT_SRIOV_STATE_RESTORE_WIP: 226 return 20 * HZ; 227 default: 228 return HZ; 229 } 230 } 231 232 static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid) 233 { 234 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 235 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); 236 237 return >->sriov.pf.vfs[vfid].control; 238 } 239 240 static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid) 241 { 242 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 243 244 return cs->state; 245 } 246 247 static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid, 248 enum xe_gt_sriov_control_bits bit) 249 { 250 return test_bit(bit, pf_peek_vf_state(gt, vfid)); 251 } 252 253 static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid) 254 { 255 unsigned long state = *pf_peek_vf_state(gt, vfid); 256 enum xe_gt_sriov_control_bits bit; 257 258 if (state) { 259 xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n", 260 vfid, state, state ? " bits " : "", 261 (int)BITS_PER_LONG, &state); 262 for_each_set_bit(bit, &state, BITS_PER_LONG) 263 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n", 264 vfid, control_bit_to_string(bit), bit); 265 } else { 266 xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid); 267 } 268 } 269 270 static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid, 271 enum xe_gt_sriov_control_bits bit) 272 { 273 bool result = pf_check_vf_state(gt, vfid, bit); 274 275 if (unlikely(!result)) 276 pf_dump_vf_state(gt, vfid); 277 278 return result; 279 } 280 281 static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid, 282 enum xe_gt_sriov_control_bits bit) 283 { 284 bool result = !pf_check_vf_state(gt, vfid, bit); 285 286 if (unlikely(!result)) 287 pf_dump_vf_state(gt, vfid); 288 289 return result; 290 } 291 292 static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid, 293 enum xe_gt_sriov_control_bits bit, 294 const char *what) 295 { 296 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n", 297 vfid, control_bit_to_string(bit), bit, what); 298 } 299 300 static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid, 301 enum xe_gt_sriov_control_bits bit) 302 { 303 if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) { 304 pf_track_vf_state(gt, vfid, bit, "enter"); 305 return true; 306 } 307 return false; 308 } 309 310 static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid, 311 enum xe_gt_sriov_control_bits bit) 312 { 313 if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) { 314 pf_track_vf_state(gt, vfid, bit, "exit"); 315 return true; 316 } 317 return false; 318 } 319 320 static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid, 321 enum xe_gt_sriov_control_bits bit) 322 { 323 if (pf_exit_vf_state(gt, vfid, bit)) 324 xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n", 325 vfid, control_bit_to_string(bit), bit, 326 __builtin_return_address(0)); 327 } 328 329 static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid) 330 { 331 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) { 332 xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n", 333 vfid, __builtin_return_address(0)); 334 pf_dump_vf_state(gt, vfid); 335 } 336 } 337 338 static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid) 339 { 340 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) 341 xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n", 342 vfid, __builtin_return_address(0)); 343 344 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); 345 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); 346 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); 347 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED); 348 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); 349 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); 350 } 351 352 #define pf_enter_vf_state_machine_bug(gt, vfid) ({ \ 353 pf_enter_vf_mismatch((gt), (vfid)); \ 354 }) 355 356 static void pf_queue_control_worker(struct xe_gt *gt) 357 { 358 struct xe_device *xe = gt_to_xe(gt); 359 360 xe_gt_assert(gt, IS_SRIOV_PF(xe)); 361 362 queue_work(xe->sriov.wq, >->sriov.pf.control.worker); 363 } 364 365 static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid) 366 { 367 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control; 368 369 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 370 371 spin_lock(&pfc->lock); 372 list_move_tail(>->sriov.pf.vfs[vfid].control.link, &pfc->list); 373 spin_unlock(&pfc->lock); 374 375 pf_queue_control_worker(gt); 376 } 377 378 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid); 379 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid); 380 static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid); 381 static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid); 382 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid); 383 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid); 384 385 static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid) 386 { 387 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { 388 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 389 390 reinit_completion(&cs->done); 391 return true; 392 } 393 return false; 394 } 395 396 static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid) 397 { 398 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) { 399 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 400 401 pf_exit_vf_flr_wip(gt, vfid); 402 pf_exit_vf_stop_wip(gt, vfid); 403 pf_exit_vf_save_wip(gt, vfid); 404 pf_exit_vf_restore_wip(gt, vfid); 405 pf_exit_vf_pause_wip(gt, vfid); 406 pf_exit_vf_resume_wip(gt, vfid); 407 408 complete_all(&cs->done); 409 } 410 } 411 412 static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout) 413 { 414 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid); 415 416 return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT; 417 } 418 419 static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid) 420 { 421 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 422 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED); 423 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 424 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 425 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 426 pf_exit_vf_mismatch(gt, vfid); 427 pf_exit_vf_wip(gt, vfid); 428 } 429 430 /** 431 * DOC: The VF PAUSE state machine 432 * 433 * The VF PAUSE state machine looks like:: 434 * 435 * (READY,RESUMED)<-------------<---------------------o---------o 436 * | \ \ 437 * pause \ \ 438 * | \ \ 439 * ....V...........................PAUSE_WIP........ \ \ 440 * : \ : o \ 441 * : \ o------<-----busy : | \ 442 * : \ / / : | | 443 * : PAUSE_SEND_PAUSE ---failed--->----------o--->(PAUSE_FAILED) | 444 * : | \ : | | 445 * : acked rejected---->----------o--->(MISMATCH) / 446 * : | : / 447 * : v : / 448 * : PAUSE_WAIT_GUC : / 449 * : | : / 450 * : done : / 451 * : | : / 452 * : v : / 453 * : PAUSE_GUC_DONE o-----restart 454 * : | : 455 * : | o---<--busy : 456 * : / : 457 * : / : 458 * : / : 459 * :....o..............o...............o...........: 460 * | | | 461 * completed flr stop 462 * | | | 463 * V .....V..... ......V..... 464 * (PAUSED) : FLR_WIP : : STOP_WIP : 465 * :.........: :..........: 466 * 467 * For the full state machine view, see `The VF state machine`_. 468 */ 469 470 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) 471 { 472 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { 473 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE); 474 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); 475 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE); 476 } 477 } 478 479 static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid) 480 { 481 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) 482 pf_enter_vf_state_machine_bug(gt, vfid); 483 484 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 485 pf_exit_vf_mismatch(gt, vfid); 486 pf_exit_vf_wip(gt, vfid); 487 } 488 489 static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid) 490 { 491 pf_enter_vf_paused(gt, vfid); 492 } 493 494 static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid) 495 { 496 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED); 497 pf_exit_vf_wip(gt, vfid); 498 } 499 500 static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid) 501 { 502 pf_enter_vf_mismatch(gt, vfid); 503 pf_enter_vf_pause_failed(gt, vfid); 504 } 505 506 static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) 507 { 508 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) 509 return false; 510 511 pf_enter_vf_pause_completed(gt, vfid); 512 return true; 513 } 514 515 static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid) 516 { 517 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE)) 518 pf_queue_vf(gt, vfid); 519 } 520 521 static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) 522 { 523 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) 524 pf_enter_vf_state_machine_bug(gt, vfid); 525 } 526 527 static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid) 528 { 529 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC); 530 } 531 532 static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) 533 { 534 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) 535 pf_enter_vf_state_machine_bug(gt, vfid); 536 537 pf_queue_vf(gt, vfid); 538 } 539 540 static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid) 541 { 542 int err; 543 544 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE)) 545 return false; 546 547 /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */ 548 pf_enter_pause_wait_guc(gt, vfid); 549 550 err = pf_send_vf_pause(gt, vfid); 551 if (err) { 552 /* send failed, so we shouldn't expect PAUSE_DONE from GuC */ 553 pf_exit_pause_wait_guc(gt, vfid); 554 555 if (err == -EBUSY) 556 pf_enter_vf_pause_send_pause(gt, vfid); 557 else if (err == -EIO) 558 pf_enter_vf_pause_rejected(gt, vfid); 559 else 560 pf_enter_vf_pause_failed(gt, vfid); 561 } else { 562 /* 563 * we have already moved to WAIT_GUC, maybe even to GUC_DONE 564 * but since GuC didn't complain, we may clear MISMATCH 565 */ 566 pf_exit_vf_mismatch(gt, vfid); 567 } 568 569 return true; 570 } 571 572 static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid) 573 { 574 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) { 575 pf_enter_vf_wip(gt, vfid); 576 pf_enter_vf_pause_send_pause(gt, vfid); 577 return true; 578 } 579 580 return false; 581 } 582 583 /** 584 * xe_gt_sriov_pf_control_pause_vf - Pause a VF. 585 * @gt: the &xe_gt 586 * @vfid: the VF identifier 587 * 588 * This function is for PF only. 589 * 590 * Return: 0 on success or a negative error code on failure. 591 */ 592 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) 593 { 594 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP); 595 int err; 596 597 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 598 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 599 return -EPERM; 600 } 601 602 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 603 xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid); 604 return -ESTALE; 605 } 606 607 if (!pf_enter_vf_pause_wip(gt, vfid)) { 608 xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid); 609 return -EALREADY; 610 } 611 612 err = pf_wait_vf_wip_done(gt, vfid, timeout); 613 if (err) { 614 xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n", 615 vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 616 return err; 617 } 618 619 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 620 xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid); 621 return 0; 622 } 623 624 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) { 625 xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid); 626 return -EIO; 627 } 628 629 xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid); 630 return -ECANCELED; 631 } 632 633 /** 634 * DOC: The VF RESUME state machine 635 * 636 * The VF RESUME state machine looks like:: 637 * 638 * (PAUSED)<-----------------<------------------------o 639 * | \ 640 * resume \ 641 * | \ 642 * ....V............................RESUME_WIP...... \ 643 * : \ : o 644 * : \ o-------<-----busy : | 645 * : \ / / : | 646 * : RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED) 647 * : / \ : | 648 * : acked rejected---->---------o--->(MISMATCH) 649 * : / : 650 * :....o..............o...............o.....o.....: 651 * | | | \ 652 * completed flr stop restart-->(READY) 653 * | | | 654 * V .....V..... ......V..... 655 * (RESUMED) : FLR_WIP : : STOP_WIP : 656 * :.........: :..........: 657 * 658 * For the full state machine view, see `The VF state machine`_. 659 */ 660 661 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) 662 { 663 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) 664 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME); 665 } 666 667 static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid) 668 { 669 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 670 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 671 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 672 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 673 pf_exit_vf_mismatch(gt, vfid); 674 pf_exit_vf_wip(gt, vfid); 675 } 676 677 static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid) 678 { 679 pf_enter_vf_resumed(gt, vfid); 680 } 681 682 static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid) 683 { 684 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED); 685 pf_exit_vf_wip(gt, vfid); 686 } 687 688 static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid) 689 { 690 pf_enter_vf_mismatch(gt, vfid); 691 pf_enter_vf_resume_failed(gt, vfid); 692 } 693 694 static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) 695 { 696 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) 697 pf_enter_vf_state_machine_bug(gt, vfid); 698 699 pf_queue_vf(gt, vfid); 700 } 701 702 static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid) 703 { 704 int err; 705 706 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME)) 707 return false; 708 709 err = pf_send_vf_resume(gt, vfid); 710 if (err == -EBUSY) 711 pf_enter_vf_resume_send_resume(gt, vfid); 712 else if (err == -EIO) 713 pf_enter_vf_resume_rejected(gt, vfid); 714 else if (err) 715 pf_enter_vf_resume_failed(gt, vfid); 716 else 717 pf_enter_vf_resume_completed(gt, vfid); 718 return true; 719 } 720 721 static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid) 722 { 723 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) { 724 pf_enter_vf_wip(gt, vfid); 725 pf_enter_vf_resume_send_resume(gt, vfid); 726 return true; 727 } 728 729 return false; 730 } 731 732 /** 733 * xe_gt_sriov_pf_control_resume_vf - Resume a VF. 734 * @gt: the &xe_gt 735 * @vfid: the VF identifier 736 * 737 * This function is for PF only. 738 * 739 * Return: 0 on success or a negative error code on failure. 740 */ 741 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) 742 { 743 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP); 744 int err; 745 746 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 747 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 748 return -EPERM; 749 } 750 751 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 752 xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); 753 return -EBUSY; 754 } 755 756 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 757 xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); 758 return -EBUSY; 759 } 760 761 if (!pf_enter_vf_resume_wip(gt, vfid)) { 762 xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid); 763 return -EALREADY; 764 } 765 766 err = pf_wait_vf_wip_done(gt, vfid, timeout); 767 if (err) 768 return err; 769 770 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) { 771 xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid); 772 return 0; 773 } 774 775 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) { 776 xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid); 777 return -EIO; 778 } 779 780 xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid); 781 return -ECANCELED; 782 } 783 784 /** 785 * DOC: The VF SAVE state machine 786 * 787 * SAVE extends the PAUSED state. 788 * 789 * The VF SAVE state machine looks like:: 790 * 791 * ....PAUSED.................................................... 792 * : : 793 * : (O)<---------o : 794 * : | \ : 795 * : save (SAVED) (SAVE_FAILED) : 796 * : | ^ ^ : 797 * : | | | : 798 * : ....V...............o...........o......SAVE_WIP......... : 799 * : : | | | : : 800 * : : | empty | : : 801 * : : | | | : : 802 * : : | | | : : 803 * : : | DATA_DONE | : : 804 * : : | ^ | : : 805 * : : | | error : : 806 * : : | no_data / : : 807 * : : | / / : : 808 * : : | / / : : 809 * : : | / / : : 810 * : : o---------->PROCESS_DATA<----consume : : 811 * : : \ \ : : 812 * : : \ \ : : 813 * : : \ \ : : 814 * : : ring_full----->WAIT_DATA : : 815 * : : : : 816 * : :......................................................: : 817 * :............................................................: 818 * 819 * For the full state machine view, see `The VF state machine`_. 820 */ 821 822 static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 823 { 824 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 825 xe_gt_sriov_pf_migration_ring_free(gt, vfid); 826 827 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 828 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 829 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 830 } 831 } 832 833 static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid) 834 { 835 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) 836 pf_enter_vf_state_machine_bug(gt, vfid); 837 838 xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid); 839 840 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 841 pf_exit_vf_mismatch(gt, vfid); 842 pf_exit_vf_wip(gt, vfid); 843 } 844 845 static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid) 846 { 847 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) 848 pf_enter_vf_state_machine_bug(gt, vfid); 849 850 wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 851 852 pf_exit_vf_wip(gt, vfid); 853 } 854 855 static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid) 856 { 857 int ret; 858 859 if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 860 XE_SRIOV_PACKET_TYPE_GUC)) { 861 ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid); 862 if (ret) 863 return ret; 864 865 xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 866 XE_SRIOV_PACKET_TYPE_GUC); 867 868 return -EAGAIN; 869 } 870 871 if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 872 XE_SRIOV_PACKET_TYPE_GGTT)) { 873 ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid); 874 if (ret) 875 return ret; 876 877 xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 878 XE_SRIOV_PACKET_TYPE_GGTT); 879 880 return -EAGAIN; 881 } 882 883 if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 884 XE_SRIOV_PACKET_TYPE_MMIO)) { 885 ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid); 886 if (ret) 887 return ret; 888 889 xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 890 XE_SRIOV_PACKET_TYPE_MMIO); 891 892 return -EAGAIN; 893 } 894 895 if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid, 896 XE_SRIOV_PACKET_TYPE_VRAM)) { 897 ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid); 898 if (ret == -EAGAIN) 899 return -EAGAIN; 900 else if (ret) 901 return ret; 902 903 xe_gt_sriov_pf_migration_save_data_complete(gt, vfid, 904 XE_SRIOV_PACKET_TYPE_VRAM); 905 906 return -EAGAIN; 907 } 908 909 return 0; 910 } 911 912 static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid) 913 { 914 int ret; 915 916 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA)) 917 return false; 918 919 if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) { 920 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA); 921 return true; 922 } 923 924 ret = pf_handle_vf_save_data(gt, vfid); 925 if (ret == -EAGAIN) 926 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 927 else if (ret) 928 pf_enter_vf_save_failed(gt, vfid); 929 else 930 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 931 932 return true; 933 } 934 935 static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid) 936 { 937 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) 938 return; 939 940 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 941 pf_queue_vf(gt, vfid); 942 } 943 944 static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid) 945 { 946 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 947 xe_gt_sriov_pf_migration_save_init(gt, vfid); 948 pf_enter_vf_wip(gt, vfid); 949 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA); 950 pf_queue_vf(gt, vfid); 951 return true; 952 } 953 954 return false; 955 } 956 957 /** 958 * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced. 959 * @gt: the &xe_gt 960 * @vfid: the VF identifier 961 * 962 * This function is for PF only. 963 * 964 * Return: true if all migration data was produced, false otherwise. 965 */ 966 bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid) 967 { 968 return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 969 } 970 971 /** 972 * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed. 973 * @gt: the &xe_gt 974 * @vfid: the VF identifier 975 * 976 * This function is for PF only. 977 * 978 * Return: true if save processing failed, false otherwise. 979 */ 980 bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid) 981 { 982 return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED); 983 } 984 985 /** 986 * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing. 987 * @gt: the &xe_gt 988 * @vfid: the VF identifier 989 * 990 * This function is for PF only. 991 * 992 * Return: 0 on success or a negative error code on failure. 993 */ 994 int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid) 995 { 996 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED)) 997 return -EIO; 998 999 pf_exit_vf_save_wait_data(gt, vfid); 1000 1001 return 0; 1002 } 1003 1004 /** 1005 * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence. 1006 * @gt: the &xe_gt 1007 * @vfid: the VF identifier 1008 * 1009 * This function is for PF only. 1010 * 1011 * Return: 0 on success or a negative error code on failure. 1012 */ 1013 int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid) 1014 { 1015 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1016 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 1017 return -EPERM; 1018 } 1019 1020 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 1021 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 1022 return -EPERM; 1023 } 1024 1025 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1026 xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid); 1027 return -EBUSY; 1028 } 1029 1030 if (!pf_enter_vf_save_wip(gt, vfid)) { 1031 xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid); 1032 return -EALREADY; 1033 } 1034 1035 return 0; 1036 } 1037 1038 /** 1039 * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence. 1040 * @gt: the &xe_gt 1041 * @vfid: the VF identifier 1042 * 1043 * This function is for PF only. 1044 * 1045 * Return: 0 on success or a negative error code on failure. 1046 */ 1047 int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid) 1048 { 1049 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) { 1050 xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid); 1051 return -EIO; 1052 } 1053 1054 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1055 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE); 1056 pf_enter_vf_saved(gt, vfid); 1057 1058 return 0; 1059 } 1060 1061 /** 1062 * DOC: The VF RESTORE state machine 1063 * 1064 * RESTORE extends the PAUSED state. 1065 * 1066 * The VF RESTORE state machine looks like:: 1067 * 1068 * ....PAUSED.................................................... 1069 * : : 1070 * : (O)<---------o : 1071 * : | \ : 1072 * : restore (RESTORED) (RESTORE_FAILED) : 1073 * : | ^ ^ : 1074 * : | | | : 1075 * : ....V...............o...........o......RESTORE_WIP...... : 1076 * : : | | | : : 1077 * : : | empty | : : 1078 * : : | | | : : 1079 * : : | | | : : 1080 * : : | DATA_DONE | : : 1081 * : : | ^ | : : 1082 * : : | | error : : 1083 * : : | trailer / : : 1084 * : : | / / : : 1085 * : : | / / : : 1086 * : : | / / : : 1087 * : : o---------->PROCESS_DATA<----produce : : 1088 * : : \ \ : : 1089 * : : \ \ : : 1090 * : : \ \ : : 1091 * : : ring_empty---->WAIT_DATA : : 1092 * : : : : 1093 * : :......................................................: : 1094 * :............................................................: 1095 * 1096 * For the full state machine view, see `The VF state machine`_. 1097 */ 1098 1099 static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1100 { 1101 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1102 xe_gt_sriov_pf_migration_ring_free(gt, vfid); 1103 1104 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1105 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 1106 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE); 1107 } 1108 } 1109 1110 static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid) 1111 { 1112 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) 1113 pf_enter_vf_state_machine_bug(gt, vfid); 1114 1115 xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid); 1116 1117 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1118 pf_exit_vf_mismatch(gt, vfid); 1119 pf_exit_vf_wip(gt, vfid); 1120 } 1121 1122 static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid) 1123 { 1124 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 1125 pf_enter_vf_state_machine_bug(gt, vfid); 1126 1127 wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid)); 1128 1129 pf_exit_vf_wip(gt, vfid); 1130 } 1131 1132 static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid) 1133 { 1134 struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid); 1135 int ret = 0; 1136 1137 switch (data->hdr.type) { 1138 case XE_SRIOV_PACKET_TYPE_GGTT: 1139 ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data); 1140 break; 1141 case XE_SRIOV_PACKET_TYPE_MMIO: 1142 ret = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data); 1143 break; 1144 case XE_SRIOV_PACKET_TYPE_GUC: 1145 ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data); 1146 break; 1147 case XE_SRIOV_PACKET_TYPE_VRAM: 1148 ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data); 1149 break; 1150 default: 1151 xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", 1152 vfid, data->hdr.type); 1153 break; 1154 } 1155 1156 xe_sriov_packet_free(data); 1157 1158 return ret; 1159 } 1160 1161 static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid) 1162 { 1163 int ret; 1164 1165 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA)) 1166 return false; 1167 1168 if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) { 1169 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) 1170 pf_enter_vf_restored(gt, vfid); 1171 else 1172 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA); 1173 1174 return true; 1175 } 1176 1177 ret = pf_handle_vf_restore_data(gt, vfid); 1178 if (ret) 1179 pf_enter_vf_restore_failed(gt, vfid); 1180 else 1181 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1182 1183 return true; 1184 } 1185 1186 static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid) 1187 { 1188 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) 1189 return; 1190 1191 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1192 pf_queue_vf(gt, vfid); 1193 } 1194 1195 static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid) 1196 { 1197 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) { 1198 pf_enter_vf_wip(gt, vfid); 1199 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA); 1200 pf_queue_vf(gt, vfid); 1201 return true; 1202 } 1203 1204 return false; 1205 } 1206 1207 /** 1208 * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed. 1209 * @gt: the &xe_gt 1210 * @vfid: the VF identifier 1211 * 1212 * This function is for PF only. 1213 * 1214 * Return: true if restore processing failed, false otherwise. 1215 */ 1216 bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid) 1217 { 1218 return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED); 1219 } 1220 1221 /** 1222 * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of VF migration data stream. 1223 * @gt: the &xe_gt 1224 * @vfid: the VF identifier 1225 * 1226 * This function is for PF only. 1227 * 1228 * Return: 0 on success or a negative error code on failure. 1229 */ 1230 int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid) 1231 { 1232 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) { 1233 pf_enter_vf_state_machine_bug(gt, vfid); 1234 return -EIO; 1235 } 1236 1237 return xe_gt_sriov_pf_control_process_restore_data(gt, vfid); 1238 } 1239 1240 /** 1241 * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing. 1242 * @gt: the &xe_gt 1243 * @vfid: the VF identifier 1244 * 1245 * This function is for PF only. 1246 * 1247 * Return: 0 on success or a negative error code on failure. 1248 */ 1249 int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid) 1250 { 1251 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) { 1252 xe_gt_sriov_pf_migration_ring_free(gt, vfid); 1253 return -EIO; 1254 } 1255 1256 pf_exit_vf_restore_wait_data(gt, vfid); 1257 1258 return 0; 1259 } 1260 1261 /** 1262 * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence. 1263 * @gt: the &xe_gt 1264 * @vfid: the VF identifier 1265 * 1266 * This function is for PF only. 1267 * 1268 * Return: 0 on success or a negative error code on failure. 1269 */ 1270 int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid) 1271 { 1272 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1273 xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid); 1274 return -EPERM; 1275 } 1276 1277 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) { 1278 xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid); 1279 return -EPERM; 1280 } 1281 1282 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) { 1283 xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid); 1284 return -EBUSY; 1285 } 1286 1287 if (!pf_enter_vf_restore_wip(gt, vfid)) { 1288 xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid); 1289 return -EALREADY; 1290 } 1291 1292 return 0; 1293 } 1294 1295 static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid) 1296 { 1297 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP); 1298 int err; 1299 1300 err = pf_wait_vf_wip_done(gt, vfid, timeout); 1301 if (err) { 1302 xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n", 1303 vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 1304 return err; 1305 } 1306 1307 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) 1308 return -EIO; 1309 1310 return 0; 1311 } 1312 1313 /** 1314 * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence. 1315 * @gt: the &xe_gt 1316 * @vfid: the VF identifier 1317 * 1318 * This function is for PF only. 1319 * 1320 * Return: 0 on success or a negative error code on failure. 1321 */ 1322 int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid) 1323 { 1324 int ret; 1325 1326 ret = pf_wait_vf_restore_done(gt, vfid); 1327 if (ret) 1328 return ret; 1329 1330 if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) { 1331 pf_enter_vf_mismatch(gt, vfid); 1332 return -EIO; 1333 } 1334 1335 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1336 1337 return 0; 1338 } 1339 1340 /** 1341 * DOC: The VF STOP state machine 1342 * 1343 * The VF STOP state machine looks like:: 1344 * 1345 * (READY,PAUSED,RESUMED)<-------<--------------------o 1346 * | \ 1347 * stop \ 1348 * | \ 1349 * ....V..............................STOP_WIP...... \ 1350 * : \ : o 1351 * : \ o----<----busy : | 1352 * : \ / / : | 1353 * : STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED) 1354 * : / \ : | 1355 * : acked rejected-------->--------o--->(MISMATCH) 1356 * : / : 1357 * :....o..............o...............o...........: 1358 * | | | 1359 * completed flr restart 1360 * | | | 1361 * V .....V..... V 1362 * (STOPPED) : FLR_WIP : (READY) 1363 * :.........: 1364 * 1365 * For the full state machine view, see `The VF state machine`_. 1366 */ 1367 1368 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) 1369 { 1370 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) 1371 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP); 1372 } 1373 1374 static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid) 1375 { 1376 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) 1377 pf_enter_vf_state_machine_bug(gt, vfid); 1378 1379 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED); 1380 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED); 1381 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED); 1382 pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED); 1383 pf_exit_vf_mismatch(gt, vfid); 1384 pf_exit_vf_wip(gt, vfid); 1385 } 1386 1387 static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid) 1388 { 1389 pf_enter_vf_stopped(gt, vfid); 1390 } 1391 1392 static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid) 1393 { 1394 pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED); 1395 pf_exit_vf_wip(gt, vfid); 1396 } 1397 1398 static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid) 1399 { 1400 pf_enter_vf_mismatch(gt, vfid); 1401 pf_enter_vf_stop_failed(gt, vfid); 1402 } 1403 1404 static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) 1405 { 1406 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) 1407 pf_enter_vf_state_machine_bug(gt, vfid); 1408 1409 pf_queue_vf(gt, vfid); 1410 } 1411 1412 static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid) 1413 { 1414 int err; 1415 1416 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP)) 1417 return false; 1418 1419 err = pf_send_vf_stop(gt, vfid); 1420 if (err == -EBUSY) 1421 pf_enter_vf_stop_send_stop(gt, vfid); 1422 else if (err == -EIO) 1423 pf_enter_vf_stop_rejected(gt, vfid); 1424 else if (err) 1425 pf_enter_vf_stop_failed(gt, vfid); 1426 else 1427 pf_enter_vf_stop_completed(gt, vfid); 1428 return true; 1429 } 1430 1431 static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid) 1432 { 1433 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) { 1434 pf_enter_vf_wip(gt, vfid); 1435 pf_enter_vf_stop_send_stop(gt, vfid); 1436 return true; 1437 } 1438 return false; 1439 } 1440 1441 /** 1442 * xe_gt_sriov_pf_control_stop_vf - Stop a VF. 1443 * @gt: the &xe_gt 1444 * @vfid: the VF identifier 1445 * 1446 * This function is for PF only. 1447 * 1448 * Return: 0 on success or a negative error code on failure. 1449 */ 1450 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) 1451 { 1452 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP); 1453 int err; 1454 1455 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1456 xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid); 1457 return -ESTALE; 1458 } 1459 1460 if (!pf_enter_vf_stop_wip(gt, vfid)) { 1461 xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid); 1462 return -EALREADY; 1463 } 1464 1465 err = pf_wait_vf_wip_done(gt, vfid, timeout); 1466 if (err) 1467 return err; 1468 1469 if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) { 1470 xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid); 1471 return 0; 1472 } 1473 1474 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) { 1475 xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid); 1476 return -EIO; 1477 } 1478 1479 xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid); 1480 return -ECANCELED; 1481 } 1482 1483 /** 1484 * DOC: The VF FLR state machine 1485 * 1486 * The VF FLR state machine looks like:: 1487 * 1488 * (READY,PAUSED,STOPPED)<------------<--------------o 1489 * | \ 1490 * flr \ 1491 * | \ 1492 * ....V..........................FLR_WIP........... \ 1493 * : \ : \ 1494 * : \ o----<----busy : | 1495 * : \ / / : | 1496 * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o 1497 * : | \ : | | 1498 * : acked rejected----->-----------o--->(MISMATCH) | 1499 * : | : ^ | 1500 * : v : | | 1501 * : FLR_WAIT_GUC : | | 1502 * : | : | | 1503 * : done : | | 1504 * : | : | | 1505 * : v : | | 1506 * : FLR_GUC_DONE : | | 1507 * : | : | | 1508 * : | o--<--sync : | | 1509 * : |/ / : | | 1510 * : FLR_SYNC--o : | | 1511 * : | : | | 1512 * : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o 1513 * : | : | | 1514 * : FLR_RESET_DATA : | | 1515 * : | : | | 1516 * : FLR_RESET_MMIO : | | 1517 * : | : | | 1518 * : | o----<----busy : | | 1519 * : |/ / : | | 1520 * : FLR_SEND_FINISH----failed--->-----------o--------+-----------o 1521 * : / \ : | 1522 * : acked rejected----->-----------o--------o 1523 * : / : 1524 * :....o..............................o...........: 1525 * | | 1526 * completed restart 1527 * | / 1528 * V / 1529 * (READY)<----------<------------o 1530 * 1531 * For the full state machine view, see `The VF state machine`_. 1532 */ 1533 1534 static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) 1535 { 1536 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) 1537 pf_enter_vf_state_machine_bug(gt, vfid); 1538 1539 pf_queue_vf(gt, vfid); 1540 } 1541 1542 static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) 1543 { 1544 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { 1545 xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid); 1546 return; 1547 } 1548 1549 pf_enter_vf_wip(gt, vfid); 1550 pf_enter_vf_flr_send_start(gt, vfid); 1551 } 1552 1553 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) 1554 { 1555 if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { 1556 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH); 1557 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO); 1558 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA); 1559 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); 1560 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE); 1561 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); 1562 pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START); 1563 1564 xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); 1565 } 1566 } 1567 1568 static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid) 1569 { 1570 pf_enter_vf_ready(gt, vfid); 1571 } 1572 1573 static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid) 1574 { 1575 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 1576 xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid); 1577 pf_exit_vf_wip(gt, vfid); 1578 } 1579 1580 static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid) 1581 { 1582 pf_enter_vf_mismatch(gt, vfid); 1583 pf_enter_vf_flr_failed(gt, vfid); 1584 } 1585 1586 static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) 1587 { 1588 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) 1589 pf_enter_vf_state_machine_bug(gt, vfid); 1590 1591 pf_queue_vf(gt, vfid); 1592 } 1593 1594 static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid) 1595 { 1596 int err; 1597 1598 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH)) 1599 return false; 1600 1601 err = pf_send_vf_flr_finish(gt, vfid); 1602 if (err == -EBUSY) 1603 pf_enter_vf_flr_send_finish(gt, vfid); 1604 else if (err == -EIO) 1605 pf_enter_vf_flr_rejected(gt, vfid); 1606 else if (err) 1607 pf_enter_vf_flr_failed(gt, vfid); 1608 else 1609 pf_enter_vf_flr_completed(gt, vfid); 1610 return true; 1611 } 1612 1613 static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) 1614 { 1615 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) 1616 pf_enter_vf_state_machine_bug(gt, vfid); 1617 1618 pf_queue_vf(gt, vfid); 1619 } 1620 1621 static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid) 1622 { 1623 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO)) 1624 return false; 1625 1626 xe_gt_sriov_pf_sanitize_hw(gt, vfid); 1627 1628 pf_enter_vf_flr_send_finish(gt, vfid); 1629 return true; 1630 } 1631 1632 static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) 1633 { 1634 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) 1635 pf_enter_vf_state_machine_bug(gt, vfid); 1636 1637 pf_queue_vf(gt, vfid); 1638 } 1639 1640 static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) 1641 { 1642 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) 1643 return false; 1644 1645 if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) 1646 xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); 1647 1648 xe_gt_sriov_pf_monitor_flr(gt, vfid); 1649 1650 pf_enter_vf_flr_reset_mmio(gt, vfid); 1651 return true; 1652 } 1653 1654 static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) 1655 { 1656 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) 1657 pf_enter_vf_state_machine_bug(gt, vfid); 1658 1659 pf_queue_vf(gt, vfid); 1660 } 1661 1662 static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid) 1663 { 1664 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG); 1665 int err; 1666 1667 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG)) 1668 return false; 1669 1670 err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout); 1671 if (err) 1672 pf_enter_vf_flr_failed(gt, vfid); 1673 else 1674 pf_enter_vf_flr_reset_data(gt, vfid); 1675 return true; 1676 } 1677 1678 static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) 1679 { 1680 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) 1681 pf_enter_vf_state_machine_bug(gt, vfid); 1682 } 1683 1684 static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid) 1685 { 1686 return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC); 1687 } 1688 1689 static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) 1690 { 1691 int err; 1692 1693 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START)) 1694 return false; 1695 1696 /* GuC may actually send a FLR_DONE before we get a RESPONSE */ 1697 pf_enter_vf_flr_wait_guc(gt, vfid); 1698 1699 err = pf_send_vf_flr_start(gt, vfid); 1700 if (err) { 1701 /* send failed, so we shouldn't expect FLR_DONE from GuC */ 1702 pf_exit_vf_flr_wait_guc(gt, vfid); 1703 1704 if (err == -EBUSY) 1705 pf_enter_vf_flr_send_start(gt, vfid); 1706 else if (err == -EIO) 1707 pf_enter_vf_flr_rejected(gt, vfid); 1708 else 1709 pf_enter_vf_flr_failed(gt, vfid); 1710 } else { 1711 /* 1712 * we have already moved to WAIT_GUC, maybe even to GUC_DONE 1713 * but since GuC didn't complain, we may clear MISMATCH 1714 */ 1715 pf_exit_vf_mismatch(gt, vfid); 1716 } 1717 1718 return true; 1719 } 1720 1721 static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) 1722 { 1723 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1724 return false; 1725 1726 pf_enter_vf_flr_reset_config(gt, vfid); 1727 return true; 1728 } 1729 1730 static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid) 1731 { 1732 int ret; 1733 1734 if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1735 pf_enter_vf_state_machine_bug(gt, vfid); 1736 1737 ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid); 1738 if (ret < 0) { 1739 xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret)); 1740 pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); 1741 } else { 1742 xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n"); 1743 pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC); 1744 } 1745 } 1746 1747 static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) 1748 { 1749 if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) 1750 return false; 1751 1752 pf_enter_vf_flr_sync(gt, vfid); 1753 return true; 1754 } 1755 1756 static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) 1757 { 1758 if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE)) 1759 pf_queue_vf(gt, vfid); 1760 } 1761 1762 /** 1763 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. 1764 * @gt: the &xe_gt 1765 * @vfid: the VF identifier 1766 * 1767 * This function is for PF only. 1768 * 1769 * Return: 0 on success or a negative error code on failure. 1770 */ 1771 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) 1772 { 1773 pf_enter_vf_flr_wip(gt, vfid); 1774 1775 return 0; 1776 } 1777 1778 /** 1779 * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint. 1780 * @gt: the &xe_gt 1781 * @vfid: the VF identifier 1782 * @sync: if true it will allow to exit the checkpoint 1783 * 1784 * Return: non-zero if FLR checkpoint has been reached, zero if the is no FLR 1785 * in progress, or a negative error code on the FLR busy or failed. 1786 */ 1787 int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync) 1788 { 1789 if (sync && pf_exit_vf_flr_sync(gt, vfid)) 1790 return 1; 1791 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC)) 1792 return 1; 1793 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) 1794 return -EBUSY; 1795 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 1796 return -EIO; 1797 return 0; 1798 } 1799 1800 /** 1801 * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete. 1802 * @gt: the &xe_gt 1803 * @vfid: the VF identifier 1804 * 1805 * This function is for PF only. 1806 * 1807 * Return: 0 on success or a negative error code on failure. 1808 */ 1809 int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid) 1810 { 1811 unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP); 1812 int err; 1813 1814 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 1815 return -EIO; 1816 1817 if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) 1818 return 0; 1819 1820 err = pf_wait_vf_wip_done(gt, vfid, timeout); 1821 if (err) { 1822 xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n", 1823 vfid, jiffies_to_msecs(timeout), ERR_PTR(err)); 1824 return err; 1825 } 1826 1827 if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED)) 1828 return -EIO; 1829 1830 return 0; 1831 } 1832 1833 /** 1834 * DOC: The VF FLR Flow with GuC 1835 * 1836 * The VF FLR flow includes several steps:: 1837 * 1838 * PF GUC PCI 1839 * ======================================================== 1840 * | | | 1841 * (1) | [ ] <----- FLR --| 1842 * | [ ] : 1843 * (2) [ ] <-------- NOTIFY FLR --[ ] 1844 * [ ] | 1845 * (3) [ ] | 1846 * [ ] | 1847 * [ ]-- START FLR ---------> [ ] 1848 * | [ ] 1849 * (4) | [ ] 1850 * | [ ] 1851 * [ ] <--------- FLR DONE -- [ ] 1852 * [ ] | 1853 * (5) [ ] | 1854 * [ ] | 1855 * [ ]-- FINISH FLR --------> [ ] 1856 * | | 1857 * 1858 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR 1859 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR 1860 * * Step 2a: on some platforms G2H is only received from root GuC 1861 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence 1862 * * Step 3a: on some platforms PF must send H2G to all other GuCs 1863 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done 1864 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished 1865 */ 1866 1867 static bool needs_dispatch_flr(struct xe_device *xe) 1868 { 1869 return xe->info.platform == XE_PVC; 1870 } 1871 1872 static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid) 1873 { 1874 struct xe_device *xe = gt_to_xe(gt); 1875 struct xe_gt *gtit; 1876 unsigned int gtid; 1877 1878 xe_gt_sriov_info(gt, "VF%u FLR\n", vfid); 1879 1880 if (needs_dispatch_flr(xe)) { 1881 for_each_gt(gtit, xe, gtid) 1882 pf_enter_vf_flr_wip(gtit, vfid); 1883 } else { 1884 pf_enter_vf_flr_wip(gt, vfid); 1885 } 1886 } 1887 1888 static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid) 1889 { 1890 if (!pf_exit_vf_flr_wait_guc(gt, vfid)) { 1891 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid); 1892 pf_enter_vf_mismatch(gt, vfid); 1893 return; 1894 } 1895 1896 pf_enter_vf_flr_guc_done(gt, vfid); 1897 } 1898 1899 static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid) 1900 { 1901 if (!pf_exit_pause_wait_guc(gt, vfid)) { 1902 xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid); 1903 pf_enter_vf_mismatch(gt, vfid); 1904 return; 1905 } 1906 1907 pf_enter_vf_pause_guc_done(gt, vfid); 1908 } 1909 1910 static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) 1911 { 1912 xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid); 1913 1914 if (vfid > xe_gt_sriov_pf_get_totalvfs(gt)) 1915 return -EPROTO; 1916 1917 switch (eventid) { 1918 case GUC_PF_NOTIFY_VF_FLR: 1919 pf_handle_vf_flr(gt, vfid); 1920 break; 1921 case GUC_PF_NOTIFY_VF_FLR_DONE: 1922 pf_handle_vf_flr_done(gt, vfid); 1923 break; 1924 case GUC_PF_NOTIFY_VF_PAUSE_DONE: 1925 pf_handle_vf_pause_done(gt, vfid); 1926 break; 1927 case GUC_PF_NOTIFY_VF_FIXUP_DONE: 1928 break; 1929 default: 1930 return -ENOPKG; 1931 } 1932 return 0; 1933 } 1934 1935 static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid) 1936 { 1937 switch (eventid) { 1938 case GUC_PF_NOTIFY_VF_ENABLE: 1939 xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n", 1940 str_enabled_disabled(true), 1941 str_enabled_disabled(false)); 1942 break; 1943 default: 1944 return -ENOPKG; 1945 } 1946 return 0; 1947 } 1948 1949 /** 1950 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC. 1951 * @gt: the &xe_gt 1952 * @msg: the G2H message 1953 * @len: the length of the G2H message 1954 * 1955 * This function is for PF only. 1956 * 1957 * Return: 0 on success or a negative error code on failure. 1958 */ 1959 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) 1960 { 1961 u32 vfid; 1962 u32 eventid; 1963 1964 xe_gt_assert(gt, len); 1965 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); 1966 xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); 1967 xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == 1968 GUC_ACTION_GUC2PF_VF_STATE_NOTIFY); 1969 1970 if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt)))) 1971 return -EPROTO; 1972 1973 if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0]))) 1974 return -EPFNOSUPPORT; 1975 1976 if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN)) 1977 return -EPROTO; 1978 1979 vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]); 1980 eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]); 1981 1982 return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid); 1983 } 1984 1985 static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid) 1986 { 1987 if (pf_exit_vf_flr_send_start(gt, vfid)) 1988 return true; 1989 1990 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) { 1991 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 1992 control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC)); 1993 return false; 1994 } 1995 1996 if (pf_exit_vf_flr_guc_done(gt, vfid)) 1997 return true; 1998 1999 if (pf_exit_vf_flr_reset_config(gt, vfid)) 2000 return true; 2001 2002 if (pf_exit_vf_flr_reset_data(gt, vfid)) 2003 return true; 2004 2005 if (pf_exit_vf_flr_reset_mmio(gt, vfid)) 2006 return true; 2007 2008 if (pf_exit_vf_flr_send_finish(gt, vfid)) 2009 return true; 2010 2011 if (pf_exit_vf_stop_send_stop(gt, vfid)) 2012 return true; 2013 2014 if (pf_exit_vf_pause_send_pause(gt, vfid)) 2015 return true; 2016 2017 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) { 2018 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 2019 control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)); 2020 return true; 2021 } 2022 2023 if (pf_exit_vf_pause_guc_done(gt, vfid)) 2024 return true; 2025 2026 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) { 2027 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 2028 control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)); 2029 return false; 2030 } 2031 2032 if (pf_handle_vf_save(gt, vfid)) 2033 return true; 2034 2035 if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) { 2036 xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid, 2037 control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)); 2038 return false; 2039 } 2040 2041 if (pf_handle_vf_restore(gt, vfid)) 2042 return true; 2043 2044 if (pf_exit_vf_resume_send_resume(gt, vfid)) 2045 return true; 2046 2047 return false; 2048 } 2049 2050 static unsigned int pf_control_state_index(struct xe_gt *gt, 2051 struct xe_gt_sriov_control_state *cs) 2052 { 2053 return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs; 2054 } 2055 2056 static void pf_worker_find_work(struct xe_gt *gt) 2057 { 2058 struct xe_gt_sriov_pf_control *pfc = >->sriov.pf.control; 2059 struct xe_gt_sriov_control_state *cs; 2060 unsigned int vfid; 2061 bool empty; 2062 bool more; 2063 2064 spin_lock(&pfc->lock); 2065 cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link); 2066 if (cs) 2067 list_del_init(&cs->link); 2068 empty = list_empty(&pfc->list); 2069 spin_unlock(&pfc->lock); 2070 2071 if (!cs) 2072 return; 2073 2074 /* VF metadata structures are indexed by the VFID */ 2075 vfid = pf_control_state_index(gt, cs); 2076 xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt)); 2077 2078 more = pf_process_vf_state_machine(gt, vfid); 2079 if (more) 2080 pf_queue_vf(gt, vfid); 2081 else if (!empty) 2082 pf_queue_control_worker(gt); 2083 } 2084 2085 static void control_worker_func(struct work_struct *w) 2086 { 2087 struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker); 2088 2089 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 2090 pf_worker_find_work(gt); 2091 } 2092 2093 static void pf_stop_worker(struct xe_gt *gt) 2094 { 2095 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 2096 cancel_work_sync(>->sriov.pf.control.worker); 2097 } 2098 2099 static void control_fini_action(struct drm_device *dev, void *data) 2100 { 2101 struct xe_gt *gt = data; 2102 2103 pf_stop_worker(gt); 2104 } 2105 2106 /** 2107 * xe_gt_sriov_pf_control_init() - Initialize PF's control data. 2108 * @gt: the &xe_gt 2109 * 2110 * This function is for PF only. 2111 * 2112 * Return: 0 on success or a negative error code on failure. 2113 */ 2114 int xe_gt_sriov_pf_control_init(struct xe_gt *gt) 2115 { 2116 struct xe_device *xe = gt_to_xe(gt); 2117 unsigned int n, totalvfs; 2118 2119 xe_gt_assert(gt, IS_SRIOV_PF(xe)); 2120 2121 totalvfs = xe_sriov_pf_get_totalvfs(xe); 2122 for (n = 0; n <= totalvfs; n++) { 2123 struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n); 2124 2125 init_completion(&cs->done); 2126 INIT_LIST_HEAD(&cs->link); 2127 } 2128 2129 spin_lock_init(>->sriov.pf.control.lock); 2130 INIT_LIST_HEAD(>->sriov.pf.control.list); 2131 INIT_WORK(>->sriov.pf.control.worker, control_worker_func); 2132 2133 return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt); 2134 } 2135 2136 /** 2137 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset. 2138 * @gt: the &xe_gt 2139 * 2140 * Any per-VF status maintained by the PF or any ongoing VF control activity 2141 * performed by the PF must be reset or cancelled when the GT is reset. 2142 * 2143 * This function is for PF only. 2144 */ 2145 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt) 2146 { 2147 struct xe_device *xe = gt_to_xe(gt); 2148 unsigned int n, totalvfs; 2149 2150 xe_gt_assert(gt, IS_SRIOV_PF(xe)); 2151 2152 pf_stop_worker(gt); 2153 2154 totalvfs = xe_sriov_pf_get_totalvfs(xe); 2155 for (n = 1; n <= totalvfs; n++) 2156 pf_enter_vf_ready(gt, n); 2157 } 2158