// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_sriov_abi.h"

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc_ct.h"
#include "xe_sriov.h"
#include "xe_sriov_pf_control.h"
#include "xe_sriov_pf_service.h"
#include "xe_tile.h"

static const char *control_cmd_to_string(u32 cmd)
{
	switch (cmd) {
	case GUC_PF_TRIGGER_VF_PAUSE:
		return "PAUSE";
	case GUC_PF_TRIGGER_VF_RESUME:
		return "RESUME";
	case GUC_PF_TRIGGER_VF_STOP:
		return "STOP";
	case GUC_PF_TRIGGER_VF_FLR_START:
		return "FLR_START";
	case GUC_PF_TRIGGER_VF_FLR_FINISH:
		return "FLR_FINISH";
	default:
		return "<unknown>";
	}
}

static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
	};
	int ret;

	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
	return ret > 0 ? -EPROTO : ret;
}

static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
	int err;

	xe_gt_assert(gt, vfid != PFID);
	xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
				vfid, control_cmd_to_string(cmd));

	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
	if (unlikely(err))
		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
	return err;
}

static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}

static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}

static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}

static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}

static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}

/**
 * DOC: The VF state machine
 *
 * The simplified VF state machine could be presented as::
 *
 *	         pause--------------------------o
 *	        /                               |
 *	       /                                v
 *	(READY)<------------------resume-----(PAUSED)
 *	   ^   \                            /      /
 *	   |    \                          /      /
 *	   |     stop---->(STOPPED)<----stop     /
 *	   |             /                      /
 *	   |            /                      /
 *	   o--------<-----flr                 /
 *	    \                                /
 *	     o------<--------------------flr
 *
 * Where:
 *
 * * READY - represents a state in which VF is fully operable
 * * PAUSED - represents a state in which VF activity is temporarily suspended
 * * STOPPED - represents a state in which VF activity is definitely halted
 * * pause - represents a request to temporarily suspend VF activity
 * * resume - represents a request to resume VF activity
 * * stop - represents a request to definitely halt VF activity
 * * flr - represents a request to perform VF FLR to restore VF activity
 *
 * However, each state transition requires additional steps that involve
 * communication with GuC that might fail or be interrupted by other requests::
 *
 *	                         .................................WIP....
 *	                         :                                      :
 *	      pause------------------->PAUSE_WIP------------------------------o
 *	     /                   :     /        \                       :     |
 *	    /                    :  o----<---stop  flr--o               :     |
 *	   /                     :  |          \      / |               :     V
 *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
 *	   ^ \  \                :  |                   |               :   / /
 *	   |  \  \               :  |                   |               :  / /
 *	   |   \  \              :  |                   |               : / /
 *	   |    \  \             :  o----<--------------+--<-------stop  /
 *	   |     \  \            :  |                   |               /
 *	   |      \  \           :  V                   |              /
 *	   |       \  stop----->STOP_WIP---------flr--->-o            /
 *	   |        \            :  |                   |             /
 *	   |         \           :  |                   V            /
 *	   |          flr--------+----->------------->FLR_WIP<-----flr
 *	   |                     :  |                 /    ^         :
 *	   |                     :  |                /     |         :
 *	   o--------<------------:--+-----<---------o      |         :
 *	                         :  |                      |         :
 *	                         :..|......................|.........:
 *	                            |                      |
 *	                            V                      |
 *	                        (STOPPED)---------------flr
 *
 * For details about each internal WIP state machine see:
 *
 * * `The VF PAUSE state machine`_
 * * `The VF RESUME state machine`_
 * * `The VF STOP state machine`_
 * * `The VF FLR state machine`_
 */

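/*
 * Illustrative only (not a verbatim call site; the debugfs/sysfs wiring lives
 * elsewhere): a plausible sketch of how the public entry points defined below
 * drive the transitions named in the diagram above:
 *
 *	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);	// READY -> PAUSED
 *	err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);	// PAUSED -> READY
 *	err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);		// ... -> STOPPED
 *	err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);	// ... -> READY (async)
 */
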
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
#define CASE2STR(_X) \
	case XE_GT_SRIOV_STATE_##_X: return #_X
	CASE2STR(WIP);
	CASE2STR(FLR_WIP);
	CASE2STR(FLR_SEND_START);
	CASE2STR(FLR_WAIT_GUC);
	CASE2STR(FLR_GUC_DONE);
	CASE2STR(FLR_SYNC);
	CASE2STR(FLR_RESET_CONFIG);
	CASE2STR(FLR_RESET_DATA);
	CASE2STR(FLR_RESET_MMIO);
	CASE2STR(FLR_SEND_FINISH);
	CASE2STR(FLR_FAILED);
	CASE2STR(PAUSE_WIP);
	CASE2STR(PAUSE_SEND_PAUSE);
	CASE2STR(PAUSE_WAIT_GUC);
	CASE2STR(PAUSE_GUC_DONE);
	CASE2STR(PAUSE_SAVE_GUC);
	CASE2STR(PAUSE_FAILED);
	CASE2STR(PAUSED);
	CASE2STR(RESUME_WIP);
	CASE2STR(RESUME_SEND_RESUME);
	CASE2STR(RESUME_FAILED);
	CASE2STR(RESUMED);
	CASE2STR(STOP_WIP);
	CASE2STR(STOP_SEND_STOP);
	CASE2STR(STOP_FAILED);
	CASE2STR(STOPPED);
	CASE2STR(MISMATCH);
#undef CASE2STR
	default: return "?";
	}
}
#endif

static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
{
	switch (bit) {
	case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
	case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
		return HZ / 2;
	case XE_GT_SRIOV_STATE_FLR_WIP:
	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
		return 5 * HZ;
	default:
		return HZ;
	}
}

static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	return &gt->sriov.pf.vfs[vfid].control;
}

static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return &cs->state;
}

static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	return test_bit(bit, pf_peek_vf_state(gt, vfid));
}

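/*
 * Note on reading state dumps: several bits are legitimately set at once
 * while a request is in flight. A hypothetical pause in progress would
 * report, e.g., WIP | PAUSE_WIP | PAUSE_SEND_PAUSE, since each nested state
 * is tracked by its own bit of the same bitmap (see pf_dump_vf_state()).
 */
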
static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long state = *pf_peek_vf_state(gt, vfid);
	enum xe_gt_sriov_control_bits bit;

	if (state) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
					vfid, state, state ? " bits " : "",
					(int)BITS_PER_LONG, &state);
		for_each_set_bit(bit, &state, BITS_PER_LONG)
			xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
						vfid, control_bit_to_string(bit), bit);
	} else {
		xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
	}
}

static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	bool result = pf_check_vf_state(gt, vfid, bit);

	if (unlikely(!result))
		pf_dump_vf_state(gt, vfid);

	return result;
}

static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
				   enum xe_gt_sriov_control_bits bit)
{
	bool result = !pf_check_vf_state(gt, vfid, bit);

	if (unlikely(!result))
		pf_dump_vf_state(gt, vfid);

	return result;
}

static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit,
			      const char *what)
{
	xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n",
				vfid, control_bit_to_string(bit), bit, what);
}

static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
			      enum xe_gt_sriov_control_bits bit)
{
	if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
		pf_track_vf_state(gt, vfid, bit, "enter");
		return true;
	}
	return false;
}

static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
			     enum xe_gt_sriov_control_bits bit)
{
	if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
		pf_track_vf_state(gt, vfid, bit, "exit");
		return true;
	}
	return false;
}

static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
			       enum xe_gt_sriov_control_bits bit)
{
	if (pf_exit_vf_state(gt, vfid, bit))
		xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
					vfid, control_bit_to_string(bit), bit,
					__builtin_return_address(0));
}

static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
		xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
				vfid, __builtin_return_address(0));
		pf_dump_vf_state(gt, vfid);
	}
}

static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
		xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
				vfid, __builtin_return_address(0));

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
}

#define pf_enter_vf_state_machine_bug(gt, vfid)	({	\
	pf_enter_vf_mismatch((gt), (vfid));		\
})

static void pf_queue_control_worker(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
}

static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

	spin_lock(&pfc->lock);
	list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
	spin_unlock(&pfc->lock);

	pf_queue_control_worker(gt);
}

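/*
 * Queueing sketch (derived from the helpers above): every state that needs
 * worker attention calls pf_queue_vf(), which moves the VF's node to the
 * tail of the control list and kicks the control worker. Re-queueing an
 * already queued VF is safe, as list_move_tail() merely refreshes its
 * position:
 *
 *	pf_enter_vf_state(gt, vfid, bit);	// mark work to do
 *	pf_queue_vf(gt, vfid);			// worker will pick it up
 */
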
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);

static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		reinit_completion(&cs->done);
		return true;
	}
	return false;
}

static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

		pf_exit_vf_flr_wip(gt, vfid);
		pf_exit_vf_stop_wip(gt, vfid);
		pf_exit_vf_pause_wip(gt, vfid);
		pf_exit_vf_resume_wip(gt, vfid);

		complete_all(&cs->done);
	}
}

static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
{
	struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

	return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
}

static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

/**
 * DOC: The VF PAUSE state machine
 *
 * The VF PAUSE state machine looks like::
 *
 *	 (READY,RESUMED)<-------------<---------------------o---------o
 *	    |                                                \         \
 *	   pause                                              \         \
 *	    |                                                  \         \
 *	....V...........................PAUSE_WIP........      \         \
 *	:    \                                           :       o        \
 *	:     \    o------<-----busy                     :       |         \
 *	:      \  /             /                        :       |          |
 *	:       PAUSE_SEND_PAUSE ---failed--->-----------o--->(PAUSE_FAILED)|
 *	:        |              \                        :       |          |
 *	:      acked             rejected---->-----------o--->(MISMATCH)   /
 *	:        |                                       :                /
 *	:        v                                       :               /
 *	:   PAUSE_WAIT_GUC                               :              /
 *	:        |                                       :             /
 *	:       done                                     :            /
 *	:        |                                       :           /
 *	:        v                                       :          /
 *	:   PAUSE_GUC_DONE                               o-----restart
 *	:        |                                       :
 *	:        |                  o---<--busy          :
 *	:        v                 /       /             :
 *	:   PAUSE_SAVE_GUC                               :
 *	:       /                                        :
 *	:      /                                         :
 *	:....o..............o...............o...........:
 *	     |              |               |
 *	 completed         flr             stop
 *	     |              |               |
 *	     V        .....V.....    ......V.....
 *	 (PAUSED)     : FLR_WIP :    : STOP_WIP :
 *	              :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

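/*
 * A rough trace of a successful pause, matching the diagram above (assumed
 * ordering; the actual interleaving depends on the GuC):
 *
 *	PAUSE_SEND_PAUSE  ->  pf_send_vf_pause()		H2G request
 *	PAUSE_WAIT_GUC    ->  wait for PAUSE_DONE		G2H notification
 *	PAUSE_GUC_DONE    ->  queue follow-up work
 *	PAUSE_SAVE_GUC    ->  xe_gt_sriov_pf_migration_save_guc_state()
 *	PAUSED            ->  completion signalled to waiters
 */
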
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
	}
}

static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_paused(gt, vfid);
}

static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_pause_failed(gt, vfid);
}

static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
		return false;

	err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
	if (err) {
		/* retry if busy */
		if (err == -EBUSY) {
			pf_enter_vf_pause_save_guc(gt, vfid);
			return true;
		}
		/* give up on error */
		if (err == -EIO)
			pf_enter_vf_mismatch(gt, vfid);
	}

	pf_enter_vf_pause_completed(gt, vfid);
	return true;
}

static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		return false;

	pf_enter_vf_pause_save_guc(gt, vfid);
	return true;
}

static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
		pf_queue_vf(gt, vfid);
}

static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}

static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
		return false;

	/* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
	pf_enter_pause_wait_guc(gt, vfid);

	err = pf_send_vf_pause(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect PAUSE_DONE from GuC */
		pf_exit_pause_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_pause_send_pause(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_pause_rejected(gt, vfid);
		else
			pf_enter_vf_pause_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}

static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_pause_send_pause(gt, vfid);
		return true;
	}

	return false;
}

/**
 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
		return -EPERM;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_pause_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
				vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF RESUME state machine
 *
 * The VF RESUME state machine looks like::
 *
 *	 (PAUSED)<-----------------<------------------------o
 *	    |                                                \
 *	  resume                                              \
 *	    |                                                  \
 *	....V............................RESUME_WIP......      \
 *	:    \                                           :       o
 *	:     \    o-------<-----busy                    :       |
 *	:      \  /              /                       :       |
 *	:       RESUME_SEND_RESUME ---failed--->---------o--->(RESUME_FAILED)
 *	:       /                  \                     :       |
 *	:    acked                  rejected---->--------o--->(MISMATCH)
 *	:      /                                         :
 *	:....o..............o...............o.....o.....:
 *	     |              |               |       \
 *	 completed         flr             stop      restart-->(READY)
 *	     |              |               |
 *	     V        .....V.....    ......V.....
 *	 (RESUMED)    : FLR_WIP :    : STOP_WIP :
 *	              :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

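/*
 * Hypothetical caller's view (for illustration only): resume is legal only
 * from PAUSED, so a typical sequence pairs it with a successful pause:
 *
 *	if (!xe_gt_sriov_pf_control_pause_vf(gt, vfid))
 *		err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);
 *
 * Resuming a VF that is not paused fails early with -EPERM.
 */
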
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}

static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_resumed(gt, vfid);
}

static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_resume_failed(gt, vfid);
}

static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
		return false;

	err = pf_send_vf_resume(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_resume_send_resume(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_resume_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_resume_failed(gt, vfid);
	else
		pf_enter_vf_resume_completed(gt, vfid);
	return true;
}

static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_resume_send_resume(gt, vfid);
		return true;
	}

	return false;
}

/**
 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
	int err;

	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
		return -EPERM;
	}

	if (!pf_enter_vf_resume_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
		xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF STOP state machine
 *
 * The VF STOP state machine looks like::
 *
 *	 (READY,PAUSED,RESUMED)<-------<--------------------o
 *	    |                                                \
 *	   stop                                               \
 *	    |                                                  \
 *	....V..............................STOP_WIP......      \
 *	:    \                                           :       o
 *	:     \    o----<----busy                        :       |
 *	:      \  /          /                           :       |
 *	:       STOP_SEND_STOP--------failed--->---------o--->(STOP_FAILED)
 *	:       /             \                          :       |
 *	:    acked             rejected-------->---------o--->(MISMATCH)
 *	:      /                                         :
 *	:....o..............o...............o...........:
 *	     |              |               |
 *	 completed         flr            restart
 *	     |              |               |
 *	     V        .....V.....           V
 *	 (STOPPED)    : FLR_WIP :        (READY)
 *	              :.........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
}

static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
	pf_exit_vf_mismatch(gt, vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_stopped(gt, vfid);
}

static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_stop_failed(gt, vfid);
}

static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
		return false;

	err = pf_send_vf_stop(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_stop_send_stop(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_stop_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_stop_failed(gt, vfid);
	else
		pf_enter_vf_stop_completed(gt, vfid);
	return true;
}

static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
		pf_enter_vf_wip(gt, vfid);
		pf_enter_vf_stop_send_stop(gt, vfid);
		return true;
	}
	return false;
}

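/*
 * Hypothetical usage note: STOPPED is terminal as far as PAUSE/RESUME are
 * concerned; per the state machine only an FLR brings the VF back to READY:
 *
 *	err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);		// VF halted
 *	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);	// now -EPERM
 *	err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);	// back towards READY
 */
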
/**
 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
		return -ESTALE;
	}

	if (!pf_enter_vf_stop_wip(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
		return -EALREADY;
	}

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err)
		return err;

	if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
		xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid);
		return 0;
	}

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
		xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
		return -EIO;
	}

	xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
	return -ECANCELED;
}

/**
 * DOC: The VF FLR state machine
 *
 * The VF FLR state machine looks like::
 *
 *	 (READY,PAUSED,STOPPED)<------------<--------------o
 *	    |                                               \
 *	   flr                                               \
 *	    |                                                 \
 *	....V..........................FLR_WIP...........     \
 *	:    \                                           :      \
 *	:     \    o----<----busy                        :       |
 *	:      \  /          /                           :       |
 *	:       FLR_SEND_START---failed----->------------o--->(FLR_FAILED)<---o
 *	:        |            \                          :       |            |
 *	:      acked           rejected----->------------o--->(MISMATCH)      |
 *	:        |                                       :       ^            |
 *	:        v                                       :       |            |
 *	:   FLR_WAIT_GUC                                 :       |            |
 *	:        |                                       :       |            |
 *	:       done                                     :       |            |
 *	:        |                                       :       |            |
 *	:        v                                       :       |            |
 *	:   FLR_GUC_DONE                                 :       |            |
 *	:        |                                       :       |            |
 *	:        |   o--<--sync                          :       |            |
 *	:        |  /     /                              :       |            |
 *	:    FLR_SYNC--o                                 :       |            |
 *	:        |                                       :       |            |
 *	:   FLR_RESET_CONFIG---failed--->----------------o-------+------------o
 *	:        |                                       :       |            |
 *	:   FLR_RESET_DATA                               :       |            |
 *	:        |                                       :       |            |
 *	:   FLR_RESET_MMIO                               :       |            |
 *	:        |                                       :       |            |
 *	:        |   o----<----busy                      :       |            |
 *	:        |  /          /                         :       |            |
 *	:   FLR_SEND_FINISH----failed--->----------------o-------+------------o
 *	:       /           \                            :       |
 *	:    acked           rejected----->--------------o-------o
 *	:      /                                         :
 *	:....o..............................o...........:
 *	     |                               |
 *	 completed                        restart
 *	     |                              /
 *	     V                             /
 *	 (READY)<----------<--------------o
 *
 * For the full state machine view, see `The VF state machine`_.
 */

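/*
 * The WIP states above map onto concrete steps roughly as follows (a sketch
 * based on the handlers below; FLR_SYNC is the cross-GT/tile checkpoint):
 *
 *	FLR_SEND_START    ->  pf_send_vf_flr_start()		H2G request
 *	FLR_WAIT_GUC      ->  wait for FLR_DONE			G2H notification
 *	FLR_GUC_DONE      ->  queue follow-up work
 *	FLR_SYNC          ->  xe_sriov_pf_control_sync_flr()
 *	FLR_RESET_CONFIG  ->  xe_gt_sriov_pf_config_sanitize()
 *	FLR_RESET_DATA    ->  xe_gt_sriov_pf_monitor_flr() et al.
 *	FLR_RESET_MMIO    ->  xe_gt_sriov_pf_sanitize_hw()
 *	FLR_SEND_FINISH   ->  pf_send_vf_flr_finish()		H2G request
 */
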
static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
		return;
	}

	pf_enter_vf_wip(gt, vfid);
	pf_enter_vf_flr_send_start(gt, vfid);
}

static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
		pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);

		xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
	}
}

static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_ready(gt, vfid);
}

static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
	pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_mismatch(gt, vfid);
	pf_enter_vf_flr_failed(gt, vfid);
}

static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
		return false;

	err = pf_send_vf_flr_finish(gt, vfid);
	if (err == -EBUSY)
		pf_enter_vf_flr_send_finish(gt, vfid);
	else if (err == -EIO)
		pf_enter_vf_flr_rejected(gt, vfid);
	else if (err)
		pf_enter_vf_flr_failed(gt, vfid);
	else
		pf_enter_vf_flr_completed(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
		return false;

	xe_gt_sriov_pf_sanitize_hw(gt, vfid);

	pf_enter_vf_flr_send_finish(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
		return false;

	if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
		xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);

	xe_gt_sriov_pf_monitor_flr(gt, vfid);

	pf_enter_vf_flr_reset_mmio(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		pf_enter_vf_state_machine_bug(gt, vfid);

	pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
		return false;

	err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
	if (err)
		pf_enter_vf_flr_failed(gt, vfid);
	else
		pf_enter_vf_flr_reset_data(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
		pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
	return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}

static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
		return false;

	/* GuC may actually send a FLR_DONE before we get a RESPONSE */
	pf_enter_vf_flr_wait_guc(gt, vfid);

	err = pf_send_vf_flr_start(gt, vfid);
	if (err) {
		/* send failed, so we shouldn't expect FLR_DONE from GuC */
		pf_exit_vf_flr_wait_guc(gt, vfid);

		if (err == -EBUSY)
			pf_enter_vf_flr_send_start(gt, vfid);
		else if (err == -EIO)
			pf_enter_vf_flr_rejected(gt, vfid);
		else
			pf_enter_vf_flr_failed(gt, vfid);
	} else {
		/*
		 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
		 * but since GuC didn't complain, we may clear MISMATCH
		 */
		pf_exit_vf_mismatch(gt, vfid);
	}

	return true;
}

static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		return false;

	pf_enter_vf_flr_reset_config(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	int ret;

	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		pf_enter_vf_state_machine_bug(gt, vfid);

	ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
	if (ret < 0) {
		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret));
		pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
	} else {
		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n");
		pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
	}
}

static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		return false;

	pf_enter_vf_flr_sync(gt, vfid);
	return true;
}

static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		pf_queue_vf(gt, vfid);
}

/**
 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
{
	pf_enter_vf_flr_wip(gt, vfid);

	return 0;
}

/**
 * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 * @sync: if true, allow exiting the FLR checkpoint
 *
 * Return: non-zero if the FLR checkpoint has been reached, zero if there is
 * no FLR in progress, or a negative error code if the FLR is still busy or
 * has failed.
 */
int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync)
{
	if (sync && pf_exit_vf_flr_sync(gt, vfid))
		return 1;
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		return 1;
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
		return -EBUSY;
	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		return -EIO;
	return 0;
}

/**
 * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid)
{
	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
	int err;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		return -EIO;

	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
		return 0;

	err = pf_wait_vf_wip_done(gt, vfid, timeout);
	if (err) {
		xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
		return err;
	}

	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
		return -EIO;

	return 0;
}

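/*
 * Illustrative pairing (not a verbatim call site): trigger_flr() only kicks
 * off the asynchronous sequence, so a caller that needs the outcome is
 * expected to follow up with wait_flr():
 *
 *	xe_gt_sriov_pf_control_trigger_flr(gt, vfid);
 *	err = xe_gt_sriov_pf_control_wait_flr(gt, vfid);
 */
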
/**
 * DOC: The VF FLR Flow with GuC
 *
 * The VF FLR flow includes several steps::
 *
 *	          PF                        GUC             PCI
 *	========================================================
 *	          |                          |               |
 *	(1)       |                         [ ] <----- FLR --|
 *	          |                         [ ]              :
 *	(2)      [ ] <-------- NOTIFY FLR --[ ]
 *	         [ ]                         |
 *	(3)      [ ]                         |
 *	         [ ]                         |
 *	         [ ]-- START FLR ---------> [ ]
 *	          |                         [ ]
 *	(4)       |                         [ ]
 *	          |                         [ ]
 *	         [ ] <--------- FLR DONE -- [ ]
 *	         [ ]                         |
 *	(5)      [ ]                         |
 *	         [ ]                         |
 *	         [ ]-- FINISH FLR --------> [ ]
 *	          |                          |
 *
 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
 * * Step 2a: on some platforms G2H is only received from root GuC
 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
 * * Step 3a: on some platforms PF must send H2G to all other GuCs
 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
 */

static bool needs_dispatch_flr(struct xe_device *xe)
{
	return xe->info.platform == XE_PVC;
}

static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt *gtit;
	unsigned int gtid;

	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

	if (needs_dispatch_flr(xe)) {
		for_each_gt(gtit, xe, gtid)
			pf_enter_vf_flr_wip(gtit, vfid);
	} else {
		pf_enter_vf_flr_wip(gt, vfid);
	}
}

static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_flr_guc_done(gt, vfid);
}

static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
	if (!pf_exit_pause_wait_guc(gt, vfid)) {
		xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
		pf_enter_vf_mismatch(gt, vfid);
		return;
	}

	pf_enter_vf_pause_guc_done(gt, vfid);
}

static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
	xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

	if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
		return -EPROTO;

	switch (eventid) {
	case GUC_PF_NOTIFY_VF_FLR:
		pf_handle_vf_flr(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FLR_DONE:
		pf_handle_vf_flr_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
		pf_handle_vf_pause_done(gt, vfid);
		break;
	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}

static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
	switch (eventid) {
	case GUC_PF_NOTIFY_VF_ENABLE:
		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
					str_enabled_disabled(true),
					str_enabled_disabled(false));
		break;
	default:
		return -ENOPKG;
	}
	return 0;
}

/**
 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
 * @gt: the &xe_gt
 * @msg: the G2H message
 * @len: the length of the G2H message
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
{
	u32 vfid;
	u32 eventid;

	xe_gt_assert(gt, len);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);

	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
		return -EPROTO;

	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
		return -EPFNOSUPPORT;

	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
		return -EPROTO;

	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);

	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
}

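/*
 * For reference, the VF_STATE_NOTIFY event decoded above is a short G2H
 * message (layout per guc_actions_sriov_abi.h) of which three dwords are
 * consumed here:
 *
 *	msg[0]	HXG header (origin=GUC, type=EVENT, action=VF_STATE_NOTIFY)
 *	msg[1]	VFID (0 addresses the PF itself)
 *	msg[2]	eventid (FLR, FLR_DONE, PAUSE_DONE, FIXUP_DONE or ENABLE)
 */
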
static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
{
	if (pf_exit_vf_flr_send_start(gt, vfid))
		return true;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
		return false;
	}

	if (pf_exit_vf_flr_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_config(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_data(gt, vfid))
		return true;

	if (pf_exit_vf_flr_reset_mmio(gt, vfid))
		return true;

	if (pf_exit_vf_flr_send_finish(gt, vfid))
		return true;

	if (pf_exit_vf_stop_send_stop(gt, vfid))
		return true;

	if (pf_exit_vf_pause_send_pause(gt, vfid))
		return true;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
		xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
					control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
		return true;
	}

	if (pf_exit_vf_pause_guc_done(gt, vfid))
		return true;

	if (pf_exit_vf_pause_save_guc(gt, vfid))
		return true;

	if (pf_exit_vf_resume_send_resume(gt, vfid))
		return true;

	return false;
}

static unsigned int pf_control_state_index(struct xe_gt *gt,
					   struct xe_gt_sriov_control_state *cs)
{
	return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}

static void pf_worker_find_work(struct xe_gt *gt)
{
	struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
	struct xe_gt_sriov_control_state *cs;
	unsigned int vfid;
	bool empty;
	bool more;

	spin_lock(&pfc->lock);
	cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
	if (cs)
		list_del_init(&cs->link);
	empty = list_empty(&pfc->list);
	spin_unlock(&pfc->lock);

	if (!cs)
		return;

	/* VF metadata structures are indexed by the VFID */
	vfid = pf_control_state_index(gt, cs);
	xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

	more = pf_process_vf_state_machine(gt, vfid);
	if (more)
		pf_queue_vf(gt, vfid);
	else if (!empty)
		pf_queue_control_worker(gt);
}

static void control_worker_func(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);

	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	pf_worker_find_work(gt);
}

static void pf_stop_worker(struct xe_gt *gt)
{
	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
	cancel_work_sync(&gt->sriov.pf.control.worker);
}

static void control_fini_action(struct drm_device *dev, void *data)
{
	struct xe_gt *gt = data;

	pf_stop_worker(gt);
}

/**
 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
 * @gt: the &xe_gt
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int n, totalvfs;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (n = 0; n <= totalvfs; n++) {
		struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);

		init_completion(&cs->done);
		INIT_LIST_HEAD(&cs->link);
	}

	spin_lock_init(&gt->sriov.pf.control.lock);
	INIT_LIST_HEAD(&gt->sriov.pf.control.list);
	INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);

	return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
}

/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int n, totalvfs;

	xe_gt_assert(gt, IS_SRIOV_PF(xe));

	pf_stop_worker(gt);

	totalvfs = xe_sriov_pf_get_totalvfs(xe);
	for (n = 1; n <= totalvfs; n++)
		pf_enter_vf_ready(gt, n);
}