// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "abi/guc_actions_sriov_abi.h"

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc_ct.h"
#include "xe_sriov.h"

static const char *control_cmd_to_string(u32 cmd)
{
        switch (cmd) {
        case GUC_PF_TRIGGER_VF_PAUSE:
                return "PAUSE";
        case GUC_PF_TRIGGER_VF_RESUME:
                return "RESUME";
        case GUC_PF_TRIGGER_VF_STOP:
                return "STOP";
        case GUC_PF_TRIGGER_VF_FLR_START:
                return "FLR_START";
        case GUC_PF_TRIGGER_VF_FLR_FINISH:
                return "FLR_FINISH";
        default:
                return "<unknown>";
        }
}

static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
{
        u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
                FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
                FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
                FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
                FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
                FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
        };
        int ret;

        ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
        return ret > 0 ? -EPROTO : ret;
}
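/*
 * All VF control requests (PAUSE/RESUME/STOP/FLR_START/FLR_FINISH)
 * funnel through the helper below, which just adds debug logging around
 * guc_action_vf_control_cmd() above.  Note that a positive return from
 * the blocking CTB send is mapped to -EPROTO there, presumably because
 * a successful PF2GUC_VF_CONTROL response is not expected to carry any
 * data.
 */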
static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
{
        int err;

        xe_gt_assert(gt, vfid != PFID);
        xe_gt_sriov_dbg_verbose(gt, "sending VF%u control command %s\n",
                                vfid, control_cmd_to_string(cmd));

        err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
        if (unlikely(err))
                xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
                                vfid, control_cmd_to_string(cmd), ERR_PTR(err));
        return err;
}

static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
{
        return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
}

static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
{
        return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
}

static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
{
        return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
}

static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
{
        return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
}

static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
{
        return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
}

/**
 * DOC: The VF state machine
 *
 * The simplified VF state machine could be presented as::
 *
 *	               pause--------------------------o
 *	              /                               |
 *	             /                                v
 *	      (READY)<------------------resume-----(PAUSED)
 *	         ^   \                             /    /
 *	         |    \                           /    /
 *	         |     stop---->(STOPPED)<----stop    /
 *	         |             /                     /
 *	         |            /                     /
 *	         o--------<-----flr                /
 *	          \                               /
 *	           o------<--------------------flr
 *
 * Where:
 *
 * * READY - represents a state in which VF is fully operable
 * * PAUSED - represents a state in which VF activity is temporarily suspended
 * * STOPPED - represents a state in which VF activity is permanently halted
 * * pause - represents a request to temporarily suspend VF activity
 * * resume - represents a request to resume VF activity
 * * stop - represents a request to permanently halt VF activity
 * * flr - represents a request to perform VF FLR to restore VF activity
 *
 * However, each state transition requires additional steps that involve
 * communication with the GuC, which might fail or be interrupted by other
 * requests::
 *
 *	                 .................................WIP....
 *	                 :                                      :
 *	    pause--------------------->PAUSE_WIP----------------------------o
 *	   /             :            /        \               :           |
 *	  /              :           o----<---stop  flr--o     :           |
 *	 /               :           |          \    /    |    :           V
 *	(READY,RESUMED)<--------+------------RESUME_WIP<----+--<-----resume--(PAUSED)
 *	 ^ \  \          :      |                         |    :          /  /
 *	 |  \  \         :      |                         |    :         /  /
 *	 |   \  \        :      |                         |    :        /  /
 *	 |    \  \       :      o----<----------------------+--<-------stop  /
 *	 |     \  \      :      |                         |    :            /
 *	 |      \  \     :      V                         |    :           /
 *	 |       \  stop----->STOP_WIP---------flr--->-----o   :          /
 *	 |        \      :      |                          |   :         /
 *	 |         \     :      |                          V   :        /
 *	 |          flr--------+----->----------------->FLR_WIP<-----flr
 *	 |               :     |                        /  ^    :
 *	 |               :     |                       /   |    :
 *	 o--------<-------:----+-----<----------------o    |    :
 *	                 :     |                           |    :
 *	                 :....|...........................|....:
 *	                      |                           |
 *	                      V                           |
 *	                   (STOPPED)--------------------flr
 *
 * For details about each internal WIP state machine see:
 *
 * * `The VF PAUSE state machine`_
 * * `The VF RESUME state machine`_
 * * `The VF STOP state machine`_
 * * `The VF FLR state machine`_
 */
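/*
 * The stringification helper below exists only for the verbose state
 * transition logging, which compiles away without
 * CONFIG_DRM_XE_DEBUG_SRIOV, hence the #ifdef guard.
 */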
#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
{
        switch (bit) {
#define CASE2STR(_X) \
        case XE_GT_SRIOV_STATE_##_X: return #_X
        CASE2STR(WIP);
        CASE2STR(FLR_WIP);
        CASE2STR(FLR_SEND_START);
        CASE2STR(FLR_WAIT_GUC);
        CASE2STR(FLR_GUC_DONE);
        CASE2STR(FLR_RESET_CONFIG);
        CASE2STR(FLR_RESET_DATA);
        CASE2STR(FLR_RESET_MMIO);
        CASE2STR(FLR_SEND_FINISH);
        CASE2STR(FLR_FAILED);
        CASE2STR(PAUSE_WIP);
        CASE2STR(PAUSE_SEND_PAUSE);
        CASE2STR(PAUSE_WAIT_GUC);
        CASE2STR(PAUSE_GUC_DONE);
        CASE2STR(PAUSE_SAVE_GUC);
        CASE2STR(PAUSE_FAILED);
        CASE2STR(PAUSED);
        CASE2STR(RESUME_WIP);
        CASE2STR(RESUME_SEND_RESUME);
        CASE2STR(RESUME_FAILED);
        CASE2STR(RESUMED);
        CASE2STR(STOP_WIP);
        CASE2STR(STOP_SEND_STOP);
        CASE2STR(STOP_FAILED);
        CASE2STR(STOPPED);
        CASE2STR(MISMATCH);
#undef CASE2STR
        default: return "?";
        }
}
#endif

static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
{
        switch (bit) {
        case XE_GT_SRIOV_STATE_FLR_WAIT_GUC:
        case XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC:
                return HZ / 2;
        case XE_GT_SRIOV_STATE_FLR_WIP:
        case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
                return 5 * HZ;
        default:
                return HZ;
        }
}

static struct xe_gt_sriov_control_state *pf_pick_vf_control(struct xe_gt *gt, unsigned int vfid)
{
        xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
        xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

        return &gt->sriov.pf.vfs[vfid].control;
}

static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
        struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

        return &cs->state;
}

static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
                              enum xe_gt_sriov_control_bits bit)
{
        return test_bit(bit, pf_peek_vf_state(gt, vfid));
}
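/*
 * Each VF's control state is kept in a single unsigned long bitmap, so
 * the enter/exit helpers below reduce to atomic bitops and a transition
 * is "owned" by whoever flips the bit first.  A minimal sketch of the
 * pattern that pf_enter_vf_state() wraps:
 *
 *	if (!test_and_set_bit(bit, &cs->state))
 *		... we made the 0->1 transition, log it and proceed ...
 *
 * which is what keeps the state machine consistent without a dedicated
 * lock around the state word.
 */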
static void pf_dump_vf_state(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long state = *pf_peek_vf_state(gt, vfid);
        enum xe_gt_sriov_control_bits bit;

        if (state) {
                xe_gt_sriov_dbg_verbose(gt, "VF%u state %#lx%s%*pbl\n",
                                        vfid, state, state ? " bits " : "",
                                        (int)BITS_PER_LONG, &state);
                for_each_set_bit(bit, &state, BITS_PER_LONG)
                        xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d)\n",
                                                vfid, control_bit_to_string(bit), bit);
        } else {
                xe_gt_sriov_dbg_verbose(gt, "VF%u state READY\n", vfid);
        }
}

static bool pf_expect_vf_state(struct xe_gt *gt, unsigned int vfid,
                               enum xe_gt_sriov_control_bits bit)
{
        bool result = pf_check_vf_state(gt, vfid, bit);

        if (unlikely(!result))
                pf_dump_vf_state(gt, vfid);

        return result;
}

static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
                                   enum xe_gt_sriov_control_bits bit)
{
        bool result = !pf_check_vf_state(gt, vfid, bit);

        if (unlikely(!result))
                pf_dump_vf_state(gt, vfid);

        return result;
}

static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
                              enum xe_gt_sriov_control_bits bit)
{
        if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
                xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
                                        vfid, control_bit_to_string(bit), bit);
                return true;
        }
        return false;
}

static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
                             enum xe_gt_sriov_control_bits bit)
{
        if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
                xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
                                        vfid, control_bit_to_string(bit), bit);
                return true;
        }
        return false;
}

static void pf_escape_vf_state(struct xe_gt *gt, unsigned int vfid,
                               enum xe_gt_sriov_control_bits bit)
{
        if (pf_exit_vf_state(gt, vfid, bit))
                xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) escaped by %ps\n",
                                        vfid, control_bit_to_string(bit), bit,
                                        __builtin_return_address(0));
}

static void pf_enter_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH)) {
                xe_gt_sriov_dbg(gt, "VF%u state mismatch detected by %ps\n",
                                vfid, __builtin_return_address(0));
                pf_dump_vf_state(gt, vfid);
        }
}

static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_MISMATCH))
                xe_gt_sriov_dbg(gt, "VF%u state mismatch cleared by %ps\n",
                                vfid, __builtin_return_address(0));

        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
}

#define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
        pf_enter_vf_mismatch((gt), (vfid));		\
})
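/*
 * Queueing model used below: VFs that need processing are linked on a
 * single per-GT list protected by a spinlock and serviced by one work
 * item on the device's SR-IOV workqueue, so the per-VF state machine
 * steps are never executed concurrently.
 */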
static void pf_queue_control_worker(struct xe_gt *gt)
{
        struct xe_device *xe = gt_to_xe(gt);

        xe_gt_assert(gt, IS_SRIOV_PF(xe));

        queue_work(xe->sriov.wq, &gt->sriov.pf.control.worker);
}

static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
{
        struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;

        xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));

        spin_lock(&pfc->lock);
        list_move_tail(&gt->sriov.pf.vfs[vfid].control.link, &pfc->list);
        spin_unlock(&pfc->lock);

        pf_queue_control_worker(gt);
}

static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);

static bool pf_enter_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
                struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

                reinit_completion(&cs->done);
                return true;
        }
        return false;
}

static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_WIP)) {
                struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

                pf_exit_vf_flr_wip(gt, vfid);
                pf_exit_vf_stop_wip(gt, vfid);
                pf_exit_vf_pause_wip(gt, vfid);
                pf_exit_vf_resume_wip(gt, vfid);

                complete_all(&cs->done);
        }
}

static int pf_wait_vf_wip_done(struct xe_gt *gt, unsigned int vfid, unsigned long timeout)
{
        struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);

        return wait_for_completion_timeout(&cs->done, timeout) ? 0 : -ETIMEDOUT;
}

static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
{
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
        pf_exit_vf_mismatch(gt, vfid);
        pf_exit_vf_wip(gt, vfid);
}
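/*
 * All blocking entry points below (pause/resume/stop/FLR) are built on
 * the WIP bit and the completion above and follow roughly the same
 * shape (a sketch only, details vary per operation):
 *
 *	if (!pf_enter_vf_<op>_wip(gt, vfid))
 *		return -EALREADY;
 *	return pf_wait_vf_wip_done(gt, vfid, timeout);
 *
 * while the actual per-step work is performed asynchronously from the
 * control worker.
 */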
/**
 * DOC: The VF PAUSE state machine
 *
 * The VF PAUSE state machine looks like::
 *
 *	 (READY,RESUMED)<-------------<---------------------o---------o
 *	     |                                                \         \
 *	   pause                                               \         \
 *	     |                                                  \         \
 *	 ....V...........................PAUSE_WIP........       \         \
 *	 :    \                                           :       o         \
 *	 :     \         o------<-----busy                :       |          \
 *	 :      \       /            /                    :       |          |
 *	 :       PAUSE_SEND_PAUSE ---failed--->-----------o--->(PAUSE_FAILED) |
 *	 :        |               \                       :        |          |
 *	 :      acked              rejected---->----------o--->(MISMATCH)    /
 *	 :        |                                       :                 /
 *	 :        v                                       :                /
 *	 :      PAUSE_WAIT_GUC                            :               /
 *	 :        |                                       :              /
 *	 :       done                                     :             /
 *	 :        |                                       :            /
 *	 :        v                                       :           /
 *	 :      PAUSE_GUC_DONE                            o-----restart
 *	 :        |                                       :
 *	 :        |   o---<--busy                         :
 *	 :        v  /       /                            :
 *	 :      PAUSE_SAVE_GUC                            :
 *	 :       /                                        :
 *	 :      /                                         :
 *	 :....o..............o...............o...........:
 *	      |              |               |
 *	  completed         flr             stop
 *	      |              |               |
 *	      V         .....V.....    ......V.....
 *	  (PAUSED)      : FLR_WIP :    : STOP_WIP :
 *	                :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
        }
}

static void pf_enter_vf_paused(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
        pf_exit_vf_mismatch(gt, vfid);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_completed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_paused(gt, vfid);
}

static void pf_enter_vf_pause_failed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_mismatch(gt, vfid);
        pf_enter_vf_pause_failed(gt, vfid);
}

static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
                pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
                return false;

        err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
        if (err) {
                /* retry if busy */
                if (err == -EBUSY) {
                        pf_enter_vf_pause_save_guc(gt, vfid);
                        return true;
                }
                /* give up on error */
                if (err == -EIO)
                        pf_enter_vf_mismatch(gt, vfid);
        }

        pf_enter_vf_pause_completed(gt, vfid);
        return true;
}

static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
                return false;

        pf_enter_vf_pause_save_guc(gt, vfid);
        return true;
}

static void pf_enter_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
                pf_queue_vf(gt, vfid);
}

static void pf_enter_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC))
                pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_pause_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
        return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
}

static void pf_enter_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_pause_send_pause(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE))
                return false;

        /* GuC may actually send a PAUSE_DONE before we get a RESPONSE */
        pf_enter_pause_wait_guc(gt, vfid);

        err = pf_send_vf_pause(gt, vfid);
        if (err) {
                /* send failed, so we shouldn't expect PAUSE_DONE from GuC */
                pf_exit_pause_wait_guc(gt, vfid);

                if (err == -EBUSY)
                        pf_enter_vf_pause_send_pause(gt, vfid);
                else if (err == -EIO)
                        pf_enter_vf_pause_rejected(gt, vfid);
                else
                        pf_enter_vf_pause_failed(gt, vfid);
        } else {
                /*
                 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
                 * but since GuC didn't complain, we may clear MISMATCH
                 */
                pf_exit_vf_mismatch(gt, vfid);
        }

        return true;
}

static bool pf_enter_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WIP)) {
                pf_enter_vf_wip(gt, vfid);
                pf_enter_vf_pause_send_pause(gt, vfid);
                return true;
        }

        return false;
}

/**
 * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_PAUSE_WIP);
        int err;

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
                xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
                return -EPERM;
        }

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
                xe_gt_sriov_dbg(gt, "VF%u was already paused!\n", vfid);
                return -ESTALE;
        }

        if (!pf_enter_vf_pause_wip(gt, vfid)) {
                xe_gt_sriov_dbg(gt, "VF%u pause already in progress!\n", vfid);
                return -EALREADY;
        }

        err = pf_wait_vf_wip_done(gt, vfid, timeout);
        if (err) {
                xe_gt_sriov_dbg(gt, "VF%u pause didn't finish in %u ms (%pe)\n",
                                vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
                return err;
        }

        if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
                xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
                return 0;
        }

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED)) {
                xe_gt_sriov_dbg(gt, "VF%u pause failed!\n", vfid);
                return -EIO;
        }

        xe_gt_sriov_dbg(gt, "VF%u pause was canceled!\n", vfid);
        return -ECANCELED;
}
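/*
 * Note on the error handling above (the same pattern repeats in the
 * RESUME, STOP and FLR flows below): -EBUSY from the GuC is never
 * propagated, the SEND state is simply re-entered and the command is
 * retried from the worker; -EIO (request rejected) raises MISMATCH,
 * while any other error only marks the operation as FAILED.
 */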
/**
 * DOC: The VF RESUME state machine
 *
 * The VF RESUME state machine looks like::
 *
 *	 (PAUSED)<-----------------<------------------------o
 *	    |                                                 \
 *	 resume                                                \
 *	    |                                                   \
 *	 ....V............................RESUME_WIP......       \
 *	 :    \                                           :       o
 *	 :     \          o-------<-----busy              :       |
 *	 :      \        /             /                  :       |
 *	 :       RESUME_SEND_RESUME ---failed--->--------o--->(RESUME_FAILED)
 *	 :        /                 \                     :       |
 *	 :      acked                rejected---->---------o--->(MISMATCH)
 *	 :        /                                       :
 *	 :....o..............o...............o.....o.....:
 *	      |              |               |       \
 *	  completed         flr             stop      restart-->(READY)
 *	      |              |               |
 *	      V         .....V.....    ......V.....
 *	  (RESUMED)     : FLR_WIP :    : STOP_WIP :
 *	                :.........:    :..........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP))
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME);
}

static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
        pf_exit_vf_mismatch(gt, vfid);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_completed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_resumed(gt, vfid);
}

static void pf_enter_vf_resume_failed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_resume_rejected(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_mismatch(gt, vfid);
        pf_enter_vf_resume_failed(gt, vfid);
}

static void pf_enter_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_resume_send_resume(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_SEND_RESUME))
                return false;

        err = pf_send_vf_resume(gt, vfid);
        if (err == -EBUSY)
                pf_enter_vf_resume_send_resume(gt, vfid);
        else if (err == -EIO)
                pf_enter_vf_resume_rejected(gt, vfid);
        else if (err)
                pf_enter_vf_resume_failed(gt, vfid);
        else
                pf_enter_vf_resume_completed(gt, vfid);
        return true;
}

static bool pf_enter_vf_resume_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_WIP)) {
                pf_enter_vf_wip(gt, vfid);
                pf_enter_vf_resume_send_resume(gt, vfid);
                return true;
        }

        return false;
}

/**
 * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESUME_WIP);
        int err;

        if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
                xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
                return -EPERM;
        }

        if (!pf_enter_vf_resume_wip(gt, vfid)) {
                xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
                return -EALREADY;
        }

        err = pf_wait_vf_wip_done(gt, vfid, timeout);
        if (err)
                return err;

        if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
                xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
                return 0;
        }

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED)) {
                xe_gt_sriov_dbg(gt, "VF%u resume failed!\n", vfid);
                return -EIO;
        }

        xe_gt_sriov_dbg(gt, "VF%u resume was canceled!\n", vfid);
        return -ECANCELED;
}
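/*
 * Illustrative PF-side sequence (a sketch only, error handling elided):
 *
 *	err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);
 *	if (!err)
 *		err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);
 *
 * Resume is only valid from PAUSED; in any other state it fails with
 * -EPERM before anything is sent to the GuC.
 */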
/**
 * DOC: The VF STOP state machine
 *
 * The VF STOP state machine looks like::
 *
 *	 (READY,PAUSED,RESUMED)<-------<--------------------o
 *	    |                                                 \
 *	  stop                                                 \
 *	    |                                                   \
 *	 ....V..............................STOP_WIP......       \
 *	 :       \                                        :        o
 *	 :        \         o----<----busy                :        |
 *	 :         \       /         /                    :        |
 *	 :          STOP_SEND_STOP--------failed--->--------o--->(STOP_FAILED)
 *	 :           /            \                       :        |
 *	 :         acked           rejected-------->--------o--->(MISMATCH)
 *	 :           /                                    :
 *	 :....o..............o...............o...........:
 *	      |              |               |
 *	  completed         flr            restart
 *	      |              |               |
 *	      V         .....V.....          V
 *	  (STOPPED)     : FLR_WIP :       (READY)
 *	                :.........:
 *
 * For the full state machine view, see `The VF state machine`_.
 */

static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP))
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP);
}

static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
        pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
        pf_exit_vf_mismatch(gt, vfid);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_completed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_stopped(gt, vfid);
}

static void pf_enter_vf_stop_failed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_stop_rejected(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_mismatch(gt, vfid);
        pf_enter_vf_stop_failed(gt, vfid);
}

static void pf_enter_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_stop_send_stop(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_SEND_STOP))
                return false;

        err = pf_send_vf_stop(gt, vfid);
        if (err == -EBUSY)
                pf_enter_vf_stop_send_stop(gt, vfid);
        else if (err == -EIO)
                pf_enter_vf_stop_rejected(gt, vfid);
        else if (err)
                pf_enter_vf_stop_failed(gt, vfid);
        else
                pf_enter_vf_stop_completed(gt, vfid);
        return true;
}

static bool pf_enter_vf_stop_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_WIP)) {
                pf_enter_vf_wip(gt, vfid);
                pf_enter_vf_stop_send_stop(gt, vfid);
                return true;
        }
        return false;
}
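/*
 * Unlike PAUSE, a successful STOP is terminal: per the state machines
 * above, the only way out of the STOPPED state is a VF FLR.
 */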
/**
 * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_STOP_WIP);
        int err;

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
                xe_gt_sriov_dbg(gt, "VF%u was already stopped!\n", vfid);
                return -ESTALE;
        }

        if (!pf_enter_vf_stop_wip(gt, vfid)) {
                xe_gt_sriov_dbg(gt, "VF%u stop already in progress!\n", vfid);
                return -EALREADY;
        }

        err = pf_wait_vf_wip_done(gt, vfid, timeout);
        if (err)
                return err;

        if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
                xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
                return 0;
        }

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOP_FAILED)) {
                xe_gt_sriov_dbg(gt, "VF%u stop failed!\n", vfid);
                return -EIO;
        }

        xe_gt_sriov_dbg(gt, "VF%u stop was canceled!\n", vfid);
        return -ECANCELED;
}

/**
 * DOC: The VF FLR state machine
 *
 * The VF FLR state machine looks like::
 *
 *	 (READY,PAUSED,STOPPED)<------------<--------------o
 *	    |                                               \
 *	   flr                                               \
 *	    |                                                 \
 *	 ....V..........................FLR_WIP...........     \
 *	 :     \                                          :      \
 *	 :      \          o----<----busy                 :       |
 *	 :       \        /         /                     :       |
 *	 :        FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
 *	 :         |             \                        :        |           |
 *	 :       acked            rejected----->-----------o--->(MISMATCH)     |
 *	 :         |                                      :        ^           |
 *	 :         v                                      :        |           |
 *	 :       FLR_WAIT_GUC                             :        |           |
 *	 :         |                                      :        |           |
 *	 :        done                                    :        |           |
 *	 :         |                                      :        |           |
 *	 :         v                                      :        |           |
 *	 :       FLR_GUC_DONE                             :        |           |
 *	 :         |                                      :        |           |
 *	 :       FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
 *	 :         |                                      :        |           |
 *	 :       FLR_RESET_DATA                           :        |           |
 *	 :         |                                      :        |           |
 *	 :       FLR_RESET_MMIO                           :        |           |
 *	 :         |                                      :        |           |
 *	 :         |   o----<----busy                     :        |           |
 *	 :         |  /         /                         :        |           |
 *	 :       FLR_SEND_FINISH----failed--->-----------o--------+-----------o
 *	 :        /             \                         :        |
 *	 :      acked            rejected----->-----------o--------o
 *	 :      /                                         :
 *	 :....o..............................o...........:
 *	      |                              |
 *	  completed                       restart
 *	      |                             /
 *	      V                            /
 *	  (READY)<----------<------------o
 *
 * For the full state machine view, see `The VF state machine`_.
 */
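/*
 * The FLR teardown below runs in a fixed order: GuC handshake first
 * (WAIT_GUC/GUC_DONE), then the VF's config, then PF-side data
 * (service/monitor), then MMIO, and finally the FINISH handshake with
 * the GuC.  Note that pf_exit_vf_flr_wip() escapes these sub-states in
 * reverse order.
 */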
static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
                xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
                return;
        }

        pf_enter_vf_wip(gt, vfid);
        pf_enter_vf_flr_send_start(gt, vfid);
}

static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
                pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
        }
}

static void pf_enter_vf_flr_completed(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_ready(gt, vfid);
}

static void pf_enter_vf_flr_failed(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
                xe_gt_sriov_notice(gt, "VF%u FLR failed!\n", vfid);
        pf_exit_vf_wip(gt, vfid);
}

static void pf_enter_vf_flr_rejected(struct xe_gt *gt, unsigned int vfid)
{
        pf_enter_vf_mismatch(gt, vfid);
        pf_enter_vf_flr_failed(gt, vfid);
}

static void pf_enter_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_send_finish(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH))
                return false;

        err = pf_send_vf_flr_finish(gt, vfid);
        if (err == -EBUSY)
                pf_enter_vf_flr_send_finish(gt, vfid);
        else if (err == -EIO)
                pf_enter_vf_flr_rejected(gt, vfid);
        else if (err)
                pf_enter_vf_flr_failed(gt, vfid);
        else
                pf_enter_vf_flr_completed(gt, vfid);
        return true;
}

static void pf_enter_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_mmio(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO))
                return false;

        xe_gt_sriov_pf_sanitize_hw(gt, vfid);

        pf_enter_vf_flr_send_finish(gt, vfid);
        return true;
}

static void pf_enter_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
                return false;

        xe_gt_sriov_pf_service_reset(gt, vfid);
        xe_gt_sriov_pf_monitor_flr(gt, vfid);

        pf_enter_vf_flr_reset_mmio(gt, vfid);
        return true;
}
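/*
 * Sanitizing the VF's config below may need to wait for in-flight GuC
 * actions to settle, which is presumably why FLR_RESET_CONFIG gets the
 * longer 5 second budget in pf_get_default_timeout() above.
 */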
static void pf_enter_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
                pf_enter_vf_state_machine_bug(gt, vfid);

        pf_queue_vf(gt, vfid);
}

static bool pf_exit_vf_flr_reset_config(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_RESET_CONFIG);
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_CONFIG))
                return false;

        err = xe_gt_sriov_pf_config_sanitize(gt, vfid, timeout);
        if (err)
                pf_enter_vf_flr_failed(gt, vfid);
        else
                pf_enter_vf_flr_reset_data(gt, vfid);
        return true;
}

static void pf_enter_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC))
                pf_enter_vf_state_machine_bug(gt, vfid);
}

static bool pf_exit_vf_flr_wait_guc(struct xe_gt *gt, unsigned int vfid)
{
        return pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
}

static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
{
        int err;

        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START))
                return false;

        /* GuC may actually send a FLR_DONE before we get a RESPONSE */
        pf_enter_vf_flr_wait_guc(gt, vfid);

        err = pf_send_vf_flr_start(gt, vfid);
        if (err) {
                /* send failed, so we shouldn't expect FLR_DONE from GuC */
                pf_exit_vf_flr_wait_guc(gt, vfid);

                if (err == -EBUSY)
                        pf_enter_vf_flr_send_start(gt, vfid);
                else if (err == -EIO)
                        pf_enter_vf_flr_rejected(gt, vfid);
                else
                        pf_enter_vf_flr_failed(gt, vfid);
        } else {
                /*
                 * we have already moved to WAIT_GUC, maybe even to GUC_DONE
                 * but since GuC didn't complain, we may clear MISMATCH
                 */
                pf_exit_vf_mismatch(gt, vfid);
        }

        return true;
}

static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
        if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
                return false;

        pf_enter_vf_flr_reset_config(gt, vfid);
        return true;
}

static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
                pf_queue_vf(gt, vfid);
}
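/*
 * A VF FLR can be initiated in two ways: explicitly by the PF via
 * xe_gt_sriov_pf_control_trigger_flr() below, or by the VF itself over
 * PCI, in which case the GuC notifies the PF (see pf_handle_vf_flr()
 * and the FLR flow DOC further down).
 */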
/**
 * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
{
        unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
        int err;

        pf_enter_vf_flr_wip(gt, vfid);

        err = pf_wait_vf_wip_done(gt, vfid, timeout);
        if (err) {
                xe_gt_sriov_notice(gt, "VF%u FLR didn't finish in %u ms (%pe)\n",
                                   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
                return err;
        }

        if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
                return -EIO;

        return 0;
}

/**
 * DOC: The VF FLR Flow with GuC
 *
 * The VF FLR flow includes several steps::
 *
 *	          PF                        GUC             PCI
 *	 ========================================================
 *	          |                          |               |
 *	 (1)      |                         [ ] <----- FLR --|
 *	          |                         [ ]              :
 *	 (2)     [ ] <-------- NOTIFY FLR --[ ]
 *	         [ ]                         |
 *	 (3)     [ ]                         |
 *	         [ ]                         |
 *	         [ ]-- START FLR ---------> [ ]
 *	          |                         [ ]
 *	 (4)      |                         [ ]
 *	          |                         [ ]
 *	         [ ] <--------- FLR DONE -- [ ]
 *	         [ ]                         |
 *	 (5)     [ ]                         |
 *	         [ ]                         |
 *	         [ ]-- FINISH FLR --------> [ ]
 *	          |                          |
 *
 * * Step 1: PCI HW generates interrupt to the GuC about VF FLR
 * * Step 2: GuC FW sends G2H notification to the PF about VF FLR
 * * Step 2a: on some platforms G2H is only received from root GuC
 * * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
 * * Step 3a: on some platforms PF must send H2G to all other GuCs
 * * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
 * * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
 */

static bool needs_dispatch_flr(struct xe_device *xe)
{
        return xe->info.platform == XE_PVC;
}

static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
{
        struct xe_device *xe = gt_to_xe(gt);
        struct xe_gt *gtit;
        unsigned int gtid;

        xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);

        if (needs_dispatch_flr(xe)) {
                for_each_gt(gtit, xe, gtid)
                        pf_enter_vf_flr_wip(gtit, vfid);
        } else {
                pf_enter_vf_flr_wip(gt, vfid);
        }
}

static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
{
        if (!pf_exit_vf_flr_wait_guc(gt, vfid)) {
                xe_gt_sriov_dbg(gt, "Received out of order 'VF%u FLR done'\n", vfid);
                pf_enter_vf_mismatch(gt, vfid);
                return;
        }

        pf_enter_vf_flr_guc_done(gt, vfid);
}

static void pf_handle_vf_pause_done(struct xe_gt *gt, u32 vfid)
{
        if (!pf_exit_pause_wait_guc(gt, vfid)) {
                xe_gt_sriov_dbg(gt, "Received out of order 'VF%u PAUSE done'\n", vfid);
                pf_enter_vf_mismatch(gt, vfid);
                return;
        }

        pf_enter_vf_pause_guc_done(gt, vfid);
}

static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
{
        xe_gt_sriov_dbg_verbose(gt, "received VF%u event %#x\n", vfid, eventid);

        if (vfid > xe_gt_sriov_pf_get_totalvfs(gt))
                return -EPROTO;

        switch (eventid) {
        case GUC_PF_NOTIFY_VF_FLR:
                pf_handle_vf_flr(gt, vfid);
                break;
        case GUC_PF_NOTIFY_VF_FLR_DONE:
                pf_handle_vf_flr_done(gt, vfid);
                break;
        case GUC_PF_NOTIFY_VF_PAUSE_DONE:
                pf_handle_vf_pause_done(gt, vfid);
                break;
        case GUC_PF_NOTIFY_VF_FIXUP_DONE:
                break;
        default:
                return -ENOPKG;
        }
        return 0;
}
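/*
 * Events that carry VFID 0 concern the PF itself and are routed to the
 * handler below instead of pf_handle_vf_event(), see the dispatch at
 * the end of xe_gt_sriov_pf_control_process_guc2pf().
 */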
static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
{
        switch (eventid) {
        case GUC_PF_NOTIFY_VF_ENABLE:
                xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
                                        str_enabled_disabled(true),
                                        str_enabled_disabled(false));
                break;
        default:
                return -ENOPKG;
        }
        return 0;
}

/**
 * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
 * @gt: the &xe_gt
 * @msg: the G2H message
 * @len: the length of the G2H message
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
{
        u32 vfid;
        u32 eventid;

        xe_gt_assert(gt, len);
        xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
        xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
        xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
                     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);

        if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
                return -EPROTO;

        if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
                return -EPFNOSUPPORT;

        if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
                return -EPROTO;

        vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
        eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);

        return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
}

static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
{
        if (pf_exit_vf_flr_send_start(gt, vfid))
                return true;

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC)) {
                xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
                                        control_bit_to_string(XE_GT_SRIOV_STATE_FLR_WAIT_GUC));
                return false;
        }

        if (pf_exit_vf_flr_guc_done(gt, vfid))
                return true;

        if (pf_exit_vf_flr_reset_config(gt, vfid))
                return true;

        if (pf_exit_vf_flr_reset_data(gt, vfid))
                return true;

        if (pf_exit_vf_flr_reset_mmio(gt, vfid))
                return true;

        if (pf_exit_vf_flr_send_finish(gt, vfid))
                return true;

        if (pf_exit_vf_stop_send_stop(gt, vfid))
                return true;

        if (pf_exit_vf_pause_send_pause(gt, vfid))
                return true;

        if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC)) {
                xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
                                        control_bit_to_string(XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC));
                return false;
        }

        if (pf_exit_vf_pause_guc_done(gt, vfid))
                return true;

        if (pf_exit_vf_pause_save_guc(gt, vfid))
                return true;

        if (pf_exit_vf_resume_send_resume(gt, vfid))
                return true;

        return false;
}

static unsigned int pf_control_state_index(struct xe_gt *gt,
                                           struct xe_gt_sriov_control_state *cs)
{
        return container_of(cs, struct xe_gt_sriov_metadata, control) - gt->sriov.pf.vfs;
}

static void pf_worker_find_work(struct xe_gt *gt)
{
        struct xe_gt_sriov_pf_control *pfc = &gt->sriov.pf.control;
        struct xe_gt_sriov_control_state *cs;
        unsigned int vfid;
        bool empty;
        bool more;

        spin_lock(&pfc->lock);
        cs = list_first_entry_or_null(&pfc->list, struct xe_gt_sriov_control_state, link);
        if (cs)
                list_del_init(&cs->link);
        empty = list_empty(&pfc->list);
        spin_unlock(&pfc->lock);

        if (!cs)
                return;

        /* VF metadata structures are indexed by the VFID */
        vfid = pf_control_state_index(gt, cs);
        xe_gt_assert(gt, vfid <= xe_gt_sriov_pf_get_totalvfs(gt));

        more = pf_process_vf_state_machine(gt, vfid);
        if (more)
                pf_queue_vf(gt, vfid);
        else if (!empty)
                pf_queue_control_worker(gt);
}
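/*
 * The worker deliberately processes a single VF per invocation: it
 * re-queues itself while entries remain and re-queues the same VF (at
 * the tail) when its state machine reports more work, keeping the lock
 * hold times short and the servicing of VFs roughly round-robin.
 */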
static void control_worker_func(struct work_struct *w)
{
        struct xe_gt *gt = container_of(w, struct xe_gt, sriov.pf.control.worker);

        xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
        pf_worker_find_work(gt);
}

static void pf_stop_worker(struct xe_gt *gt)
{
        xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
        cancel_work_sync(&gt->sriov.pf.control.worker);
}

static void control_fini_action(struct drm_device *dev, void *data)
{
        struct xe_gt *gt = data;

        pf_stop_worker(gt);
}

/**
 * xe_gt_sriov_pf_control_init() - Initialize PF's control data.
 * @gt: the &xe_gt
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_gt_sriov_pf_control_init(struct xe_gt *gt)
{
        struct xe_device *xe = gt_to_xe(gt);
        unsigned int n, totalvfs;

        xe_gt_assert(gt, IS_SRIOV_PF(xe));

        totalvfs = xe_sriov_pf_get_totalvfs(xe);
        for (n = 0; n <= totalvfs; n++) {
                struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, n);

                init_completion(&cs->done);
                INIT_LIST_HEAD(&cs->link);
        }

        spin_lock_init(&gt->sriov.pf.control.lock);
        INIT_LIST_HEAD(&gt->sriov.pf.control.list);
        INIT_WORK(&gt->sriov.pf.control.worker, control_worker_func);

        return drmm_add_action_or_reset(&xe->drm, control_fini_action, gt);
}

/**
 * xe_gt_sriov_pf_control_restart() - Restart SR-IOV control data after a GT reset.
 * @gt: the &xe_gt
 *
 * Any per-VF status maintained by the PF or any ongoing VF control activity
 * performed by the PF must be reset or cancelled when the GT is reset.
 *
 * This function is for PF only.
 */
void xe_gt_sriov_pf_control_restart(struct xe_gt *gt)
{
        struct xe_device *xe = gt_to_xe(gt);
        unsigned int n, totalvfs;

        xe_gt_assert(gt, IS_SRIOV_PF(xe));

        pf_stop_worker(gt);

        totalvfs = xe_sriov_pf_get_totalvfs(xe);
        for (n = 1; n <= totalvfs; n++)
                pf_enter_vf_ready(gt, n);
}