// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN	6

static int guc_submit_reset_prepare(struct xe_guc *guc);

/* Resolve the GuC instance that owns @q (via the queue's GT). */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

/* True if the queue is in any terminal state (no further submissions) */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/*
 * DRM-managed teardown: wait (up to 5s) for all exec queues to drop out of
 * the lookup xarray, flush the destroy workqueue, then destroy the xarray.
 */
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* ret == 0 means the 5s wait timed out with queues still registered */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/* Device-managed teardown: stop CT communication and sanitize the firmware */
static void guc_submit_fini(void *arg)
{
	struct xe_guc *guc = arg;

	/* Forcefully kill any remaining exec queues */
	xe_guc_ct_stop(&guc->ct);
	guc_submit_reset_prepare(guc);
	xe_guc_softreset(guc);
	xe_guc_submit_stop(guc);
	xe_uc_fw_sanitize(&guc->fw);
	xe_guc_submit_pause_abort(guc);
}

/* Release the reference held on every queue that was marked wedged */
static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			/*
			 * Drop the lock around the put, then re-acquire it
			 * before continuing the walk.
			 */
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * Teach lockdep the submission_state.lock vs fs_reclaim ordering up front,
 * so violations are reported at init rather than at a rare runtime path.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
	if (err)
		return err;

	return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	/* Scheduling policy KLVs require GuC submission interface >= 1.1.0 */
	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	/* Only send the H2G if at least one KLV was emitted after the action */
	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

/*
 * Remove @xa_count lookup entries for @q and release its GuC ID range.
 * Caller holds submission_state.lock.
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	/* Last queue gone: wake guc_submit_sw_fini() waiting on teardown */
	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worse case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	/* One lookup entry per logical instance (q->width) */
	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	/* Only i entries were stored; release exactly those plus the ID range */
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

/* Accumulates KLV pairs for a HOST2GUC_UPDATE_CONTEXT_POLICIES message */
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* Map xe exec queue priorities to GuC client priorities */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/* Push the queue's scheduling properties (priority/quantum/timeout) to GuC */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* Shrink the preemption timeout to 1us so the context can be kicked off HW */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* True while a VF migration recovery is pending on this GT */
static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up xe_wait_user_fence ioctl if exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

/* Ban a whole multi-queue group and trigger cleanup on every member */
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

/* Mark queue(s) reset and trigger cleanup on any that are not already banned */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

/* Ban the primary and every secondary queue of @q's multi-queue group */
static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between KMD and GuC is through a software
 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
 * allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1024 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
 * the CGP address. When the secondary queues are added to the group, the CGP is
 * updated with entry for that queue and GuC is notified through the H2G interface
 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
 * error in the CGP. Only one of these CGP update messages can be outstanding
 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
 * fields indicate which queue entry is being updated in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through Q0 GuC interface only.
 *
 * As it is not required to register secondary queues with GuC, the secondary queue
 * context ids in the CGP are populated with Q0 context id.
 */

#define CGP_VERSION_MAJOR_SHIFT 8

/* Write @q's entry (LRC descriptor + context ID) and update mask into the CGP */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	/* Queue entry pair at DWORDs 32 + pos*2 (descriptor) ... */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	/* ... and 33 + pos*2 (context ID, always the primary's) */
	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	/* Set exactly one bit in the relevant update-mask DWORD, clear the other */
	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Update the CGP for @q and send @action to the GuC, after waiting for any
 * previously outstanding CGP sync to complete (only one may be in flight).
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	long ret;

	/*
	 * As all queues of a multi queue group use single drm scheduler
	 * submit workqueue, CGP synchronization with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

/* Register a multi-queue group's primary context (Q0) with the GuC */
static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE	(8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0;	/* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Publish a secondary queue to the GuC by syncing the group's CGP */
static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE	(2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from guc.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

/* Register a parallel (multi-LRC) context: one descriptor per width instance */
static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	/* LRC 0 is covered by hwlrca above; append descriptors for LRCs 1..width-1 */
	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Register a single-LRC context with the GuC */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Build registration info for @q and register it with the GuC via whichever
 * H2G variant applies (multi-queue primary, parallel/MLRC, or single LRC).
 * Multi-queue secondaries are not registered; they are added to their group's
 * CGP instead.
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		/* Fresh work queue: reset ring pointers and scratch, mark active */
		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes left in the WQ ring before the tail would wrap to offset 0 */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Sleep approximately @delay_ms. Delays above 20 ms use msleep(); shorter
 * delays use usleep_range() with a 500 us window so the scheduler can
 * coalesce wakeups without badly overshooting short waits.
 */
static inline void relaxed_ms_sleep(unsigned int delay_ms)
{
        unsigned long min_us, max_us;

        if (!delay_ms)
                return;

        if (delay_ms > 20) {
                msleep(delay_ms);
                return;
        }

        min_us = mul_u32_u32(delay_ms, 1000);
        max_us = min_us + 500;

        usleep_range(min_us, max_us);
}

/*
 * Wait until the parallel-submit work queue has @wqi_size bytes of free
 * space, refreshing the cached head from the GuC-written descriptor and
 * backing off exponentially (1..64 ms). After ~2 s of no progress a GT
 * reset is triggered and -ENODEV returned. Skipped during VF recovery.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
        unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
        CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
        if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
                q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
                if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
                        if (sleep_total_ms > 2000) {
                                xe_gt_reset_async(q->gt);
                                return -ENODEV;
                        }

                        msleep(sleep_period_ms);
                        sleep_total_ms += sleep_period_ms;
                        if (sleep_period_ms < 64)
                                sleep_period_ms <<= 1;
                        goto try_again;
                }
        }
#undef AVAILABLE_SPACE

        return 0;
}

/*
 * Pad the remainder of the WQ ring with a single NOOP item so the next real
 * item can start at offset 0. Returns -ENODEV if space never frees up.
 */
static int wq_noop_append(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
        u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

        if (wq_wait_for_space(q, wq_space_until_wrap(q)))
                return -ENODEV;

        xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

        parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
                       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
                       FIELD_PREP(WQ_LEN_MASK, len_dw));
        q->guc->wqi_tail = 0;

        return 0;
}

/*
 * Append a MULTI_LRC work item for @q to the parallel scratch WQ: a header
 * (type/len, LRC descriptor, guc_id + ring tail of lrc[0], reserved dword)
 * followed by the ring tail of each remaining LRC. Publishes the new tail
 * to the WQ descriptor after a write barrier so the GuC sees a complete item.
 */
static void wq_item_append(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);
        struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
        u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
        u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
        u32 len_dw = (wqi_size / sizeof(u32)) - 1;
        int i = 0, j;

        /* Item must be contiguous in the ring; pad to the wrap if needed */
        if (wqi_size > wq_space_until_wrap(q)) {
                if (wq_noop_append(q))
                        return;
        }
        if (wq_wait_for_space(q, wqi_size))
                return;

        wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
                FIELD_PREP(WQ_LEN_MASK, len_dw);
        wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
        wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
                FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
        wqi[i++] = 0;
        for (j = 1; j < q->width; ++j) {
                struct xe_lrc *lrc = q->lrc[j];

                wqi[i++] = lrc->ring.tail / sizeof(u64);
        }

        xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

        iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
                                      wq[q->guc->wqi_tail / sizeof(u32)]));
        xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
        q->guc->wqi_tail += wqi_size;
        xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

        /* Item contents must be visible before the tail update below */
        xe_device_wmb(xe);

        map = xe_lrc_parallel_map(q->lrc[0]);
        parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING  ~0x0ull
/*
 * Make a submitted job visible to the GuC. First publish the work (WQ item
 * for parallel queues, ring-tail bump otherwise — skipped for restore
 * replays except the last one), then either enable scheduling on a first
 * submission or ring the SCHED_CONTEXT doorbell. All CT traffic goes through
 * the multi-queue group's primary queue. Parallel queues that just became
 * enabled need an extra SCHED_CONTEXT since the enable itself does not
 * consume the WQ item.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_lrc *lrc = q->lrc[0];
        u32 action[3];
        u32 g2h_len = 0;
        u32 num_g2h = 0;
        int len = 0;
        bool extra_submit = false;

        xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

        if (!job->restore_replay || job->last_replay) {
                if (xe_exec_queue_is_parallel(q))
                        wq_item_append(q);
                else if (!exec_queue_idle_skip_suspend(q))
                        xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
                job->last_replay = false;
        }

        if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
                return;

        /*
         * All queues in a multi-queue group will use the primary queue
         * of the group to interface with GuC.
         */
        q = xe_exec_queue_multi_queue_primary(q);

        if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
                action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
                action[len++] = q->guc->id;
                action[len++] = GUC_CONTEXT_ENABLE;
                g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
                num_g2h = 1;
                if (xe_exec_queue_is_parallel(q))
                        extra_submit = true;

                q->guc->resume_time = RESUME_PENDING;
                set_exec_queue_pending_enable(q);
                set_exec_queue_enabled(q);
                trace_xe_exec_queue_scheduling_enable(q);
        } else {
                action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
                action[len++] = q->guc->id;
                trace_xe_exec_queue_submit(q);
        }

        xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

        if (extra_submit) {
                len = 0;
                action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
                action[len++] = q->guc->id;
                trace_xe_exec_queue_submit(q);

                xe_guc_ct_send(&guc->ct, action, len, 0, 0);
        }
}

/*
 * DRM scheduler .run_job hook. Lazily registers the queue (and, for a
 * multi-queue secondary, its group's primary first) on first use, emits the
 * job's ring commands unless this is a restore replay, and hands the work to
 * the GuC. Killed/banned/wedged queues (or a banned primary) skip submission
 * and just return the job fence.
 */
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
        struct xe_sched_job *job = to_xe_sched_job(drm_job);
        struct xe_exec_queue *q = job->q;
        struct xe_guc *guc = exec_queue_to_guc(q);
        bool killed_or_banned_or_wedged =
                exec_queue_killed_or_banned_or_wedged(q);

        xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
                     exec_queue_banned(q) || exec_queue_suspended(q));

        trace_xe_sched_job_run(job);

        if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
                if (xe_exec_queue_is_multi_queue_secondary(q)) {
                        struct xe_exec_queue *primary =
                                xe_exec_queue_multi_queue_primary(q);

                        if (exec_queue_killed_or_banned_or_wedged(primary)) {
                                killed_or_banned_or_wedged = true;
                                goto run_job_out;
                        }

                        if (!exec_queue_registered(primary))
                                register_exec_queue(primary, GUC_CONTEXT_NORMAL);
                }

                if (!exec_queue_registered(q))
                        register_exec_queue(q, GUC_CONTEXT_NORMAL);
                if (!job->restore_replay)
                        q->ring_ops->emit_job(job);
                submit_exec_queue(q, job);
                job->restore_replay = false;
        }

run_job_out:

        return job->fence;
}

/* DRM scheduler .free_job hook: drop the scheduler's job reference */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
        struct xe_sched_job *job = to_xe_sched_job(drm_job);

        trace_xe_sched_job_free(job);
        xe_sched_job_put(job);
}

/* Non-zero while GuC submission is stopped (e.g. across reset) */
int xe_guc_read_stopped(struct xe_guc *guc)
{
        return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
                                                    struct xe_exec_queue *q,
                                                    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

/* Declares a 3-dword SCHED_CONTEXT_MODE_SET action named `action` */
#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)            \
        u32 action[] = {                                        \
                XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,           \
                q->guc->id,                                     \
                GUC_CONTEXT_##enable_disable,                   \
        }

/*
 * Disable scheduling on @q and mark it for deregistration. Waits (up to 5 s)
 * for any in-flight pending enable/disable to settle first; on timeout
 * (outside VF recovery) it restarts submission and kicks off a GT reset
 * instead. Multi-queue secondaries are completed locally since the GuC only
 * tracks the group's primary.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
                                          struct xe_exec_queue *q)
{
        MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
        int ret;

        if (!xe_exec_queue_is_multi_queue_secondary(q))
                set_min_preemption_timeout(guc, q);

        smp_rmb();
        ret = wait_event_timeout(guc->ct.wq,
                                 (!exec_queue_pending_enable(q) &&
                                  !exec_queue_pending_disable(q)) ||
                                 xe_guc_read_stopped(guc) ||
                                 vf_recovery(guc),
                                 HZ * 5);
        if (!ret && !vf_recovery(guc)) {
                struct xe_gpu_scheduler *sched = &q->guc->sched;

                xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
                xe_sched_submission_start(sched);
                xe_gt_reset_async(q->gt);
                xe_sched_tdr_queue_imm(sched);
                return;
        }

        clear_exec_queue_enabled(q);
        set_exec_queue_pending_disable(q);
        set_exec_queue_destroyed(q);
        trace_xe_exec_queue_scheduling_disable(q);

        /*
         * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
         * handler and we are not allowed to reserved G2H space in handlers.
         */
        if (xe_exec_queue_is_multi_queue_secondary(q))
                handle_multi_queue_secondary_sched_done(guc, q, 0);
        else
                xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
                               G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
                               G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save exec queue's registered with GuC state by taking a ref to each queue.
 * Register a DRMM handler to drop refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
        struct xe_device *xe = guc_to_xe(guc);
        struct xe_gt *gt = guc_to_gt(guc);
        struct xe_exec_queue *q;
        unsigned long index;
        int err;

        xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

        /*
         * If device is being wedged even before submission_state is
         * initialized, there's nothing to do here.
         */
        if (!guc->submission_state.initialized)
                return;

        if (xe->wedged.mode == 2) {
                err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
                                               guc_submit_wedged_fini, guc);
                if (err) {
                        xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
                                  "Although device is wedged.\n");
                        return;
                }

                mutex_lock(&guc->submission_state.lock);
                xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
                        if (xe_exec_queue_get_unless_zero(q))
                                set_exec_queue_wedged(q);
                mutex_unlock(&guc->submission_state.lock);
        } else {
                /* Forcefully kill any remaining exec queues, signal fences */
                guc_submit_reset_prepare(guc);
                xe_guc_submit_stop(guc);
                xe_guc_softreset(guc);
                xe_uc_fw_sanitize(&guc->fw);
                xe_guc_submit_pause_abort(guc);
        }
}

/*
 * If wedging-on-any-hang mode is enabled, declare the device wedged (if not
 * already) and return true; otherwise return false.
 */
static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
        struct xe_device *xe = guc_to_xe(guc);

        if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
                return false;

        if (xe_device_wedged(xe))
                return true;

        xe_device_declare_wedged(xe);

        return true;
}

#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)

/*
 * Decide whether @job genuinely exceeded its timeout by comparing the LRC's
 * context timestamp against the job-start timestamp. A stuck timestamp or a
 * never-started job invalidates the job directly instead. Returns true if
 * the job's measured running time (with 5% slack) reached the timeout.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
        struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
        u32 ctx_timestamp, ctx_job_timestamp;
        u32 timeout_ms = q->sched_props.job_timeout_ms;
        u32 diff;
        u64 running_time_ms;

        if (!xe_sched_job_started(job)) {
                xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
                           xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
                           q->guc->id);

                return xe_sched_invalidate_job(job, 2);
        }

        ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
        if (ctx_timestamp == job->sample_timestamp) {
                if (IS_SRIOV_VF(gt_to_xe(gt)))
                        xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
                                     xe_sched_job_seqno(job),
                                     xe_sched_job_lrc_seqno(job), q->guc->id);
                else
                        xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
                                   xe_sched_job_seqno(job),
                                   xe_sched_job_lrc_seqno(job), q->guc->id);

                return xe_sched_invalidate_job(job, 0);
        }

        job->sample_timestamp = ctx_timestamp;
        ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

        /*
         * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
         * possible overflows with a high timeout.
         */
        xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

        diff = ctx_timestamp - ctx_job_timestamp;

        /*
         * Ensure timeout is within 5% to account for an GuC scheduling latency
         */
        running_time_ms =
                ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

        xe_gt_dbg(gt,
                  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
                  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
                  q->guc->id, running_time_ms, timeout_ms, diff);

        return running_time_ms >= timeout_ms;
}

/*
 * Send a GUC_CONTEXT_ENABLE for @q (or complete it locally for a multi-queue
 * secondary) and wait up to 5 s for the pending-enable ack. On timeout the
 * queue is banned and a GT reset plus immediate TDR are queued.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
        MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
        struct xe_guc *guc = exec_queue_to_guc(q);
        int ret;

        xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
        xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

        set_exec_queue_pending_enable(q);
        set_exec_queue_enabled(q);
        trace_xe_exec_queue_scheduling_enable(q);

        if (xe_exec_queue_is_multi_queue_secondary(q))
                handle_multi_queue_secondary_sched_done(guc, q, 1);
        else
                xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
                               G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

        ret = wait_event_timeout(guc->ct.wq,
                                 !exec_queue_pending_enable(q) ||
                                 xe_guc_read_stopped(guc) ||
                                 vf_recovery(guc), HZ * 5);
        if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
                xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
                set_exec_queue_banned(q);
                xe_gt_reset_async(q->gt);
                xe_sched_tdr_queue_imm(&q->guc->sched);
        }
}

/*
 * Send a GUC_CONTEXT_DISABLE for @q without waiting for the ack (the caller
 * waits on pending_disable). @immediate additionally drops the preemption
 * timeout to its minimum so the context is kicked off hardware quickly.
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
        MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
        struct xe_guc *guc = exec_queue_to_guc(q);

        xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
        xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

        if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
                set_min_preemption_timeout(guc, q);
        clear_exec_queue_enabled(q);
        set_exec_queue_pending_disable(q);
        trace_xe_exec_queue_scheduling_disable(q);

        if (xe_exec_queue_is_multi_queue_secondary(q))
                handle_multi_queue_secondary_sched_done(guc, q, 0);
        else
                xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
                               G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

/*
 * DRM scheduler .timedout_job hook (TDR). Verifies the hang is real via
 * check_timeout(), captures state for devcoredump, disables scheduling to
 * get the context off hardware, then either signals/cancels the pending jobs
 * and triggers queue cleanup, rearms the timer (false positive), or escalates
 * to a GT reset when the GuC stops responding. Always returns
 * DRM_GPU_SCHED_STAT_NO_HANG so the job goes back on the pending list and
 * can be freed.
 */
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
        struct xe_sched_job *job = to_xe_sched_job(drm_job);
        struct drm_sched_job *tmp_job;
        struct xe_exec_queue *q = job->q;
        struct xe_gpu_scheduler *sched = &q->guc->sched;
        struct xe_guc *guc = exec_queue_to_guc(q);
        const char *process_name = "no process";
        struct xe_device *xe = guc_to_xe(guc);
        int err = -ETIME;
        pid_t pid = -1;
        bool wedged = false, skip_timeout_check;

        xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

        /*
         * TDR has fired before free job worker. Common if exec queue
         * immediately closed after last fence signaled. Add back to pending
         * list so job can be freed and kick scheduler ensuring free job is not
         * lost.
         */
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
            vf_recovery(guc))
                return DRM_GPU_SCHED_STAT_NO_HANG;

        /* Kill the run_job entry point */
        xe_sched_submission_stop(sched);

        /* Must check all state after stopping scheduler */
        skip_timeout_check = exec_queue_reset(q) ||
                exec_queue_killed_or_banned_or_wedged(q);

        /* Skip timeout check if multi-queue group is banned */
        if (xe_exec_queue_is_multi_queue(q) &&
            READ_ONCE(q->multi_queue.group->banned))
                skip_timeout_check = true;

        /* LR jobs can only get here if queue has been killed or hit an error */
        if (xe_exec_queue_is_lr(q))
                xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

        /*
         * FIXME: In multi-queue scenario, the TDR must ensure that the whole
         * multi-queue group is off the HW before signaling the fences to avoid
         * possible memory corruptions. This means disabling scheduling on the
         * primary queue before or during the secondary queue's TDR. Need to
         * implement this in least obtrusive way.
         */

        /*
         * If devcoredump not captured and GuC capture for the job is not ready
         * do manual capture first and decide later if we need to use it
         */
        if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
            !xe_guc_capture_get_matching_and_lock(q)) {
                /* take force wake before engine register manual capture */
                CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
                if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
                        xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

                xe_engine_snapshot_capture_for_queue(q);
        }

        /*
         * Check if job is actually timed out, if so restart job execution and TDR
         */
        if (!skip_timeout_check && !check_timeout(q, job))
                goto rearm;

        if (!exec_queue_killed(q))
                wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

        set_exec_queue_banned(q);

        /* Kick job / queue off hardware */
        if (!wedged && (exec_queue_enabled(q) || exec_queue_pending_disable(q))) {
                int ret;

                if (exec_queue_reset(q))
                        err = -EIO;

                if (xe_uc_fw_is_running(&guc->fw)) {
                        /*
                         * Wait for any pending G2H to flush out before
                         * modifying state
                         */
                        ret = wait_event_timeout(guc->ct.wq,
                                                 (!exec_queue_pending_enable(q) &&
                                                  !exec_queue_pending_disable(q)) ||
                                                 xe_guc_read_stopped(guc) ||
                                                 vf_recovery(guc), HZ * 5);
                        if (vf_recovery(guc))
                                goto handle_vf_resume;
                        if (!ret || xe_guc_read_stopped(guc))
                                goto trigger_reset;

                        disable_scheduling(q, skip_timeout_check);
                }

                /*
                 * Must wait for scheduling to be disabled before signalling
                 * any fences, if GT broken the GT reset code should signal us.
                 *
                 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
                 * error) messages which can cause the schedule disable to get
                 * lost. If this occurs, trigger a GT reset to recover.
                 */
                smp_rmb();
                ret = wait_event_timeout(guc->ct.wq,
                                         !xe_uc_fw_is_running(&guc->fw) ||
                                         !exec_queue_pending_disable(q) ||
                                         xe_guc_read_stopped(guc) ||
                                         vf_recovery(guc), HZ * 5);
                if (vf_recovery(guc))
                        goto handle_vf_resume;
                if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
                        if (!ret)
                                xe_gt_warn(guc_to_gt(guc),
                                           "Schedule disable failed to respond, guc_id=%d",
                                           q->guc->id);
                        xe_devcoredump(q, job,
                                       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
                                       q->guc->id, ret, xe_guc_read_stopped(guc));
                        xe_gt_reset_async(q->gt);
                        xe_sched_tdr_queue_imm(sched);
                        goto rearm;
                }
        }

        if (q->vm && q->vm->xef) {
                process_name = q->vm->xef->process_name;
                pid = q->vm->xef->pid;
        }

        if (!exec_queue_killed(q))
                xe_gt_notice(guc_to_gt(guc),
                             "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
                             xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
                             q->guc->id, q->flags, process_name, pid);

        trace_xe_sched_job_timedout(job);

        if (!exec_queue_killed(q))
                xe_devcoredump(q, job,
                               "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
                               xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
                               q->guc->id, q->flags);

        /*
         * Kernel jobs should never fail, nor should VM jobs if they do
         * somethings has gone wrong and the GT needs a reset
         */
        xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
                   "Kernel-submitted job timed out\n");
        xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
                   "VM job timed out on non-killed execqueue\n");
        if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
                        (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
                if (!xe_sched_invalidate_job(job, 2)) {
                        xe_gt_reset_async(q->gt);
                        goto rearm;
                }
        }

        /* Mark all outstanding jobs as bad, thus completing them */
        xe_sched_job_set_error(job, err);
        drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
                xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

        xe_sched_submission_start(sched);

        if (xe_exec_queue_is_multi_queue(q))
                xe_guc_exec_queue_group_trigger_cleanup(q);
        else
                xe_guc_exec_queue_trigger_cleanup(q);

        /*
         * We want the job added back to the pending list so it gets freed; this
         * is what DRM_GPU_SCHED_STAT_NO_HANG does.
         */
        return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
        /*
         * XXX: Ideally want to adjust timeout based on current execution time
         * but there is not currently an easy way to do in DRM scheduler. With
         * some thought, do this in a follow up.
         */
        xe_sched_submission_start(sched);
handle_vf_resume:
        return DRM_GPU_SCHED_STAT_NO_HANG;
}

/* Release GuC-side resources of @q: guc_id, scheduler entity and scheduler */
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
        struct xe_guc_exec_queue *ge = q->guc;
        struct xe_guc *guc = exec_queue_to_guc(q);

        release_guc_id(guc, q);
        xe_sched_entity_fini(&ge->entity);
        xe_sched_fini(&ge->sched);

        /*
         * RCU free due sched being exported via DRM scheduler fences
         * (timeline name).
         */
        kfree_rcu(ge, rcu);
}

/*
 * Deferred destruction worker: unlink a multi-queue secondary from its group,
 * flush any outstanding TDR work, then finish the exec queue. Holds a runtime
 * PM reference for the duration.
 */
static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
        struct xe_guc_exec_queue *ge =
                container_of(w, struct xe_guc_exec_queue, destroy_async);
        struct xe_exec_queue *q = ge->q;
        struct xe_guc *guc = exec_queue_to_guc(q);

        guard(xe_pm_runtime)(guc_to_xe(guc));
        trace_xe_exec_queue_destroy(q);

        if (xe_exec_queue_is_multi_queue_secondary(q)) {
                struct xe_exec_queue_group *group = q->multi_queue.group;

                mutex_lock(&group->list_lock);
                list_del(&q->multi_queue.link);
                mutex_unlock(&group->list_lock);
        }

        /* Confirm no work left behind accessing device structures */
        cancel_delayed_work_sync(&ge->sched.base.work_tdr);

        xe_exec_queue_fini(q);
}

/*
 * Queue (or, for permanent/wedged queues, run synchronously) the async
 * destruction worker for @q.
 */
static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);

        INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

        /* We must block on kernel engines so slabs are empty on driver unload */
        if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
                __guc_exec_queue_destroy_async(&q->guc->destroy_async);
        else
                queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
        /*
         * Might be done from within the GPU scheduler, need to do async as we
         * fini the scheduler when the engine is fini'd, the scheduler can't
         * complete fini within itself (circular dependency). Async resolves
         * this we and don't really care when everything is fini'd, just that it
         * is.
         */
        guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
        struct xe_exec_queue *q = msg->private_data;
        struct xe_guc *guc = exec_queue_to_guc(q);

        xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
        trace_xe_exec_queue_cleanup_entity(q);

        /*
         * Expected state transitions for cleanup:
         * - If the exec queue is registered and GuC firmware is running, we must first
         *   disable scheduling and deregister the queue to ensure proper teardown and
         *   resource release in the GuC, then destroy the exec queue on driver side.
         * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
         *   we cannot expect a response for the deregister request. In this case,
         *   it is safe to directly destroy the exec queue on driver side, as the GuC
         *   will not process further requests and all resources must be cleaned up locally.
         */
        if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
                disable_scheduling_deregister(guc, q);
        else
                __guc_exec_queue_destroy(guc, q);
}

/* State changes are only meaningful for live, registered queues */
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
        return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

/* Apply updated scheduling properties by resending the KLV policies */
static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
        struct xe_exec_queue *q = msg->private_data;
        struct xe_guc *guc = exec_queue_to_guc(q);

        if (guc_exec_queue_allowed_to_change_state(q))
                init_policies(guc, q);
        kfree(msg);
}

/*
 * Clear a pending suspend and wake its waiter(s). Idempotent: returns early
 * when no suspend is pending.
 */
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_device *xe = guc_to_xe(guc);

        if (!q->guc->suspend_pending)
                return;

        WRITE_ONCE(q->guc->suspend_pending, false);

        /*
         * We use a GuC shared wait queue for VFs because the VF resfix start
         * interrupt must be able to wake all instances of suspend_wait. This
         * prevents the VF migration worker from being starved during
         * scheduling.
         */
        if (IS_SRIOV_VF(xe))
                wake_up_all(&guc->ct.wq);
        else
                wake_up(&q->guc->suspend_wait);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);

        xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
                     xe_guc_read_stopped(guc));
        xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

        __suspend_fence_signal(q);
}

/*
 * Handle a SUSPEND message: for an enabled, non-suspended queue, wait for any
 * in-flight resume/disable to settle, honor the VM's min_run_period_ms since
 * the last resume, then mark suspended and disable scheduling. Otherwise the
 * suspend completes immediately (including the idle-skip-suspend fast path).
 */
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
        struct xe_exec_queue *q = msg->private_data;
        struct xe_guc *guc = exec_queue_to_guc(q);
        bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);

        if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
            !exec_queue_suspended(q) && exec_queue_enabled(q)) {
                wait_event(guc->ct.wq, vf_recovery(guc) ||
                           ((q->guc->resume_time != RESUME_PENDING ||
                             xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));

                if (!xe_guc_read_stopped(guc)) {
                        s64 since_resume_ms =
                                ktime_ms_delta(ktime_get(),
                                               q->guc->resume_time);
                        s64 wait_ms = q->vm->preempt.min_run_period_ms -
                                since_resume_ms;

                        if (wait_ms > 0 && q->guc->resume_time)
                                relaxed_ms_sleep(wait_ms);

                        set_exec_queue_suspended(q);
                        disable_scheduling(q, false);
                }
        } else if (q->guc->suspend_pending) {
                if (idle_skip_suspend)
                        set_exec_queue_idle_skip_suspend(q);
                set_exec_queue_suspended(q);
                suspend_fence_signal(q);
        }
}

/*
 * Ring the SCHED_CONTEXT doorbell for a non-parallel queue after publishing
 * its ring tail.
 */
static void sched_context(struct xe_exec_queue *q)
{
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct xe_lrc *lrc = q->lrc[0];
        u32 action[] = {
                XE_GUC_ACTION_SCHED_CONTEXT,
                q->guc->id,
        };

        xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
        xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
        xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

        trace_xe_exec_queue_submit(q);

        xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
        xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Handle a RESUME message: clear the suspended state and re-enable
 * scheduling (or just resubmit via sched_context() when the suspend was
 * skipped because the queue was idle).
 */
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
        struct xe_exec_queue *q = msg->private_data;

        if (guc_exec_queue_allowed_to_change_state(q)) {
                clear_exec_queue_suspended(q);
                if (!exec_queue_enabled(q)) {
                        if (exec_queue_idle_skip_suspend(q)) {
                                struct xe_lrc *lrc = q->lrc[0];

                                clear_exec_queue_idle_skip_suspend(q);
                                xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
                        }
                        q->guc->resume_time = RESUME_PENDING;
                        set_exec_queue_pending_resume(q);
                        enable_scheduling(q);
                } else if (exec_queue_idle_skip_suspend(q)) {
                        clear_exec_queue_idle_skip_suspend(q);
                        sched_context(q);
                }
        } else {
                clear_exec_queue_suspended(q);
                clear_exec_queue_idle_skip_suspend(q);
        }
}

/*
 * Handle a SET_MULTI_QUEUE_PRIORITY message: sync the group's CGP with the
 * GuC through the group's primary queue.
 */
static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
{
        struct xe_exec_queue *q = msg->private_data;

        if (guc_exec_queue_allowed_to_change_state(q)) {
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
                struct xe_guc *guc = exec_queue_to_guc(q);
                struct xe_exec_queue_group *group = q->multi_queue.group;
                u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
                int len = 0;

                action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
                action[len++] = group->primary->guc->id;

                xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

                xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
        }

        kfree(msg);
}

#define CLEANUP                                 1       /* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS                         2
#define SUSPEND                                 3
#define RESUME                                  4
#define SET_MULTI_QUEUE_PRIORITY                5
#define OPCODE_MASK                             0xf
#define MSG_LOCKED                              BIT(8)
#define MSG_HEAD                                BIT(9)

/*
 * Scheduler message dispatcher; drops the runtime PM reference taken when the
 * message was added in guc_exec_queue_add_msg().
 */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
        struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

        trace_xe_sched_msg_recv(msg);

        switch (msg->opcode) {
        case CLEANUP:
                __guc_exec_queue_process_msg_cleanup(msg);
                break;
        case SET_SCHED_PROPS:
                __guc_exec_queue_process_msg_set_sched_props(msg);
                break;
        case SUSPEND:
                __guc_exec_queue_process_msg_suspend(msg);
                break;
        case RESUME:
                __guc_exec_queue_process_msg_resume(msg);
                break;
        case SET_MULTI_QUEUE_PRIORITY:
                __guc_exec_queue_process_msg_set_multi_queue_priority(msg);
                break;
        default:
                XE_WARN_ON("Unknown message type");
        }

        xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
        .run_job = guc_exec_queue_run_job,
        .free_job = guc_exec_queue_free_job,
        .timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
        .process_msg = guc_exec_queue_process_msg,
};

/*
 * Create the GuC-backed scheduler state for @q: allocate the xe_guc_exec_queue,
 * set up the DRM scheduler + entity (LR queues get an infinite job timeout),
 * allocate a guc_id, and link multi-queue secondaries into their group's list.
 * Secondaries reuse the primary's submit_wq so CGP sync with the GuC is
 * serialized without extra locking.
 */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
        struct xe_gpu_scheduler *sched;
        struct xe_guc *guc = exec_queue_to_guc(q);
        struct workqueue_struct *submit_wq = NULL;
        struct xe_guc_exec_queue *ge;
        long timeout;
        int err, i;

        xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

        ge = kzalloc_obj(*ge);
        if (!ge)
                return -ENOMEM;

        q->guc = ge;
        ge->q = q;
        init_rcu_head(&ge->rcu);
        init_waitqueue_head(&ge->suspend_wait);

        for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
                INIT_LIST_HEAD(&ge->static_msgs[i].link);

        timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
                  msecs_to_jiffies(q->sched_props.job_timeout_ms);

        /*
         * Use primary queue's submit_wq for all secondary queues of a
         * multi queue group. This serialization avoids any locking around
         * CGP synchronization with GuC.
         */
        if (xe_exec_queue_is_multi_queue_secondary(q)) {
                struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

                submit_wq = primary->guc->sched.base.submit_wq;
        }

        err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
                            submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
                            timeout, guc_to_gt(guc)->ordered_wq, NULL,
                            q->name, gt_to_xe(q->gt)->drm.dev);
        if (err)
                goto err_free;

        sched = &ge->sched;
        err = xe_sched_entity_init(&ge->entity, sched);
        if (err)
                goto err_sched;

        mutex_lock(&guc->submission_state.lock);

        err = alloc_guc_id(guc, q);
        if (err)
                goto err_entity;

        q->entity = &ge->entity;

        /* Queue created mid-reset/recovery must start with a stopped scheduler */
        if (xe_guc_read_stopped(guc) || vf_recovery(guc))
                xe_sched_stop(sched);

        mutex_unlock(&guc->submission_state.lock);

        xe_exec_queue_assign_name(q, q->guc->id);

        /*
         * Maintain secondary queues of the multi queue group in a list
         * for handling dependencies across the queues in the group.
         */
        if (xe_exec_queue_is_multi_queue_secondary(q)) {
                struct xe_exec_queue_group *group = q->multi_queue.group;

                INIT_LIST_HEAD(&q->multi_queue.link);
                mutex_lock(&group->list_lock);
                list_add_tail(&q->multi_queue.link, &group->list);
                mutex_unlock(&group->list_lock);
        }

        if (xe_exec_queue_is_multi_queue(q))
                trace_xe_exec_queue_create_multi_queue(q);
        else
                trace_xe_exec_queue_create(q);

        return 0;

err_entity:
        mutex_unlock(&guc->submission_state.lock);
        xe_sched_entity_fini(&ge->entity);
err_sched:
        xe_sched_fini(&ge->sched);
err_free:
        kfree(ge);

        return err;
}

/* Mark @q killed, release any suspend waiter and trigger cleanup */
static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
        trace_xe_exec_queue_kill(q);
        set_exec_queue_killed(q);
        __suspend_fence_signal(q);
        xe_guc_exec_queue_trigger_cleanup(q);
}

/*
 * Post a message to @q's scheduler. Takes a no-resume runtime PM reference
 * that guc_exec_queue_process_msg() releases. MSG_HEAD/MSG_LOCKED flag bits
 * select the insertion variant; only the low OPCODE_MASK bits are stored.
 */
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
                                   u32 opcode)
{
        xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

        INIT_LIST_HEAD(&msg->link);
        msg->opcode = opcode & OPCODE_MASK;
        msg->private_data = q;

        trace_xe_sched_msg_add(msg);
        if (opcode & MSG_HEAD)
                xe_sched_add_msg_head(&q->guc->sched, msg);
        else if (opcode & MSG_LOCKED)
                xe_sched_add_msg_locked(&q->guc->sched, msg);
        else
                xe_sched_add_msg(&q->guc->sched, msg);
}

/* Add a static message at the list head unless it is already queued */
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
                                            struct xe_sched_msg *msg,
                                            u32 opcode)
{
        if (!list_empty(&msg->link))
                return;

        guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}

/* Add a static message unless already queued; returns true if it was added */
static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
                                       struct xe_sched_msg *msg,
                                       u32 opcode)
{
        if (!list_empty(&msg->link))
                return false;

        guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);

        return true;
}

#define STATIC_MSG_CLEANUP      0
#define STATIC_MSG_SUSPEND      1
#define STATIC_MSG_RESUME       2
/*
 * Tear down @q. Normal queues go through the CLEANUP message so GuC-side
 * disable/deregister happens on the submission path; permanent or wedged
 * queues are destroyed directly.
 */
static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
        struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

        if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
                guc_exec_queue_add_msg(q, msg, CLEANUP);
        else
                __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

/*
 * Update @q's priority and push the new policies to the GuC asynchronously.
 * No-op (returns 0) if unchanged or the queue is killed/banned/wedged.
 */
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
                                       enum xe_exec_queue_priority priority)
{
        struct xe_sched_msg *msg;

        if (q->sched_props.priority == priority ||
            exec_queue_killed_or_banned_or_wedged(q))
                return 0;

        msg = kmalloc_obj(*msg);
        if (!msg)
                return -ENOMEM;

        q->sched_props.priority = priority;
        guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

        return 0;
}

/* Same pattern as set_priority, for the timeslice scheduling property */
static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
        struct xe_sched_msg *msg;

        if (q->sched_props.timeslice_us == timeslice_us ||
            exec_queue_killed_or_banned_or_wedged(q))
                return 0;

        msg = kmalloc_obj(*msg);
        if (!msg)
                return -ENOMEM;

        q->sched_props.timeslice_us = timeslice_us;
        guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

        return 0;
}

/* Same pattern as set_priority, for the preemption-timeout property */
static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
                                              u32 preempt_timeout_us)
{
        struct xe_sched_msg *msg;

        if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
            exec_queue_killed_or_banned_or_wedged(q))
                return 0;

        msg = kmalloc_obj(*msg);
        if (!msg)
                return -ENOMEM;

        q->sched_props.preempt_timeout_us = preempt_timeout_us;
        guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

        return 0;
}

static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
                                                   enum
xe_multi_queue_priority priority) 2137 { 2138 struct xe_sched_msg *msg; 2139 2140 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); 2141 2142 if (q->multi_queue.priority == priority || 2143 exec_queue_killed_or_banned_or_wedged(q)) 2144 return 0; 2145 2146 msg = kmalloc_obj(*msg); 2147 if (!msg) 2148 return -ENOMEM; 2149 2150 q->multi_queue.priority = priority; 2151 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); 2152 2153 return 0; 2154 } 2155 2156 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 2157 { 2158 struct xe_gpu_scheduler *sched = &q->guc->sched; 2159 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 2160 2161 if (exec_queue_killed_or_banned_or_wedged(q)) 2162 return -EINVAL; 2163 2164 xe_sched_msg_lock(sched); 2165 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) 2166 q->guc->suspend_pending = true; 2167 xe_sched_msg_unlock(sched); 2168 2169 return 0; 2170 } 2171 2172 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 2173 { 2174 struct xe_guc *guc = exec_queue_to_guc(q); 2175 struct xe_device *xe = guc_to_xe(guc); 2176 int ret; 2177 2178 /* 2179 * Likely don't need to check exec_queue_killed() as we clear 2180 * suspend_pending upon kill but to be paranoid but races in which 2181 * suspend_pending is set after kill also check kill here. 2182 */ 2183 #define WAIT_COND \ 2184 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ 2185 xe_guc_read_stopped(guc)) 2186 2187 retry: 2188 if (IS_SRIOV_VF(xe)) 2189 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || 2190 vf_recovery(guc), 2191 HZ * 5); 2192 else 2193 ret = wait_event_interruptible_timeout(q->guc->suspend_wait, 2194 WAIT_COND, HZ * 5); 2195 2196 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc)))) 2197 return -EAGAIN; 2198 2199 if (!ret) { 2200 xe_gt_warn(guc_to_gt(guc), 2201 "Suspend fence, guc_id=%d, failed to respond", 2202 q->guc->id); 2203 /* XXX: Trigger GT reset? 
		 */
		return -ETIME;
	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
		/* Corner case on RESFIX DONE where vf_recovery() changes */
		goto retry;
	}

#undef WAIT_COND

	return ret < 0 ? ret : 0;
}

static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* A secondary multi-queue also reports reset if its primary was reset */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
		return true;

	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

/* Quiesce one exec queue for GT reset: stop scheduling and reset GuC state */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool do_destroy = false;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if (exec_queue_destroyed(q))
			do_destroy = true;
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only the sticky state bits; all pending/enabled bits are dropped */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			    !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}

	if (do_destroy)
		__guc_exec_queue_destroy(guc, q);
}

static int guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

}

/*
 * Undo any in-flight H2G state transitions so the corresponding actions can be
 * replayed to a freshly loaded GuC after VF recovery. Only sets needs_* flags
 * and clears pending bits; the actual replay happens on unpause.
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/* Overwrite the whole parallel work queue with NOOP entries */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex but only real way to ensure no state is lost
 * during VF resume flows. The function scans the queue state, make adjustments
 * as needed, and queues jobs / messages which replayed upon unpause.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_stop(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_pause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/* Restart one exec queue after GT reset, resubmitting unfinished jobs */
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned_or_wedged(q)) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		int i;

		trace_xe_exec_queue_resubmit(q);
		if (job) {
			for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this point
				 * time, adjusting software ring tail ensures
				 * jobs are rewritten in original placement,
				 * adjusting LRC tail ensures the newly loaded
				 * GuC / contexts only view the LRC tail
				 * increasing as jobs are written out.
				 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

/* Re-emit ring instructions for jobs that must be replayed after VF recovery */
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL;
	struct drm_sched_job *s_job;
	bool restore_replay = false;

	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
		job = to_xe_sched_job(s_job);
		/* Once the first replay job is seen, all later jobs replay too */
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/* Re-queue the messages recorded by revert_pending_state_change() */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend pending is, but
	 * the inverse is possible.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
		 */
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		guc_exec_queue_kill(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/* Translate a guc_id from a G2H message into its exec queue, or NULL */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	/* Secondary multi-queues are not individually registered with GuC */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/* Process a scheduling-done G2H: complete a pending enable or disable */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
			clear_exec_queue_pending_disable(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
2818 */ 2819 clear_exec_queue_pending_disable(q); 2820 deregister_exec_queue(guc, q); 2821 } else { 2822 clear_exec_queue_pending_disable(q); 2823 } 2824 } 2825 } 2826 } 2827 2828 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 2829 struct xe_exec_queue *q, 2830 u32 runnable_state) 2831 { 2832 /* Take CT lock here as handle_sched_done() do send a h2g message */ 2833 mutex_lock(&guc->ct.lock); 2834 handle_sched_done(guc, q, runnable_state); 2835 mutex_unlock(&guc->ct.lock); 2836 } 2837 2838 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2839 { 2840 struct xe_exec_queue *q; 2841 u32 guc_id, runnable_state; 2842 2843 if (unlikely(len < 2)) 2844 return -EPROTO; 2845 2846 guc_id = msg[0]; 2847 runnable_state = msg[1]; 2848 2849 q = g2h_exec_queue_lookup(guc, guc_id); 2850 if (unlikely(!q)) 2851 return -EPROTO; 2852 2853 if (unlikely(!exec_queue_pending_enable(q) && 2854 !exec_queue_pending_disable(q))) { 2855 xe_gt_err(guc_to_gt(guc), 2856 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 2857 atomic_read(&q->guc->state), q->guc->id, 2858 runnable_state); 2859 return -EPROTO; 2860 } 2861 2862 handle_sched_done(guc, q, runnable_state); 2863 2864 return 0; 2865 } 2866 2867 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 2868 { 2869 trace_xe_exec_queue_deregister_done(q); 2870 2871 clear_exec_queue_registered(q); 2872 __guc_exec_queue_destroy(guc, q); 2873 } 2874 2875 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 2876 { 2877 struct xe_exec_queue *q; 2878 u32 guc_id; 2879 2880 if (unlikely(len < 1)) 2881 return -EPROTO; 2882 2883 guc_id = msg[0]; 2884 2885 q = g2h_exec_queue_lookup(guc, guc_id); 2886 if (unlikely(!q)) 2887 return -EPROTO; 2888 2889 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 2890 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 2891 xe_gt_err(guc_to_gt(guc), 2892 "DEREGISTER_DONE: Unexpected engine 
state 0x%04x, guc_id=%d", 2893 atomic_read(&q->guc->state), q->guc->id); 2894 return -EPROTO; 2895 } 2896 2897 handle_deregister_done(guc, q); 2898 2899 return 0; 2900 } 2901 2902 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 2903 { 2904 struct xe_gt *gt = guc_to_gt(guc); 2905 struct xe_exec_queue *q; 2906 u32 guc_id; 2907 2908 if (unlikely(len < 1)) 2909 return -EPROTO; 2910 2911 guc_id = msg[0]; 2912 2913 q = g2h_exec_queue_lookup(guc, guc_id); 2914 if (unlikely(!q)) 2915 return -EPROTO; 2916 2917 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x", 2918 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id, 2919 atomic_read(&q->guc->state)); 2920 2921 trace_xe_exec_queue_reset(q); 2922 2923 /* 2924 * A banned engine is a NOP at this point (came from 2925 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 2926 * jobs by setting timeout of the job to the minimum value kicking 2927 * guc_exec_queue_timedout_job. 2928 */ 2929 xe_guc_exec_queue_reset_trigger_cleanup(q); 2930 2931 return 0; 2932 } 2933 2934 /* 2935 * xe_guc_error_capture_handler - Handler of GuC captured message 2936 * @guc: The GuC object 2937 * @msg: Point to the message 2938 * @len: The message length 2939 * 2940 * When GuC captured data is ready, GuC will send message 2941 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be 2942 * called 1st to check status before process the data comes with the message. 2943 * 2944 * Returns: error code. 
0 if success 2945 */ 2946 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2947 { 2948 u32 status; 2949 2950 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2951 return -EPROTO; 2952 2953 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2954 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2955 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2956 2957 xe_guc_capture_process(guc); 2958 2959 return 0; 2960 } 2961 2962 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2963 u32 len) 2964 { 2965 struct xe_gt *gt = guc_to_gt(guc); 2966 struct xe_exec_queue *q; 2967 u32 guc_id; 2968 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2969 2970 if (unlikely(!len || len > 2)) 2971 return -EPROTO; 2972 2973 guc_id = msg[0]; 2974 2975 if (len == 2) 2976 type = msg[1]; 2977 2978 if (guc_id == GUC_ID_UNKNOWN) { 2979 /* 2980 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 2981 * context. In such case only PF will be notified about that fault. 2982 */ 2983 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 2984 return 0; 2985 } 2986 2987 q = g2h_exec_queue_lookup(guc, guc_id); 2988 if (unlikely(!q)) 2989 return -EPROTO; 2990 2991 /* 2992 * The type is HW-defined and changes based on platform, so we don't 2993 * decode it in the kernel and only check if it is valid. 2994 * See bspec 54047 and 72187 for details. 
2995 */ 2996 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 2997 xe_gt_info(gt, 2998 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 2999 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3000 else 3001 xe_gt_info(gt, 3002 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3003 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3004 3005 trace_xe_exec_queue_memory_cat_error(q); 3006 3007 /* Treat the same as engine reset */ 3008 xe_guc_exec_queue_reset_trigger_cleanup(q); 3009 3010 return 0; 3011 } 3012 3013 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3014 { 3015 struct xe_gt *gt = guc_to_gt(guc); 3016 u8 guc_class, instance; 3017 u32 reason; 3018 3019 if (unlikely(len != 3)) 3020 return -EPROTO; 3021 3022 guc_class = msg[0]; 3023 instance = msg[1]; 3024 reason = msg[2]; 3025 3026 /* Unexpected failure of a hardware feature, log an actual error */ 3027 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3028 guc_class, instance, reason); 3029 3030 xe_gt_reset_async(gt); 3031 3032 return 0; 3033 } 3034 3035 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3036 u32 len) 3037 { 3038 struct xe_gt *gt = guc_to_gt(guc); 3039 struct xe_device *xe = guc_to_xe(guc); 3040 struct xe_exec_queue *q; 3041 u32 guc_id = msg[2]; 3042 3043 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3044 drm_err(&xe->drm, "Invalid length %u", len); 3045 return -EPROTO; 3046 } 3047 3048 q = g2h_exec_queue_lookup(guc, guc_id); 3049 if (unlikely(!q)) 3050 return -EPROTO; 3051 3052 xe_gt_dbg(gt, 3053 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3054 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3055 3056 trace_xe_exec_queue_cgp_context_error(q); 3057 3058 /* Treat the same as engine reset */ 3059 xe_guc_exec_queue_reset_trigger_cleanup(q); 3060 3061 return 0; 3062 } 3063 3064 /** 3065 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3066 * @guc: guc 3067 * @msg: message indicating CGP sync done 3068 * @len: length of message 3069 * 3070 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3071 * for CGP synchronization to complete. 3072 * 3073 * Return: 0 on success, -EPROTO for malformed messages. 3074 */ 3075 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3076 { 3077 struct xe_device *xe = guc_to_xe(guc); 3078 struct xe_exec_queue *q; 3079 u32 guc_id = msg[0]; 3080 3081 if (unlikely(len < 1)) { 3082 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3083 return -EPROTO; 3084 } 3085 3086 q = g2h_exec_queue_lookup(guc, guc_id); 3087 if (unlikely(!q)) 3088 return -EPROTO; 3089 3090 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3091 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3092 return -EPROTO; 3093 } 3094 3095 /* Wakeup the serialized cgp update wait */ 3096 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3097 xe_guc_ct_wake_waiters(&guc->ct); 3098 3099 return 0; 3100 } 3101 3102 static void 3103 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3104 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3105 { 3106 struct xe_guc *guc = exec_queue_to_guc(q); 3107 struct xe_device *xe = guc_to_xe(guc); 3108 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3109 int i; 3110 3111 snapshot->guc.wqi_head = q->guc->wqi_head; 3112 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3113 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3114 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3115 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3116 wq_desc.wq_status); 3117 3118 if (snapshot->parallel.wq_desc.head != 3119 snapshot->parallel.wq_desc.tail) { 3120 for (i = snapshot->parallel.wq_desc.head; 3121 i != snapshot->parallel.wq_desc.tail; 3122 i = (i + sizeof(u32)) % WQ_SIZE) 3123 snapshot->parallel.wq[i / sizeof(u32)] = 3124 parallel_read(xe, map, wq[i / sizeof(u32)]); 3125 } 3126 } 3127 3128 static void 3129 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3130 struct drm_printer *p) 3131 { 3132 int i; 3133 3134 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3135 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3136 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3137 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3138 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3139 3140 if (snapshot->parallel.wq_desc.head != 3141 snapshot->parallel.wq_desc.tail) { 3142 for (i = snapshot->parallel.wq_desc.head; 3143 i != snapshot->parallel.wq_desc.tail; 3144 i = (i + sizeof(u32)) % WQ_SIZE) 3145 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3146 snapshot->parallel.wq[i / sizeof(u32)]); 3147 } 3148 } 3149 3150 /** 3151 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3152 * @q: faulty exec queue 3153 * 3154 * This can be printed out in a later stage like during dev_coredump 3155 * analysis. 3156 * 3157 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3158 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3159 */ 3160 struct xe_guc_submit_exec_queue_snapshot * 3161 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3162 { 3163 struct xe_gpu_scheduler *sched = &q->guc->sched; 3164 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3165 int i; 3166 3167 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3168 3169 if (!snapshot) 3170 return NULL; 3171 3172 snapshot->guc.id = q->guc->id; 3173 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3174 snapshot->class = q->class; 3175 snapshot->logical_mask = q->logical_mask; 3176 snapshot->width = q->width; 3177 snapshot->refcount = kref_read(&q->refcount); 3178 snapshot->sched_timeout = sched->base.timeout; 3179 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3180 snapshot->sched_props.preempt_timeout_us = 3181 q->sched_props.preempt_timeout_us; 3182 3183 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3184 GFP_ATOMIC); 3185 3186 if (snapshot->lrc) { 3187 for (i = 0; i < q->width; ++i) { 3188 struct xe_lrc *lrc = q->lrc[i]; 3189 3190 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3191 } 3192 } 3193 3194 snapshot->schedule_state = atomic_read(&q->guc->state); 3195 snapshot->exec_queue_flags = q->flags; 3196 3197 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3198 if (snapshot->parallel_execution) 3199 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3200 3201 if (xe_exec_queue_is_multi_queue(q)) { 3202 snapshot->multi_queue.valid = true; 3203 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3204 snapshot->multi_queue.pos = q->multi_queue.pos; 3205 } 3206 3207 return snapshot; 3208 } 3209 3210 /** 3211 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3212 * @snapshot: Previously captured snapshot of job. 3213 * 3214 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3215 */ 3216 void 3217 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3218 { 3219 int i; 3220 3221 if (!snapshot || !snapshot->lrc) 3222 return; 3223 3224 for (i = 0; i < snapshot->width; ++i) 3225 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3226 } 3227 3228 /** 3229 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3230 * @snapshot: GuC Submit Engine snapshot object. 3231 * @p: drm_printer where it will be printed out. 3232 * 3233 * This function prints out a given GuC Submit Engine snapshot object. 3234 */ 3235 void 3236 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3237 struct drm_printer *p) 3238 { 3239 int i; 3240 3241 if (!snapshot) 3242 return; 3243 3244 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3245 drm_printf(p, "\tName: %s\n", snapshot->name); 3246 drm_printf(p, "\tClass: %d\n", snapshot->class); 3247 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3248 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3249 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3250 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3251 drm_printf(p, "\tTimeslice: %u (us)\n", 3252 snapshot->sched_props.timeslice_us); 3253 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3254 snapshot->sched_props.preempt_timeout_us); 3255 3256 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3257 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3258 3259 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3260 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3261 3262 if (snapshot->parallel_execution) 3263 guc_exec_queue_wq_snapshot_print(snapshot, p); 3264 3265 if (snapshot->multi_queue.valid) { 3266 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3267 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3268 } 3269 } 3270 3271 /** 3272 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function free all the memory that needed to be allocated at capture
 * time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int n;

	if (!snapshot)
		return;

	/* Per-LRC snapshots are owned by this object; release them first */
	if (snapshot->lrc) {
		for (n = 0; n < snapshot->width; n++)
			xe_lrc_snapshot_free(snapshot->lrc[n]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot);
}

/* Capture, print and immediately release a one-shot snapshot of @q */
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
 * @q: Execution queue
 * @ctx_type: Type of the context
 *
 * This function registers the execution queue with the guc. Special context
 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
 * are only applicable for IGPU and in the VF.
 * Submits the execution queue to GUC after registering it.
 *
 * Returns - None.
3314 */ 3315 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3316 { 3317 struct xe_guc *guc = exec_queue_to_guc(q); 3318 struct xe_device *xe = guc_to_xe(guc); 3319 struct xe_gt *gt = guc_to_gt(guc); 3320 3321 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3322 xe_gt_assert(gt, !IS_DGFX(xe)); 3323 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3324 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3325 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3326 3327 register_exec_queue(q, ctx_type); 3328 enable_scheduling(q); 3329 } 3330 3331 /** 3332 * xe_guc_submit_print - GuC Submit Print. 3333 * @guc: GuC. 3334 * @p: drm_printer where it will be printed out. 3335 * 3336 * This function capture and prints snapshots of **all** GuC Engines. 3337 */ 3338 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3339 { 3340 struct xe_exec_queue *q; 3341 unsigned long index; 3342 3343 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3344 return; 3345 3346 mutex_lock(&guc->submission_state.lock); 3347 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3348 guc_exec_queue_print(q, p); 3349 mutex_unlock(&guc->submission_state.lock); 3350 } 3351 3352 /** 3353 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3354 * registered with the GuC 3355 * @guc: GuC. 3356 * 3357 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3358 */ 3359 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3360 { 3361 struct xe_exec_queue *q; 3362 unsigned long index; 3363 3364 guard(mutex)(&guc->submission_state.lock); 3365 3366 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3367 if (q->width > 1) 3368 return true; 3369 3370 return false; 3371 } 3372 3373 /** 3374 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3375 * exec queues registered to given GuC. 
3376 * @guc: the &xe_guc struct instance 3377 * @scratch: scratch buffer to be used as temporary storage 3378 * 3379 * Returns: zero on success, negative error code on failure. 3380 */ 3381 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3382 { 3383 struct xe_exec_queue *q; 3384 unsigned long index; 3385 int err = 0; 3386 3387 mutex_lock(&guc->submission_state.lock); 3388 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3389 /* Prevent redundant attempts to stop parallel queues */ 3390 if (q->guc->id != index) 3391 continue; 3392 3393 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3394 if (err) 3395 break; 3396 } 3397 mutex_unlock(&guc->submission_state.lock); 3398 3399 return err; 3400 } 3401