1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_submit.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/bitmap.h> 10 #include <linux/circ_buf.h> 11 #include <linux/dma-fence-array.h> 12 13 #include <drm/drm_managed.h> 14 15 #include "abi/guc_actions_abi.h" 16 #include "abi/guc_actions_slpc_abi.h" 17 #include "abi/guc_klvs_abi.h" 18 #include "xe_assert.h" 19 #include "xe_bo.h" 20 #include "xe_devcoredump.h" 21 #include "xe_device.h" 22 #include "xe_exec_queue.h" 23 #include "xe_force_wake.h" 24 #include "xe_gpu_scheduler.h" 25 #include "xe_gt.h" 26 #include "xe_gt_clock.h" 27 #include "xe_gt_printk.h" 28 #include "xe_guc.h" 29 #include "xe_guc_capture.h" 30 #include "xe_guc_ct.h" 31 #include "xe_guc_exec_queue_types.h" 32 #include "xe_guc_id_mgr.h" 33 #include "xe_guc_klv_helpers.h" 34 #include "xe_guc_submit_types.h" 35 #include "xe_hw_engine.h" 36 #include "xe_lrc.h" 37 #include "xe_macros.h" 38 #include "xe_map.h" 39 #include "xe_mocs.h" 40 #include "xe_pm.h" 41 #include "xe_ring_ops_types.h" 42 #include "xe_sched_job.h" 43 #include "xe_sleep.h" 44 #include "xe_trace.h" 45 #include "xe_uc_fw.h" 46 #include "xe_vm.h" 47 48 #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 49 50 static int guc_submit_reset_prepare(struct xe_guc *guc); 51 52 static struct xe_guc * 53 exec_queue_to_guc(struct xe_exec_queue *q) 54 { 55 return &q->gt->uc.guc; 56 } 57 58 /* 59 * Helpers for engine state, using an atomic as some of the bits can transition 60 * as the same time (e.g. a suspend can be happning at the same time as schedule 61 * engine done being processed). 
62 */ 63 #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) 64 #define EXEC_QUEUE_STATE_ENABLED (1 << 1) 65 #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) 66 #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) 67 #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) 68 #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) 69 #define EXEC_QUEUE_STATE_RESET (1 << 6) 70 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 71 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 72 #define EXEC_QUEUE_STATE_BANNED (1 << 9) 73 #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 10) 74 75 static bool exec_queue_registered(struct xe_exec_queue *q) 76 { 77 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; 78 } 79 80 static void set_exec_queue_registered(struct xe_exec_queue *q) 81 { 82 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 83 } 84 85 static void clear_exec_queue_registered(struct xe_exec_queue *q) 86 { 87 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 88 } 89 90 static bool exec_queue_enabled(struct xe_exec_queue *q) 91 { 92 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; 93 } 94 95 static void set_exec_queue_enabled(struct xe_exec_queue *q) 96 { 97 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 98 } 99 100 static void clear_exec_queue_enabled(struct xe_exec_queue *q) 101 { 102 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 103 } 104 105 static bool exec_queue_pending_enable(struct xe_exec_queue *q) 106 { 107 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; 108 } 109 110 static void set_exec_queue_pending_enable(struct xe_exec_queue *q) 111 { 112 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 113 } 114 115 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) 116 { 117 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 118 } 119 120 static bool exec_queue_pending_disable(struct xe_exec_queue *q) 121 { 122 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; 123 } 124 125 static 
void set_exec_queue_pending_disable(struct xe_exec_queue *q) 126 { 127 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 128 } 129 130 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 131 { 132 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 133 } 134 135 static bool exec_queue_destroyed(struct xe_exec_queue *q) 136 { 137 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 138 } 139 140 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 141 { 142 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 143 } 144 145 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 146 { 147 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 148 } 149 150 static bool exec_queue_banned(struct xe_exec_queue *q) 151 { 152 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 153 } 154 155 static void set_exec_queue_banned(struct xe_exec_queue *q) 156 { 157 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 158 } 159 160 static void clear_exec_queue_banned(struct xe_exec_queue *q) 161 { 162 atomic_andnot(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 163 } 164 165 static bool exec_queue_suspended(struct xe_exec_queue *q) 166 { 167 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 168 } 169 170 static void set_exec_queue_suspended(struct xe_exec_queue *q) 171 { 172 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 173 } 174 175 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 176 { 177 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 178 } 179 180 static bool exec_queue_reset(struct xe_exec_queue *q) 181 { 182 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 183 } 184 185 static void set_exec_queue_reset(struct xe_exec_queue *q) 186 { 187 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 188 } 189 190 static bool exec_queue_killed(struct xe_exec_queue *q) 191 { 192 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 193 } 194 195 
static void set_exec_queue_killed(struct xe_exec_queue *q) 196 { 197 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 198 } 199 200 static bool exec_queue_wedged(struct xe_exec_queue *q) 201 { 202 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; 203 } 204 205 static void set_exec_queue_wedged(struct xe_exec_queue *q) 206 { 207 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 208 } 209 210 static bool exec_queue_pending_resume(struct xe_exec_queue *q) 211 { 212 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; 213 } 214 215 static void set_exec_queue_pending_resume(struct xe_exec_queue *q) 216 { 217 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 218 } 219 220 static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) 221 { 222 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 223 } 224 225 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) 226 { 227 return (atomic_read(&q->guc->state) & 228 (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | 229 EXEC_QUEUE_STATE_BANNED)); 230 } 231 232 static void guc_submit_sw_fini(struct drm_device *drm, void *arg) 233 { 234 struct xe_guc *guc = arg; 235 struct xe_device *xe = guc_to_xe(guc); 236 struct xe_gt *gt = guc_to_gt(guc); 237 int ret; 238 239 ret = wait_event_timeout(guc->submission_state.fini_wq, 240 xa_empty(&guc->submission_state.exec_queue_lookup), 241 HZ * 5); 242 243 drain_workqueue(xe->destroy_wq); 244 245 xe_gt_assert(gt, ret); 246 247 xa_destroy(&guc->submission_state.exec_queue_lookup); 248 } 249 250 static void guc_submit_fini(void *arg) 251 { 252 struct xe_guc *guc = arg; 253 struct xe_exec_queue *q; 254 unsigned long index; 255 256 /* Drop any wedged queue refs */ 257 mutex_lock(&guc->submission_state.lock); 258 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 259 if (exec_queue_wedged(q)) { 260 mutex_unlock(&guc->submission_state.lock); 261 xe_exec_queue_put(q); 262 
mutex_lock(&guc->submission_state.lock); 263 } 264 } 265 mutex_unlock(&guc->submission_state.lock); 266 267 /* Forcefully kill any remaining exec queues */ 268 xe_guc_ct_stop(&guc->ct); 269 guc_submit_reset_prepare(guc); 270 xe_guc_softreset(guc); 271 xe_guc_submit_stop(guc); 272 xe_uc_fw_sanitize(&guc->fw); 273 xe_guc_submit_pause_abort(guc); 274 } 275 276 static const struct xe_exec_queue_ops guc_exec_queue_ops; 277 278 static void primelockdep(struct xe_guc *guc) 279 { 280 if (!IS_ENABLED(CONFIG_LOCKDEP)) 281 return; 282 283 fs_reclaim_acquire(GFP_KERNEL); 284 285 mutex_lock(&guc->submission_state.lock); 286 mutex_unlock(&guc->submission_state.lock); 287 288 fs_reclaim_release(GFP_KERNEL); 289 } 290 291 /** 292 * xe_guc_submit_init() - Initialize GuC submission. 293 * @guc: the &xe_guc to initialize 294 * @num_ids: number of GuC context IDs to use 295 * 296 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all 297 * GuC context IDs supported by the GuC firmware should be used for submission. 298 * 299 * Only VF drivers will have to provide explicit number of GuC context IDs 300 * that they can use for submission. 301 * 302 * Return: 0 on success or a negative error code on failure. 
303 */ 304 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) 305 { 306 struct xe_device *xe = guc_to_xe(guc); 307 struct xe_gt *gt = guc_to_gt(guc); 308 int err; 309 310 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); 311 if (err) 312 return err; 313 314 err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); 315 if (err) 316 return err; 317 318 gt->exec_queue_ops = &guc_exec_queue_ops; 319 320 xa_init(&guc->submission_state.exec_queue_lookup); 321 322 init_waitqueue_head(&guc->submission_state.fini_wq); 323 324 primelockdep(guc); 325 326 guc->submission_state.initialized = true; 327 328 err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); 329 if (err) 330 return err; 331 332 return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); 333 } 334 335 /* 336 * Given that we want to guarantee enough RCS throughput to avoid missing 337 * frames, we set the yield policy to 20% of each 80ms interval. 338 */ 339 #define RC_YIELD_DURATION 80 /* in ms */ 340 #define RC_YIELD_RATIO 20 /* in percent */ 341 static u32 *emit_render_compute_yield_klv(u32 *emit) 342 { 343 *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); 344 *emit++ = RC_YIELD_DURATION; 345 *emit++ = RC_YIELD_RATIO; 346 347 return emit; 348 } 349 350 #define SCHEDULING_POLICY_MAX_DWORDS 16 351 static int guc_init_global_schedule_policy(struct xe_guc *guc) 352 { 353 u32 data[SCHEDULING_POLICY_MAX_DWORDS]; 354 u32 *emit = data; 355 u32 count = 0; 356 int ret; 357 358 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 359 return 0; 360 361 *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 362 363 if (CCS_INSTANCES(guc_to_gt(guc))) 364 emit = emit_render_compute_yield_klv(emit); 365 366 count = emit - data; 367 if (count > 1) { 368 xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); 369 370 ret = xe_guc_ct_send_block(&guc->ct, data, count); 371 if (ret < 0) { 372 xe_gt_err(guc_to_gt(guc), 373 "failed to enable GuC 
scheduling policies: %pe\n", 374 ERR_PTR(ret)); 375 return ret; 376 } 377 } 378 379 return 0; 380 } 381 382 int xe_guc_submit_enable(struct xe_guc *guc) 383 { 384 int ret; 385 386 ret = guc_init_global_schedule_policy(guc); 387 if (ret) 388 return ret; 389 390 guc->submission_state.enabled = true; 391 392 return 0; 393 } 394 395 void xe_guc_submit_disable(struct xe_guc *guc) 396 { 397 guc->submission_state.enabled = false; 398 } 399 400 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, 401 int count) 402 { 403 int i; 404 405 mutex_lock(&guc->submission_state.lock); 406 407 for (i = 0; i < count; ++i) 408 xa_erase(&guc->submission_state.exec_queue_lookup, 409 q->guc->id + i); 410 411 xe_guc_id_mgr_release_locked(&guc->submission_state.idm, 412 q->guc->id, q->width); 413 414 if (xa_empty(&guc->submission_state.exec_queue_lookup)) 415 wake_up(&guc->submission_state.fini_wq); 416 417 mutex_unlock(&guc->submission_state.lock); 418 } 419 420 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 421 { 422 int ret, i; 423 424 mutex_lock(&guc->submission_state.lock); 425 ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, 426 q->width); 427 mutex_unlock(&guc->submission_state.lock); 428 if (ret < 0) 429 return ret; 430 431 q->guc->id = ret; 432 433 /* Reserve empty slots. 
*/ 434 for (i = 0; i < q->width; ++i) { 435 ret = xa_insert(&guc->submission_state.exec_queue_lookup, 436 q->guc->id + i, NULL, GFP_KERNEL); 437 if (ret) 438 goto err_release; 439 } 440 441 return 0; 442 443 err_release: 444 __release_guc_id(guc, q, i); 445 446 return ret; 447 } 448 449 static void publish_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 450 { 451 int i; 452 453 lockdep_assert_held(&guc->submission_state.lock); 454 455 for (i = 0; i < q->width; ++i) { 456 void *old; 457 458 old = xa_store(&guc->submission_state.exec_queue_lookup, 459 q->guc->id + i, q, GFP_NOWAIT); 460 XE_WARN_ON(old || xa_is_err(old)); 461 } 462 } 463 464 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 465 { 466 __release_guc_id(guc, q, q->width); 467 } 468 469 struct exec_queue_policy { 470 u32 count; 471 struct guc_update_exec_queue_policy h2g; 472 }; 473 474 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 475 { 476 size_t bytes = sizeof(policy->h2g.header) + 477 (sizeof(policy->h2g.klv[0]) * policy->count); 478 479 return bytes / sizeof(u32); 480 } 481 482 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 483 u16 guc_id) 484 { 485 policy->h2g.header.action = 486 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 487 policy->h2g.header.guc_id = guc_id; 488 policy->count = 0; 489 } 490 491 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 492 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 493 u32 data) \ 494 { \ 495 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 496 \ 497 policy->h2g.klv[policy->count].kl = \ 498 FIELD_PREP(GUC_KLV_0_KEY, \ 499 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 500 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 501 policy->h2g.klv[policy->count].value = data; \ 502 policy->count++; \ 503 } 504 505 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 506 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 507 
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 508 MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) 509 #undef MAKE_EXEC_QUEUE_POLICY_ADD 510 511 static const int xe_exec_queue_prio_to_guc[] = { 512 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 513 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 514 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 515 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 516 }; 517 518 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 519 { 520 struct exec_queue_policy policy; 521 enum xe_exec_queue_priority prio = q->sched_props.priority; 522 u32 timeslice_us = q->sched_props.timeslice_us; 523 u32 slpc_exec_queue_freq_req = 0; 524 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 525 526 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) && 527 !xe_exec_queue_is_multi_queue_secondary(q)); 528 529 if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) 530 slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; 531 532 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 533 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 534 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 535 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 536 __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, 537 slpc_exec_queue_freq_req); 538 539 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 540 __guc_exec_queue_policy_action_size(&policy), 0, 0); 541 } 542 543 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 544 { 545 struct exec_queue_policy policy; 546 547 xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q)); 548 549 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 550 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 551 552 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 553 
__guc_exec_queue_policy_action_size(&policy), 0, 0); 554 } 555 556 static bool vf_recovery(struct xe_guc *guc) 557 { 558 return xe_gt_recovery_pending(guc_to_gt(guc)); 559 } 560 561 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) 562 { 563 struct xe_guc *guc = exec_queue_to_guc(q); 564 struct xe_device *xe = guc_to_xe(guc); 565 566 /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ 567 wake_up_all(&xe->ufence_wq); 568 569 xe_sched_tdr_queue_imm(&q->guc->sched); 570 } 571 572 static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q) 573 { 574 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 575 struct xe_exec_queue_group *group = q->multi_queue.group; 576 struct xe_exec_queue *eq, *next; 577 LIST_HEAD(tmp); 578 579 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 580 xe_exec_queue_is_multi_queue(q)); 581 582 mutex_lock(&group->list_lock); 583 584 /* 585 * Stop all future queues being from executing while group is stopped. 586 */ 587 group->stopped = true; 588 589 list_for_each_entry_safe(eq, next, &group->list, multi_queue.link) 590 /* 591 * Refcount prevents an attempted removal from &group->list, 592 * temporary list allows safe iteration after dropping 593 * &group->list_lock. 
594 */ 595 if (xe_exec_queue_get_unless_zero(eq)) 596 list_move_tail(&eq->multi_queue.link, &tmp); 597 598 mutex_unlock(&group->list_lock); 599 600 /* We cannot stop under list lock without getting inversions */ 601 xe_sched_submission_stop(&primary->guc->sched); 602 list_for_each_entry(eq, &tmp, multi_queue.link) 603 xe_sched_submission_stop(&eq->guc->sched); 604 605 mutex_lock(&group->list_lock); 606 list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) { 607 /* 608 * Corner where we got banned while stopping and not on 609 * &group->list 610 */ 611 if (READ_ONCE(group->banned)) 612 xe_guc_exec_queue_trigger_cleanup(eq); 613 614 list_move_tail(&eq->multi_queue.link, &group->list); 615 xe_exec_queue_put(eq); 616 } 617 mutex_unlock(&group->list_lock); 618 } 619 620 static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q) 621 { 622 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 623 struct xe_exec_queue_group *group = q->multi_queue.group; 624 struct xe_exec_queue *eq; 625 626 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 627 xe_exec_queue_is_multi_queue(q)); 628 629 xe_sched_submission_start(&primary->guc->sched); 630 631 mutex_lock(&group->list_lock); 632 group->stopped = false; 633 list_for_each_entry(eq, &group->list, multi_queue.link) 634 xe_sched_submission_start(&eq->guc->sched); 635 mutex_unlock(&group->list_lock); 636 } 637 638 static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) 639 { 640 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 641 struct xe_exec_queue_group *group = q->multi_queue.group; 642 struct xe_exec_queue *eq; 643 644 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 645 xe_exec_queue_is_multi_queue(q)); 646 647 /* Group banned, skip timeout check in TDR */ 648 WRITE_ONCE(group->banned, true); 649 xe_guc_exec_queue_trigger_cleanup(primary); 650 651 mutex_lock(&group->list_lock); 652 list_for_each_entry(eq, &group->list, multi_queue.link) 653 
xe_guc_exec_queue_trigger_cleanup(eq); 654 mutex_unlock(&group->list_lock); 655 } 656 657 static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q) 658 { 659 if (xe_exec_queue_is_multi_queue(q)) { 660 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 661 struct xe_exec_queue_group *group = q->multi_queue.group; 662 struct xe_exec_queue *eq; 663 664 /* Group banned, skip timeout check in TDR */ 665 WRITE_ONCE(group->banned, true); 666 667 set_exec_queue_reset(primary); 668 if (!exec_queue_banned(primary)) 669 xe_guc_exec_queue_trigger_cleanup(primary); 670 671 mutex_lock(&group->list_lock); 672 list_for_each_entry(eq, &group->list, multi_queue.link) { 673 set_exec_queue_reset(eq); 674 if (!exec_queue_banned(eq)) 675 xe_guc_exec_queue_trigger_cleanup(eq); 676 } 677 mutex_unlock(&group->list_lock); 678 } else { 679 set_exec_queue_reset(q); 680 if (!exec_queue_banned(q)) 681 xe_guc_exec_queue_trigger_cleanup(q); 682 } 683 } 684 685 static void set_exec_queue_group_banned(struct xe_exec_queue *q) 686 { 687 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 688 struct xe_exec_queue_group *group = q->multi_queue.group; 689 struct xe_exec_queue *eq; 690 691 /* Ban all queues of the multi-queue group */ 692 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), 693 xe_exec_queue_is_multi_queue(q)); 694 set_exec_queue_banned(primary); 695 696 mutex_lock(&group->list_lock); 697 list_for_each_entry(eq, &group->list, multi_queue.link) 698 set_exec_queue_banned(eq); 699 mutex_unlock(&group->list_lock); 700 } 701 702 /* Helper for context registration H2G */ 703 struct guc_ctxt_registration_info { 704 u32 flags; 705 u32 context_idx; 706 u32 engine_class; 707 u32 engine_submit_mask; 708 u32 wq_desc_lo; 709 u32 wq_desc_hi; 710 u32 wq_base_lo; 711 u32 wq_base_hi; 712 u32 wq_size; 713 u32 cgp_lo; 714 u32 cgp_hi; 715 u32 hwlrca_lo; 716 u32 hwlrca_hi; 717 }; 718 719 #define parallel_read(xe_, map_, field_) \ 720 xe_map_rd_field(xe_, 
&map_, 0, struct guc_submit_parallel_scratch, \ 721 field_) 722 #define parallel_write(xe_, map_, field_, val_) \ 723 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 724 field_, val_) 725 726 /** 727 * DOC: Multi Queue Group GuC interface 728 * 729 * The multi queue group coordination between KMD and GuC is through a software 730 * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page 731 * allocated in the global GTT. 732 * 733 * CGP format: 734 * 735 * +-----------+---------------------------+---------------------------------------------+ 736 * | DWORD | Name | Description | 737 * +-----------+---------------------------+---------------------------------------------+ 738 * | 0 | Version | Bits [15:8]=Major ver, [7:0]=Minor ver | 739 * +-----------+---------------------------+---------------------------------------------+ 740 * | 1..15 | RESERVED | MBZ | 741 * +-----------+---------------------------+---------------------------------------------+ 742 * | 16 | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0 | 743 * +-----------+---------------------------+---------------------------------------------+ 744 * | 17 | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32 | 745 * +-----------+---------------------------+---------------------------------------------+ 746 * | 18..31 | RESERVED | MBZ | 747 * +-----------+---------------------------+---------------------------------------------+ 748 * | 32 | Q0CD_DW0 | Queue 0 context LRC descriptor lower DWORD | 749 * +-----------+---------------------------+---------------------------------------------+ 750 * | 33 | Q0ContextIndex | Context ID for Queue 0 | 751 * +-----------+---------------------------+---------------------------------------------+ 752 * | 34 | Q1CD_DW0 | Queue 1 context LRC descriptor lower DWORD | 753 * +-----------+---------------------------+---------------------------------------------+ 754 * | 35 | Q1ContextIndex | Context ID for Queue 
1 | 755 * +-----------+---------------------------+---------------------------------------------+ 756 * | ... |... | ... | 757 * +-----------+---------------------------+---------------------------------------------+ 758 * | 158 | Q63CD_DW0 | Queue 63 context LRC descriptor lower DWORD | 759 * +-----------+---------------------------+---------------------------------------------+ 760 * | 159 | Q63ContextIndex | Context ID for Queue 63 | 761 * +-----------+---------------------------+---------------------------------------------+ 762 * | 160..1024 | RESERVED | MBZ | 763 * +-----------+---------------------------+---------------------------------------------+ 764 * 765 * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified 766 * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies 767 * the CGP address. When the secondary queues are added to the group, the CGP is 768 * updated with entry for that queue and GuC is notified through the H2G interface 769 * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages 770 * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also 771 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any 772 * error in the CGP. Only one of these CGP update messages can be outstanding 773 * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW* 774 * fields indicate which queue entry is being updated in the CGP. 775 * 776 * The primary queue (Q0) represents the multi queue group context in GuC and 777 * submission on any queue of the group must be through Q0 GuC interface only. 778 * 779 * As it is not required to register secondary queues with GuC, the secondary queue 780 * context ids in the CGP are populated with Q0 context id. 
781 */ 782 783 #define CGP_VERSION_MAJOR_SHIFT 8 784 785 static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe, 786 struct xe_exec_queue *q) 787 { 788 struct xe_exec_queue_group *group = q->multi_queue.group; 789 u32 guc_id = group->primary->guc->id; 790 791 /* Currently implementing CGP version 1.0 */ 792 xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32, 793 1 << CGP_VERSION_MAJOR_SHIFT); 794 795 xe_map_wr(xe, &group->cgp_bo->vmap, 796 (32 + q->multi_queue.pos * 2) * sizeof(u32), 797 u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0]))); 798 799 xe_map_wr(xe, &group->cgp_bo->vmap, 800 (33 + q->multi_queue.pos * 2) * sizeof(u32), 801 u32, guc_id); 802 803 if (q->multi_queue.pos / 32) { 804 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), 805 u32, BIT(q->multi_queue.pos % 32)); 806 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0); 807 } else { 808 xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), 809 u32, BIT(q->multi_queue.pos)); 810 xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0); 811 } 812 } 813 814 static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, 815 struct xe_exec_queue *q, 816 const u32 *action, u32 len) 817 { 818 struct xe_exec_queue_group *group = q->multi_queue.group; 819 struct xe_device *xe = guc_to_xe(guc); 820 enum xe_multi_queue_priority priority; 821 long ret; 822 823 /* 824 * As all queues of a multi queue group use single drm scheduler 825 * submit workqueue, CGP synchronization with GuC are serialized. 826 * Hence, no locking is required here. 827 * Wait for any pending CGP_SYNC_DONE response before updating the 828 * CGP page and sending CGP_SYNC message. 829 * 830 * FIXME: Support VF migration 831 */ 832 ret = wait_event_timeout(guc->ct.wq, 833 !READ_ONCE(group->sync_pending) || 834 xe_guc_read_stopped(guc), HZ); 835 if (!ret || xe_guc_read_stopped(guc)) { 836 /* CGP_SYNC failed. 
Reset gt, cleanup the group */ 837 xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n"); 838 set_exec_queue_group_banned(q); 839 xe_gt_reset_async(q->gt); 840 xe_guc_exec_queue_group_trigger_cleanup(q); 841 return; 842 } 843 844 scoped_guard(spinlock, &q->multi_queue.lock) 845 priority = q->multi_queue.priority; 846 847 xe_lrc_set_multi_queue_priority(q->lrc[0], priority); 848 xe_guc_exec_queue_group_cgp_update(xe, q); 849 850 WRITE_ONCE(group->sync_pending, true); 851 xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1); 852 } 853 854 static void guc_exec_queue_send_cgp_sync(struct xe_exec_queue *q) 855 { 856 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 857 struct xe_guc *guc = exec_queue_to_guc(q); 858 struct xe_exec_queue_group *group = q->multi_queue.group; 859 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 860 int len = 0; 861 862 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 863 action[len++] = group->primary->guc->id; 864 865 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 866 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 867 868 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 869 } 870 871 static void __register_exec_queue_group(struct xe_exec_queue *q, 872 struct guc_ctxt_registration_info *info) 873 { 874 struct xe_guc *guc = exec_queue_to_guc(q); 875 #define MAX_MULTI_QUEUE_REG_SIZE (8) 876 u32 action[MAX_MULTI_QUEUE_REG_SIZE]; 877 int len = 0; 878 879 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE; 880 action[len++] = info->flags; 881 action[len++] = info->context_idx; 882 action[len++] = info->engine_class; 883 action[len++] = info->engine_submit_mask; 884 action[len++] = 0; /* Reserved */ 885 action[len++] = info->cgp_lo; 886 action[len++] = info->cgp_hi; 887 888 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE); 889 #undef MAX_MULTI_QUEUE_REG_SIZE 890 891 /* 892 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a 893 * 
XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response 894 * from guc. 895 */ 896 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 897 } 898 899 static void __register_mlrc_exec_queue(struct xe_guc *guc, 900 struct xe_exec_queue *q, 901 struct guc_ctxt_registration_info *info) 902 { 903 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 904 u32 action[MAX_MLRC_REG_SIZE]; 905 int len = 0; 906 int i; 907 908 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); 909 910 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 911 action[len++] = info->flags; 912 action[len++] = info->context_idx; 913 action[len++] = info->engine_class; 914 action[len++] = info->engine_submit_mask; 915 action[len++] = info->wq_desc_lo; 916 action[len++] = info->wq_desc_hi; 917 action[len++] = info->wq_base_lo; 918 action[len++] = info->wq_base_hi; 919 action[len++] = info->wq_size; 920 action[len++] = q->width; 921 action[len++] = info->hwlrca_lo; 922 action[len++] = info->hwlrca_hi; 923 924 for (i = 1; i < q->width; ++i) { 925 struct xe_lrc *lrc = q->lrc[i]; 926 927 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 928 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 929 } 930 931 /* explicitly checks some fields that we might fixup later */ 932 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 933 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]); 934 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 935 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]); 936 xe_gt_assert(guc_to_gt(guc), q->width == 937 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]); 938 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 939 action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]); 940 xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); 941 #undef MAX_MLRC_REG_SIZE 942 943 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 944 } 945 946 static void __register_exec_queue(struct xe_guc *guc, 947 struct 
guc_ctxt_registration_info *info) 948 { 949 u32 action[] = { 950 XE_GUC_ACTION_REGISTER_CONTEXT, 951 info->flags, 952 info->context_idx, 953 info->engine_class, 954 info->engine_submit_mask, 955 info->wq_desc_lo, 956 info->wq_desc_hi, 957 info->wq_base_lo, 958 info->wq_base_hi, 959 info->wq_size, 960 info->hwlrca_lo, 961 info->hwlrca_hi, 962 }; 963 964 /* explicitly checks some fields that we might fixup later */ 965 xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo == 966 action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]); 967 xe_gt_assert(guc_to_gt(guc), info->wq_base_lo == 968 action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]); 969 xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo == 970 action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]); 971 972 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 973 } 974 975 static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) 976 { 977 struct xe_guc *guc = exec_queue_to_guc(q); 978 struct xe_device *xe = guc_to_xe(guc); 979 struct xe_lrc *lrc = q->lrc[0]; 980 struct guc_ctxt_registration_info info; 981 982 xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); 983 xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); 984 985 memset(&info, 0, sizeof(info)); 986 info.context_idx = q->guc->id; 987 info.engine_class = xe_engine_class_to_guc_class(q->class); 988 info.engine_submit_mask = q->logical_mask; 989 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 990 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 991 info.flags = CONTEXT_REGISTRATION_FLAG_KMD | 992 FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); 993 994 if (xe_exec_queue_is_multi_queue(q)) { 995 struct xe_exec_queue_group *group = q->multi_queue.group; 996 997 info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo); 998 info.cgp_hi = 0; 999 } 1000 1001 if (xe_exec_queue_is_parallel(q)) { 1002 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 1003 struct iosys_map map = xe_lrc_parallel_map(lrc); 1004 1005 info.wq_desc_lo 
= lower_32_bits(ggtt_addr + 1006 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 1007 info.wq_desc_hi = upper_32_bits(ggtt_addr + 1008 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 1009 info.wq_base_lo = lower_32_bits(ggtt_addr + 1010 offsetof(struct guc_submit_parallel_scratch, wq[0])); 1011 info.wq_base_hi = upper_32_bits(ggtt_addr + 1012 offsetof(struct guc_submit_parallel_scratch, wq[0])); 1013 info.wq_size = WQ_SIZE; 1014 1015 q->guc->wqi_head = 0; 1016 q->guc->wqi_tail = 0; 1017 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); 1018 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); 1019 } 1020 1021 set_exec_queue_registered(q); 1022 trace_xe_exec_queue_register(q); 1023 if (xe_exec_queue_is_multi_queue_primary(q)) 1024 __register_exec_queue_group(q, &info); 1025 else if (xe_exec_queue_is_parallel(q)) 1026 __register_mlrc_exec_queue(guc, q, &info); 1027 else if (!xe_exec_queue_is_multi_queue_secondary(q)) 1028 __register_exec_queue(guc, &info); 1029 1030 if (!xe_exec_queue_is_multi_queue_secondary(q)) 1031 init_policies(guc, q); 1032 1033 if (xe_exec_queue_is_multi_queue_secondary(q)) 1034 guc_exec_queue_send_cgp_sync(q); 1035 } 1036 1037 static u32 wq_space_until_wrap(struct xe_exec_queue *q) 1038 { 1039 return (WQ_SIZE - q->guc->wqi_tail); 1040 } 1041 1042 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) 1043 { 1044 struct xe_guc *guc = exec_queue_to_guc(q); 1045 struct xe_device *xe = guc_to_xe(guc); 1046 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1047 unsigned int sleep_period_ms = 1, sleep_total_ms = 0; 1048 1049 #define AVAILABLE_SPACE \ 1050 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) 1051 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { 1052 try_again: 1053 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); 1054 if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) { 1055 if (sleep_total_ms > 2000) { 1056 xe_gt_reset_async(q->gt); 1057 return -ENODEV; 1058 
} 1059 1060 sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64); 1061 goto try_again; 1062 } 1063 } 1064 #undef AVAILABLE_SPACE 1065 1066 return 0; 1067 } 1068 1069 static int wq_noop_append(struct xe_exec_queue *q) 1070 { 1071 struct xe_guc *guc = exec_queue_to_guc(q); 1072 struct xe_device *xe = guc_to_xe(guc); 1073 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1074 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; 1075 1076 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 1077 return -ENODEV; 1078 1079 xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); 1080 1081 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], 1082 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 1083 FIELD_PREP(WQ_LEN_MASK, len_dw)); 1084 q->guc->wqi_tail = 0; 1085 1086 return 0; 1087 } 1088 1089 static void wq_item_append(struct xe_exec_queue *q) 1090 { 1091 struct xe_guc *guc = exec_queue_to_guc(q); 1092 struct xe_device *xe = guc_to_xe(guc); 1093 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 1094 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ 1095 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; 1096 u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); 1097 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 1098 int i = 0, j; 1099 1100 if (wqi_size > wq_space_until_wrap(q)) { 1101 if (wq_noop_append(q)) 1102 return; 1103 } 1104 if (wq_wait_for_space(q, wqi_size)) 1105 return; 1106 1107 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 1108 FIELD_PREP(WQ_LEN_MASK, len_dw); 1109 wqi[i++] = xe_lrc_descriptor(q->lrc[0]); 1110 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 1111 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); 1112 wqi[i++] = 0; 1113 for (j = 1; j < q->width; ++j) { 1114 struct xe_lrc *lrc = q->lrc[j]; 1115 1116 wqi[i++] = lrc->ring.tail / sizeof(u64); 1117 } 1118 1119 xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 1120 1121 iosys_map_incr(&map, offsetof(struct 
guc_submit_parallel_scratch, 1122 wq[q->guc->wqi_tail / sizeof(u32)])); 1123 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 1124 q->guc->wqi_tail += wqi_size; 1125 xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 1126 1127 xe_device_wmb(xe); 1128 1129 map = xe_lrc_parallel_map(q->lrc[0]); 1130 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 1131 } 1132 1133 #define RESUME_PENDING ~0x0ull 1134 static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) 1135 { 1136 struct xe_guc *guc = exec_queue_to_guc(q); 1137 struct xe_lrc *lrc = q->lrc[0]; 1138 u32 action[3]; 1139 u32 g2h_len = 0; 1140 u32 num_g2h = 0; 1141 int len = 0; 1142 bool extra_submit = false; 1143 1144 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1145 1146 if (!job->restore_replay || job->last_replay) { 1147 if (xe_exec_queue_is_parallel(q)) 1148 wq_item_append(q); 1149 else 1150 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1151 job->last_replay = false; 1152 } 1153 1154 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 1155 return; 1156 1157 /* 1158 * All queues in a multi-queue group will use the primary queue 1159 * of the group to interface with GuC. If primay is suspended, 1160 * just return. Jobs will get scheduled once primary is resumed. 
1161 */ 1162 q = xe_exec_queue_multi_queue_primary(q); 1163 if (exec_queue_suspended(q)) 1164 return; 1165 1166 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 1167 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 1168 action[len++] = q->guc->id; 1169 action[len++] = GUC_CONTEXT_ENABLE; 1170 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 1171 num_g2h = 1; 1172 if (xe_exec_queue_is_parallel(q)) 1173 extra_submit = true; 1174 1175 q->guc->resume_time = RESUME_PENDING; 1176 set_exec_queue_pending_enable(q); 1177 set_exec_queue_enabled(q); 1178 trace_xe_exec_queue_scheduling_enable(q); 1179 } else { 1180 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1181 action[len++] = q->guc->id; 1182 trace_xe_exec_queue_submit(q); 1183 } 1184 1185 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 1186 1187 if (extra_submit) { 1188 len = 0; 1189 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1190 action[len++] = q->guc->id; 1191 trace_xe_exec_queue_submit(q); 1192 1193 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 1194 } 1195 } 1196 1197 static struct dma_fence * 1198 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 1199 { 1200 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1201 struct xe_exec_queue *q = job->q; 1202 struct xe_guc *guc = exec_queue_to_guc(q); 1203 bool killed_or_banned_or_wedged = 1204 exec_queue_killed_or_banned_or_wedged(q); 1205 1206 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 1207 exec_queue_banned(q) || exec_queue_suspended(q)); 1208 1209 trace_xe_sched_job_run(job); 1210 1211 if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { 1212 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1213 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); 1214 1215 if (exec_queue_killed_or_banned_or_wedged(primary)) 1216 goto run_job_out; 1217 1218 if (!exec_queue_registered(primary)) 1219 register_exec_queue(primary, GUC_CONTEXT_NORMAL); 1220 } 1221 1222 if 
(!exec_queue_registered(q)) 1223 register_exec_queue(q, GUC_CONTEXT_NORMAL); 1224 if (!job->restore_replay) 1225 q->ring_ops->emit_job(job); 1226 submit_exec_queue(q, job); 1227 job->restore_replay = false; 1228 } 1229 1230 run_job_out: 1231 1232 return job->fence; 1233 } 1234 1235 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 1236 { 1237 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1238 1239 trace_xe_sched_job_free(job); 1240 xe_sched_job_put(job); 1241 } 1242 1243 int xe_guc_read_stopped(struct xe_guc *guc) 1244 { 1245 return atomic_read(&guc->submission_state.stopped); 1246 } 1247 1248 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc, 1249 struct xe_exec_queue *q, 1250 u32 runnable_state); 1251 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q); 1252 1253 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 1254 u32 action[] = { \ 1255 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 1256 q->guc->id, \ 1257 GUC_CONTEXT_##enable_disable, \ 1258 } 1259 1260 static void disable_scheduling_deregister(struct xe_guc *guc, 1261 struct xe_exec_queue *q) 1262 { 1263 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1264 int ret; 1265 1266 if (!xe_exec_queue_is_multi_queue_secondary(q)) 1267 set_min_preemption_timeout(guc, q); 1268 1269 smp_rmb(); 1270 ret = wait_event_timeout(guc->ct.wq, 1271 (!exec_queue_pending_enable(q) && 1272 !exec_queue_pending_disable(q)) || 1273 xe_guc_read_stopped(guc) || 1274 vf_recovery(guc), 1275 HZ * 5); 1276 if (!ret && !vf_recovery(guc)) { 1277 struct xe_gpu_scheduler *sched = &q->guc->sched; 1278 1279 xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n"); 1280 xe_sched_submission_start(sched); 1281 xe_gt_reset_async(q->gt); 1282 xe_sched_tdr_queue_imm(sched); 1283 return; 1284 } 1285 1286 clear_exec_queue_enabled(q); 1287 set_exec_queue_pending_disable(q); 1288 set_exec_queue_destroyed(q); 1289 trace_xe_exec_queue_scheduling_disable(q); 1290 1291 /* 1292 * Reserve 
space for both G2H here as the 2nd G2H is sent from a G2H 1293 * handler and we are not allowed to reserved G2H space in handlers. 1294 */ 1295 if (xe_exec_queue_is_multi_queue_secondary(q)) 1296 handle_multi_queue_secondary_sched_done(guc, q, 0); 1297 else 1298 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1299 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + 1300 G2H_LEN_DW_DEREGISTER_CONTEXT, 2); 1301 } 1302 1303 /** 1304 * xe_guc_submit_wedge() - Wedge GuC submission 1305 * @guc: the GuC object 1306 * 1307 * Save exec queue's registered with GuC state by taking a ref to each queue. 1308 * Register a DRMM handler to drop refs upon driver unload. 1309 */ 1310 void xe_guc_submit_wedge(struct xe_guc *guc) 1311 { 1312 struct xe_device *xe = guc_to_xe(guc); 1313 struct xe_exec_queue *q; 1314 unsigned long index; 1315 1316 xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); 1317 1318 /* 1319 * If device is being wedged even before submission_state is 1320 * initialized, there's nothing to do here. 
1321 */ 1322 if (!guc->submission_state.initialized) 1323 return; 1324 1325 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { 1326 mutex_lock(&guc->submission_state.lock); 1327 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 1328 if (xe_exec_queue_get_unless_zero(q)) 1329 set_exec_queue_wedged(q); 1330 mutex_unlock(&guc->submission_state.lock); 1331 } else { 1332 /* Forcefully kill any remaining exec queues, signal fences */ 1333 guc_submit_reset_prepare(guc); 1334 xe_guc_submit_stop(guc); 1335 xe_guc_softreset(guc); 1336 xe_uc_fw_sanitize(&guc->fw); 1337 xe_guc_submit_pause_abort(guc); 1338 } 1339 } 1340 1341 static bool guc_submit_hint_wedged(struct xe_guc *guc) 1342 { 1343 struct xe_device *xe = guc_to_xe(guc); 1344 1345 if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) 1346 return false; 1347 1348 if (xe_device_wedged(xe)) 1349 return true; 1350 1351 xe_device_declare_wedged(xe); 1352 1353 return true; 1354 } 1355 1356 #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) 1357 1358 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) 1359 { 1360 struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); 1361 u32 ctx_timestamp, ctx_job_timestamp; 1362 u32 timeout_ms = q->sched_props.job_timeout_ms; 1363 u32 diff; 1364 u64 running_time_ms; 1365 1366 if (!xe_sched_job_started(job)) { 1367 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started", 1368 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1369 q->guc->id); 1370 1371 /* GuC never scheduled this job - let the caller trigger a GT reset. 
*/ 1372 return true; 1373 } 1374 1375 ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0])); 1376 if (ctx_timestamp == job->sample_timestamp) { 1377 if (IS_SRIOV_VF(gt_to_xe(gt))) 1378 xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1379 xe_sched_job_seqno(job), 1380 xe_sched_job_lrc_seqno(job), q->guc->id); 1381 else 1382 xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", 1383 xe_sched_job_seqno(job), 1384 xe_sched_job_lrc_seqno(job), q->guc->id); 1385 1386 return xe_sched_invalidate_job(job, 0); 1387 } 1388 1389 job->sample_timestamp = ctx_timestamp; 1390 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); 1391 1392 /* 1393 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch 1394 * possible overflows with a high timeout. 1395 */ 1396 xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 1397 1398 diff = ctx_timestamp - ctx_job_timestamp; 1399 1400 /* 1401 * Ensure timeout is within 5% to account for an GuC scheduling latency 1402 */ 1403 running_time_ms = 1404 ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); 1405 1406 xe_gt_dbg(gt, 1407 "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", 1408 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1409 q->guc->id, running_time_ms, timeout_ms, diff); 1410 1411 return running_time_ms >= timeout_ms; 1412 } 1413 1414 static void enable_scheduling(struct xe_exec_queue *q) 1415 { 1416 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1417 struct xe_guc *guc = exec_queue_to_guc(q); 1418 int ret; 1419 1420 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1421 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1422 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1423 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1424 1425 set_exec_queue_pending_enable(q); 1426 set_exec_queue_enabled(q); 1427 trace_xe_exec_queue_scheduling_enable(q); 
1428 1429 if (xe_exec_queue_is_multi_queue_secondary(q)) 1430 handle_multi_queue_secondary_sched_done(guc, q, 1); 1431 else 1432 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1433 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1434 1435 ret = wait_event_timeout(guc->ct.wq, 1436 !exec_queue_pending_enable(q) || 1437 xe_guc_read_stopped(guc) || 1438 vf_recovery(guc), HZ * 5); 1439 if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) { 1440 xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); 1441 set_exec_queue_banned(q); 1442 xe_gt_reset_async(q->gt); 1443 xe_sched_tdr_queue_imm(&q->guc->sched); 1444 } 1445 } 1446 1447 static void disable_scheduling(struct xe_exec_queue *q, bool immediate) 1448 { 1449 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1450 struct xe_guc *guc = exec_queue_to_guc(q); 1451 1452 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1453 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1454 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1455 1456 if (immediate && !xe_exec_queue_is_multi_queue_secondary(q)) 1457 set_min_preemption_timeout(guc, q); 1458 clear_exec_queue_enabled(q); 1459 set_exec_queue_pending_disable(q); 1460 trace_xe_exec_queue_scheduling_disable(q); 1461 1462 if (xe_exec_queue_is_multi_queue_secondary(q)) 1463 handle_multi_queue_secondary_sched_done(guc, q, 0); 1464 else 1465 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1466 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1467 } 1468 1469 /* 1470 * Recover via GT reset for a kernel queue, or for a GuC scheduling failure (job 1471 * never started) on a queue that was not already killed or banned. An already 1472 * banned queue must stay banned, so its unstarted jobs do not clear the ban or 1473 * trigger a reset. 
1474 */ 1475 static bool timeout_needs_gt_reset(struct xe_exec_queue *q, struct xe_sched_job *job, 1476 bool skip_timeout_check) 1477 { 1478 if (q->flags & EXEC_QUEUE_FLAG_KERNEL) 1479 return true; 1480 1481 return !skip_timeout_check && !xe_sched_job_started(job); 1482 } 1483 1484 static enum drm_gpu_sched_stat 1485 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 1486 { 1487 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1488 struct drm_sched_job *tmp_job; 1489 struct xe_exec_queue *q = job->q, *primary; 1490 struct xe_gpu_scheduler *sched = &q->guc->sched; 1491 struct xe_guc *guc = exec_queue_to_guc(q); 1492 const char *process_name = "no process"; 1493 struct xe_device *xe = guc_to_xe(guc); 1494 int err = -ETIME; 1495 pid_t pid = -1; 1496 bool wedged = false, skip_timeout_check; 1497 1498 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1499 1500 primary = xe_exec_queue_multi_queue_primary(q); 1501 1502 /* 1503 * TDR has fired before free job worker. Common if exec queue 1504 * immediately closed after last fence signaled. Add back to pending 1505 * list so job can be freed and kick scheduler ensuring free job is not 1506 * lost. 
1507 */ 1508 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) || 1509 vf_recovery(guc)) 1510 return DRM_GPU_SCHED_STAT_NO_HANG; 1511 1512 /* Kill the run_job entry point */ 1513 if (xe_exec_queue_is_multi_queue(q)) 1514 xe_guc_exec_queue_group_stop(q); 1515 else 1516 xe_sched_submission_stop(sched); 1517 1518 /* Must check all state after stopping scheduler */ 1519 skip_timeout_check = exec_queue_reset(q) || 1520 exec_queue_killed_or_banned_or_wedged(q); 1521 1522 /* Skip timeout check if multi-queue group is banned */ 1523 if (xe_exec_queue_is_multi_queue(q) && 1524 READ_ONCE(q->multi_queue.group->banned)) 1525 skip_timeout_check = true; 1526 1527 /* LR jobs can only get here if queue has been killed or hit an error */ 1528 if (xe_exec_queue_is_lr(q)) 1529 xe_gt_assert(guc_to_gt(guc), skip_timeout_check); 1530 1531 /* 1532 * If devcoredump not captured and GuC capture for the job is not ready 1533 * do manual capture first and decide later if we need to use it 1534 */ 1535 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1536 !xe_guc_capture_get_matching_and_lock(q)) { 1537 /* take force wake before engine register manual capture */ 1538 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); 1539 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) 1540 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1541 1542 xe_engine_snapshot_capture_for_queue(q); 1543 } 1544 1545 /* 1546 * Check if job is actually timed out, if so restart job execution and TDR 1547 */ 1548 if (!skip_timeout_check && !check_timeout(q, job)) 1549 goto rearm; 1550 1551 if (!exec_queue_killed(q)) 1552 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1553 1554 set_exec_queue_banned(q); 1555 1556 /* Kick job / queue off hardware */ 1557 if (!wedged && (exec_queue_enabled(primary) || 1558 exec_queue_pending_disable(primary))) { 1559 int ret; 1560 1561 if (exec_queue_reset(primary)) 1562 err = -EIO; 1563 1564 if 
(xe_uc_fw_is_running(&guc->fw)) { 1565 /* 1566 * Wait for any pending G2H to flush out before 1567 * modifying state 1568 */ 1569 ret = wait_event_timeout(guc->ct.wq, 1570 (!exec_queue_pending_enable(primary) && 1571 !exec_queue_pending_disable(primary)) || 1572 xe_guc_read_stopped(guc) || 1573 vf_recovery(guc), HZ * 5); 1574 if (vf_recovery(guc)) 1575 goto handle_vf_resume; 1576 if (!ret || xe_guc_read_stopped(guc)) 1577 goto trigger_reset; 1578 1579 disable_scheduling(primary, skip_timeout_check); 1580 } 1581 1582 /* 1583 * Must wait for scheduling to be disabled before signalling 1584 * any fences, if GT broken the GT reset code should signal us. 1585 * 1586 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault 1587 * error) messages which can cause the schedule disable to get 1588 * lost. If this occurs, trigger a GT reset to recover. 1589 */ 1590 smp_rmb(); 1591 ret = wait_event_timeout(guc->ct.wq, 1592 !xe_uc_fw_is_running(&guc->fw) || 1593 !exec_queue_pending_disable(primary) || 1594 xe_guc_read_stopped(guc) || 1595 vf_recovery(guc), HZ * 5); 1596 if (vf_recovery(guc)) 1597 goto handle_vf_resume; 1598 if (!ret || xe_guc_read_stopped(guc)) { 1599 trigger_reset: 1600 if (!ret) 1601 xe_gt_warn(guc_to_gt(guc), 1602 "Schedule disable failed to respond, guc_id=%d", 1603 primary->guc->id); 1604 xe_devcoredump(primary, job, 1605 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", 1606 primary->guc->id, ret, xe_guc_read_stopped(guc)); 1607 xe_gt_reset_async(primary->gt); 1608 xe_sched_tdr_queue_imm(sched); 1609 goto rearm; 1610 } 1611 } 1612 1613 if (q->vm && q->vm->xef) { 1614 process_name = q->vm->xef->process_name; 1615 pid = q->vm->xef->pid; 1616 } 1617 1618 if (!exec_queue_killed(q)) 1619 xe_gt_notice(guc_to_gt(guc), 1620 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", 1621 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1622 q->guc->id, q->flags, process_name, pid); 1623 1624 
trace_xe_sched_job_timedout(job); 1625 1626 if (!exec_queue_killed(q)) 1627 xe_devcoredump(q, job, 1628 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1629 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1630 q->guc->id, q->flags); 1631 1632 if (!wedged) { 1633 if (timeout_needs_gt_reset(q, job, skip_timeout_check)) { 1634 if (!xe_sched_invalidate_job(job, 2)) { 1635 clear_exec_queue_banned(q); 1636 xe_gt_reset_async(q->gt); 1637 goto rearm; 1638 } 1639 if (q->flags & EXEC_QUEUE_FLAG_KERNEL) { 1640 xe_gt_WARN(q->gt, true, "Kernel-submitted job timed out\n"); 1641 xe_device_declare_wedged(gt_to_xe(q->gt)); 1642 } 1643 } else if (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)) { 1644 xe_gt_WARN(q->gt, true, "VM job timed out on non-killed execqueue\n"); 1645 } 1646 } 1647 1648 /* Mark all outstanding jobs as bad, thus completing them */ 1649 xe_sched_job_set_error(job, err); 1650 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) 1651 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); 1652 1653 if (xe_exec_queue_is_multi_queue(q)) { 1654 xe_guc_exec_queue_group_start(q); 1655 xe_guc_exec_queue_group_trigger_cleanup(q); 1656 } else { 1657 xe_sched_submission_start(sched); 1658 xe_guc_exec_queue_trigger_cleanup(q); 1659 } 1660 1661 /* 1662 * We want the job added back to the pending list so it gets freed; this 1663 * is what DRM_GPU_SCHED_STAT_NO_HANG does. 1664 */ 1665 return DRM_GPU_SCHED_STAT_NO_HANG; 1666 1667 rearm: 1668 /* 1669 * XXX: Ideally want to adjust timeout based on current execution time 1670 * but there is not currently an easy way to do in DRM scheduler. With 1671 * some thought, do this in a follow up. 
1672 */ 1673 if (xe_exec_queue_is_multi_queue(q)) 1674 xe_guc_exec_queue_group_start(q); 1675 else 1676 xe_sched_submission_start(sched); 1677 handle_vf_resume: 1678 return DRM_GPU_SCHED_STAT_NO_HANG; 1679 } 1680 1681 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1682 { 1683 struct xe_guc_exec_queue *ge = q->guc; 1684 struct xe_guc *guc = exec_queue_to_guc(q); 1685 1686 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1687 struct xe_exec_queue_group *group = q->multi_queue.group; 1688 1689 mutex_lock(&group->list_lock); 1690 list_del(&q->multi_queue.link); 1691 mutex_unlock(&group->list_lock); 1692 } 1693 1694 release_guc_id(guc, q); 1695 xe_sched_entity_fini(&ge->entity); 1696 xe_sched_fini(&ge->sched); 1697 1698 /* 1699 * RCU free due sched being exported via DRM scheduler fences 1700 * (timeline name). 1701 */ 1702 kfree_rcu(ge, rcu); 1703 } 1704 1705 static void __guc_exec_queue_destroy_async(struct work_struct *w) 1706 { 1707 struct xe_guc_exec_queue *ge = 1708 container_of(w, struct xe_guc_exec_queue, destroy_async); 1709 struct xe_exec_queue *q = ge->q; 1710 struct xe_guc *guc = exec_queue_to_guc(q); 1711 1712 guard(xe_pm_runtime)(guc_to_xe(guc)); 1713 trace_xe_exec_queue_destroy(q); 1714 1715 /* Confirm no work left behind accessing device structures */ 1716 cancel_delayed_work_sync(&ge->sched.base.work_tdr); 1717 1718 xe_exec_queue_fini(q); 1719 } 1720 1721 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) 1722 { 1723 struct xe_guc *guc = exec_queue_to_guc(q); 1724 struct xe_device *xe = guc_to_xe(guc); 1725 1726 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); 1727 1728 /* We must block on kernel engines so slabs are empty on driver unload */ 1729 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) 1730 __guc_exec_queue_destroy_async(&q->guc->destroy_async); 1731 else 1732 queue_work(xe->destroy_wq, &q->guc->destroy_async); 1733 } 1734 1735 static void __guc_exec_queue_destroy(struct xe_guc *guc, 
struct xe_exec_queue *q) 1736 { 1737 /* 1738 * Might be done from within the GPU scheduler, need to do async as we 1739 * fini the scheduler when the engine is fini'd, the scheduler can't 1740 * complete fini within itself (circular dependency). Async resolves 1741 * this we and don't really care when everything is fini'd, just that it 1742 * is. 1743 */ 1744 guc_exec_queue_destroy_async(q); 1745 } 1746 1747 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1748 { 1749 struct xe_exec_queue *q = msg->private_data; 1750 struct xe_guc *guc = exec_queue_to_guc(q); 1751 1752 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1753 trace_xe_exec_queue_cleanup_entity(q); 1754 1755 /* 1756 * Expected state transitions for cleanup: 1757 * - If the exec queue is registered and GuC firmware is running, we must first 1758 * disable scheduling and deregister the queue to ensure proper teardown and 1759 * resource release in the GuC, then destroy the exec queue on driver side. 1760 * - If the GuC is already stopped (e.g., during driver unload or GPU reset), 1761 * we cannot expect a response for the deregister request. In this case, 1762 * it is safe to directly destroy the exec queue on driver side, as the GuC 1763 * will not process further requests and all resources must be cleaned up locally. 
1764 */ 1765 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) 1766 disable_scheduling_deregister(guc, q); 1767 else 1768 __guc_exec_queue_destroy(guc, q); 1769 } 1770 1771 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1772 { 1773 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); 1774 } 1775 1776 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1777 { 1778 struct xe_exec_queue *q = msg->private_data; 1779 struct xe_guc *guc = exec_queue_to_guc(q); 1780 1781 if (guc_exec_queue_allowed_to_change_state(q)) 1782 init_policies(guc, q); 1783 kfree(msg); 1784 } 1785 1786 static void __suspend_fence_signal(struct xe_exec_queue *q) 1787 { 1788 struct xe_guc *guc = exec_queue_to_guc(q); 1789 struct xe_device *xe = guc_to_xe(guc); 1790 1791 if (!q->guc->suspend_pending) 1792 return; 1793 1794 WRITE_ONCE(q->guc->suspend_pending, false); 1795 1796 /* 1797 * We use a GuC shared wait queue for VFs because the VF resfix start 1798 * interrupt must be able to wake all instances of suspend_wait. This 1799 * prevents the VF migration worker from being starved during 1800 * scheduling. 
1801 */ 1802 if (IS_SRIOV_VF(xe)) 1803 wake_up_all(&guc->ct.wq); 1804 else 1805 wake_up(&q->guc->suspend_wait); 1806 } 1807 1808 static void suspend_fence_signal(struct xe_exec_queue *q) 1809 { 1810 struct xe_guc *guc = exec_queue_to_guc(q); 1811 1812 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1813 xe_guc_read_stopped(guc)); 1814 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1815 1816 __suspend_fence_signal(q); 1817 } 1818 1819 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1820 { 1821 struct xe_exec_queue *q = msg->private_data; 1822 struct xe_guc *guc = exec_queue_to_guc(q); 1823 1824 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1825 exec_queue_enabled(q)) { 1826 wait_event(guc->ct.wq, vf_recovery(guc) || 1827 ((q->guc->resume_time != RESUME_PENDING || 1828 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); 1829 1830 if (!xe_guc_read_stopped(guc)) { 1831 s64 since_resume_ms = 1832 ktime_ms_delta(ktime_get(), 1833 q->guc->resume_time); 1834 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1835 since_resume_ms; 1836 1837 if (wait_ms > 0 && q->guc->resume_time) 1838 xe_sleep_relaxed_ms(wait_ms); 1839 1840 set_exec_queue_suspended(q); 1841 disable_scheduling(q, false); 1842 } 1843 } else if (q->guc->suspend_pending) { 1844 set_exec_queue_suspended(q); 1845 suspend_fence_signal(q); 1846 } 1847 } 1848 1849 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1850 { 1851 struct xe_exec_queue *q = msg->private_data; 1852 1853 if (guc_exec_queue_allowed_to_change_state(q)) { 1854 clear_exec_queue_suspended(q); 1855 if (!exec_queue_enabled(q)) { 1856 q->guc->resume_time = RESUME_PENDING; 1857 set_exec_queue_pending_resume(q); 1858 enable_scheduling(q); 1859 } 1860 } else { 1861 clear_exec_queue_suspended(q); 1862 } 1863 } 1864 1865 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg) 1866 { 1867 
struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q))
		guc_exec_queue_send_cgp_sync(q);

	/* The message is freed whether or not the sync was sent */
	kfree(msg);
}

/*
 * Message opcodes. OPCODE_MASK selects the opcode; MSG_LOCKED and MSG_HEAD are
 * flag bits OR'ed into the opcode argument of guc_exec_queue_add_msg().
 */
#define CLEANUP				1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS			2
#define SUSPEND				3
#define RESUME				4
#define SET_MULTI_QUEUE_PRIORITY	5
#define OPCODE_MASK	0xf
#define MSG_LOCKED	BIT(8)
#define MSG_HEAD	BIT(9)

/*
 * Scheduler message callback: dispatch @msg to the handler for its opcode and
 * then drop a runtime PM reference.
 */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	/* Looked up before dispatch; some handlers kfree() the message */
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	case SET_MULTI_QUEUE_PRIORITY:
		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	xe_pm_runtime_put(xe);
}

/* DRM scheduler job callbacks used for every GuC exec queue */
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

/* Xe scheduler message callback used for every GuC exec queue */
static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

/*
 * Backend init for an exec queue: allocates the per-queue GuC state, a GuC ID,
 * the scheduler and its entity, then publishes the queue.
 *
 * Returns 0 on success or a negative error code.
 */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct workqueue_struct *submit_wq = NULL;
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc_obj(*ge);
	if (!ge)
		return -ENOMEM;

q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	/* Empty list links so list_empty() reports the static messages as not queued */
	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	/* Queues on a VM in LR mode get MAX_SCHEDULE_TIMEOUT, others job_timeout_ms */
	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_free;

	xe_exec_queue_assign_name(q, q->guc->id);

	/*
	 * Use primary queue's submit_wq for all secondary queues of a
	 * multi queue group. This serialization avoids any locking around
	 * CGP synchronization with GuC.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

		submit_wq = primary->guc->sched.base.submit_wq;
	}

	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_release_id;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	q->entity = &ge->entity;

	/*
	 * Under submission_state.lock: if the GuC is stopped or VF recovery is
	 * in progress, stop the new scheduler before publishing the GuC ID.
	 */
	mutex_lock(&guc->submission_state.lock);
	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
		xe_sched_stop(sched);
	publish_guc_id(guc, q);
	mutex_unlock(&guc->submission_state.lock);

	/*
	 * Maintain secondary queues of the multi queue group in a list
	 * for handling dependencies across the queues in the group.
1989 */ 1990 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1991 struct xe_exec_queue_group *group = q->multi_queue.group; 1992 1993 INIT_LIST_HEAD(&q->multi_queue.link); 1994 mutex_lock(&group->list_lock); 1995 if (group->stopped) 1996 WRITE_ONCE(q->guc->sched.base.pause_submit, true); 1997 list_add_tail(&q->multi_queue.link, &group->list); 1998 mutex_unlock(&group->list_lock); 1999 } 2000 2001 if (xe_exec_queue_is_multi_queue(q)) 2002 trace_xe_exec_queue_create_multi_queue(q); 2003 else 2004 trace_xe_exec_queue_create(q); 2005 2006 return 0; 2007 2008 err_sched: 2009 xe_sched_fini(&ge->sched); 2010 err_release_id: 2011 release_guc_id(guc, q); 2012 err_free: 2013 kfree(ge); 2014 2015 return err; 2016 } 2017 2018 static void guc_exec_queue_kill(struct xe_exec_queue *q) 2019 { 2020 trace_xe_exec_queue_kill(q); 2021 set_exec_queue_killed(q); 2022 __suspend_fence_signal(q); 2023 xe_guc_exec_queue_trigger_cleanup(q); 2024 } 2025 2026 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 2027 u32 opcode) 2028 { 2029 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); 2030 2031 INIT_LIST_HEAD(&msg->link); 2032 msg->opcode = opcode & OPCODE_MASK; 2033 msg->private_data = q; 2034 2035 trace_xe_sched_msg_add(msg); 2036 if (opcode & MSG_HEAD) 2037 xe_sched_add_msg_head(&q->guc->sched, msg); 2038 else if (opcode & MSG_LOCKED) 2039 xe_sched_add_msg_locked(&q->guc->sched, msg); 2040 else 2041 xe_sched_add_msg(&q->guc->sched, msg); 2042 } 2043 2044 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, 2045 struct xe_sched_msg *msg, 2046 u32 opcode) 2047 { 2048 if (!list_empty(&msg->link)) 2049 return; 2050 2051 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD); 2052 } 2053 2054 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, 2055 struct xe_sched_msg *msg, 2056 u32 opcode) 2057 { 2058 if (!list_empty(&msg->link)) 2059 return false; 2060 2061 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); 
2062 2063 return true; 2064 } 2065 2066 #define STATIC_MSG_CLEANUP 0 2067 #define STATIC_MSG_SUSPEND 1 2068 #define STATIC_MSG_RESUME 2 2069 static void guc_exec_queue_destroy(struct xe_exec_queue *q) 2070 { 2071 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 2072 2073 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) 2074 guc_exec_queue_add_msg(q, msg, CLEANUP); 2075 else 2076 __guc_exec_queue_destroy(exec_queue_to_guc(q), q); 2077 } 2078 2079 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 2080 enum xe_exec_queue_priority priority) 2081 { 2082 struct xe_sched_msg *msg; 2083 2084 if (q->sched_props.priority == priority || 2085 exec_queue_killed_or_banned_or_wedged(q)) 2086 return 0; 2087 2088 msg = kmalloc_obj(*msg); 2089 if (!msg) 2090 return -ENOMEM; 2091 2092 q->sched_props.priority = priority; 2093 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2094 2095 return 0; 2096 } 2097 2098 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 2099 { 2100 struct xe_sched_msg *msg; 2101 2102 if (q->sched_props.timeslice_us == timeslice_us || 2103 exec_queue_killed_or_banned_or_wedged(q)) 2104 return 0; 2105 2106 msg = kmalloc_obj(*msg); 2107 if (!msg) 2108 return -ENOMEM; 2109 2110 q->sched_props.timeslice_us = timeslice_us; 2111 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2112 2113 return 0; 2114 } 2115 2116 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 2117 u32 preempt_timeout_us) 2118 { 2119 struct xe_sched_msg *msg; 2120 2121 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 2122 exec_queue_killed_or_banned_or_wedged(q)) 2123 return 0; 2124 2125 msg = kmalloc_obj(*msg); 2126 if (!msg) 2127 return -ENOMEM; 2128 2129 q->sched_props.preempt_timeout_us = preempt_timeout_us; 2130 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 2131 2132 return 0; 2133 } 2134 2135 static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, 
enum xe_multi_queue_priority priority)
{
	struct xe_sched_msg *msg;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));

	if (exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	/* Allocated before taking the spinlock; freed below if it is not needed */
	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	scoped_guard(spinlock, &q->multi_queue.lock) {
		if (q->multi_queue.priority == priority) {
			kfree(msg);
			return 0;
		}

		q->multi_queue.priority = priority;
	}

	guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);

	return 0;
}

/*
 * Backend suspend: queue the static SUSPEND message. suspend_pending is only
 * set when the message was actually added (it was not already queued).
 *
 * Returns -EINVAL if the queue is killed, banned or wedged, 0 otherwise.
 */
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

/*
 * Backend suspend_wait: wait (interruptibly, in HZ * 5 slices) for a pending
 * suspend to complete.
 *
 * Returns 0 on success, -EAGAIN if VF recovery is in progress and the device
 * is not wedged, -ETIME if the wait timed out, or the negative value returned
 * by the interrupted wait.
 */
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * We likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to be paranoid about races in which
	 * suspend_pending is set after kill, also check kill here.
*/
#define WAIT_COND \
	(!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
	 xe_guc_read_stopped(guc))

retry:
	/* VFs wait on the shared CT wait queue and also wake on VF recovery */
	if (IS_SRIOV_VF(xe))
		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
						       vf_recovery(guc),
						       HZ * 5);
	else
		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
						       WAIT_COND, HZ * 5);

	if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
		return -EAGAIN;

	/* ret == 0: the timeout elapsed with the condition still false */
	if (!ret) {
		xe_gt_warn(guc_to_gt(guc),
			   "Suspend fence, guc_id=%d, failed to respond",
			   q->guc->id);
		/* XXX: Trigger GT reset? */
		return -ETIME;
	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
		/* Corner case on RESFIX DONE where vf_recovery() changes */
		goto retry;
	}

#undef WAIT_COND

	/* Positive values (time remaining) are reported as success */
	return ret < 0 ? ret : 0;
}

/*
 * Backend resume: queue the static RESUME message unless it is already
 * queued. No suspend may be pending at this point (asserted).
 */
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

/*
 * Backend reset_status: true if the queue is reset, killed, banned or wedged.
 * For a secondary queue of a multi queue group, also true if that holds for
 * the group's primary queue.
 */
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
		return true;

	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * Backend active: evaluated on the queue returned by
 * xe_exec_queue_multi_queue_primary(); true when it is enabled and has no
 * disable pending.
 */
static bool guc_exec_queue_active(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

	return exec_queue_enabled(primary) &&
		!exec_queue_pending_disable(primary);
}

/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC
backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
	.active = guc_exec_queue_active,
};

/*
 * Stop one exec queue: stop its scheduler, release a pending suspend waiter,
 * reset the transient state bits and decide whether the queue must be banned
 * and/or destroyed.
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool do_destroy = false;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		/* Sampled now; the registered bit is cleared by the mask below */
		if (exec_queue_destroyed(q))
			do_destroy = true;
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only the bits listed here; every other state bit is cleared */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
*/
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}

	/* Destroy state was sampled before the state bits were reset */
	if (do_destroy)
		__guc_exec_queue_destroy(guc, q);
}

/*
 * Mark submission as stopped and wake everything waiting on the CT wait
 * queue. Returns the value of submission_state.stopped before the update.
 */
static int guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
*/
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

/**
 * xe_guc_submit_reset_prepare - Mark GuC submission as stopped ahead of a reset.
 * @guc: the &xe_guc struct instance
 *
 * Warns and does nothing if VF recovery is in progress; does nothing if the
 * submission state has not been initialized.
 *
 * Return: 0 in the two cases above, otherwise the previous value of the
 * stopped flag as returned by guc_submit_reset_prepare().
 */
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	return guc_submit_reset_prepare(guc);
}

/**
 * xe_guc_submit_reset_wait - Wait until GuC submission is no longer stopped.
 * @guc: the &xe_guc struct instance
 *
 * Sleeps on the CT wait queue until the device is wedged or
 * xe_guc_read_stopped() reports false.
 */
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

/**
 * xe_guc_submit_stop - Stop every exec queue known to this GuC.
 * @guc: the &xe_guc struct instance
 *
 * Submission must already have been marked stopped (asserted). Each queue in
 * the lookup table is stopped under submission_state.lock.
 */
void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
*/

}

/*
 * Undo state changes that were still pending (enable, resume, disable,
 * destroy) and record in needs_resume / needs_cleanup / needs_suspend which
 * of them have to be replayed later. Also resets resume_time to zero.
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	/* Sampled up front; the bits themselves are cleared further down */
	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* A pending enable that is not a resume also drops the registered bit */
	if (pending_enable && !pending_resume) {
		clear_exec_queue_registered(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	/* Evaluated after the registered bit may have been cleared above */
	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		q->guc->needs_cleanup = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		/* Re-mark as enabled unless an enable was pending as well */
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/*
 * Overwrite every u32 entry of the parallel work queue (wq[]) of @lrc with a
 * zero-length NOOP item.
 */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK,
0));
}

/*
 * This function is quite complex but is the only real way to ensure no state
 * is lost during VF resume flows. The function scans the queue state, makes
 * adjustments as needed, and queues jobs / messages which are replayed upon
 * unpause.
 *
 * Must be called with guc->submission_state.lock held.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init  */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		/* Flag the first pending job so the unpause path replays from it */
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2502 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2503 */ 2504 void xe_guc_submit_pause(struct xe_guc *guc) 2505 { 2506 struct xe_exec_queue *q; 2507 unsigned long index; 2508 2509 mutex_lock(&guc->submission_state.lock); 2510 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 2511 xe_sched_submission_stop(&q->guc->sched); 2512 mutex_unlock(&guc->submission_state.lock); 2513 } 2514 2515 /** 2516 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF. 2517 * @guc: the &xe_guc struct instance whose scheduler is to be disabled 2518 */ 2519 void xe_guc_submit_pause_vf(struct xe_guc *guc) 2520 { 2521 struct xe_exec_queue *q; 2522 unsigned long index; 2523 2524 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc))); 2525 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); 2526 2527 mutex_lock(&guc->submission_state.lock); 2528 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 2529 /* Prevent redundant attempts to stop parallel queues */ 2530 if (q->guc->id != index) 2531 continue; 2532 2533 guc_exec_queue_pause(guc, q); 2534 } 2535 mutex_unlock(&guc->submission_state.lock); 2536 } 2537 2538 static void guc_exec_queue_start(struct xe_exec_queue *q) 2539 { 2540 struct xe_gpu_scheduler *sched = &q->guc->sched; 2541 2542 if (!exec_queue_killed_or_banned_or_wedged(q)) { 2543 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 2544 int i; 2545 2546 trace_xe_exec_queue_resubmit(q); 2547 if (job) { 2548 for (i = 0; i < q->width; ++i) { 2549 /* 2550 * The GuC context is unregistered at this point 2551 * time, adjusting software ring tail ensures 2552 * jobs are rewritten in original placement, 2553 * adjusting LRC tail ensures the newly loaded 2554 * GuC / contexts only view the LRC tail 2555 * increasing as jobs are written out. 
*/
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_start - Restart submission on every exec queue of this GuC.
 * @guc: the &xe_guc struct instance
 *
 * Submission must currently be marked stopped (asserted). The stopped counter
 * is decremented and every queue restarted under submission_state.lock, then
 * all waiters on the CT wait queue are woken.
 *
 * Return: always 0.
 */
int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

/*
 * Walk the pending jobs of @q and re-emit every job from the first one flagged
 * restore_replay onwards. The last pending job visited is marked last_replay.
 */
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL;
	struct drm_sched_job *s_job;
	bool restore_replay = false;

	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
		job = to_xe_sched_job(s_job);
		/* Sticky: once one job is flagged, all later jobs replay too */
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	/* job is NULL when there were no pending jobs */
	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
* @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to prepare parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Re-queue the messages recorded in needs_cleanup / needs_suspend /
 * needs_resume and clear those flags. Suspend and resume are inserted at the
 * head of the message list, suspend first and resume second.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend is pending, but
	 * the inverse is possible.
*/
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

/*
 * Unpause one exec queue: resubmit its jobs, replay recorded state-change
 * messages and restart the scheduler and its TDR. Cleanup is triggered for
 * queues that were killed, banned or wedged on entry.
 *
 * Must be called with guc->submission_state.lock held.
 */
static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	/* Sampled once, before anything below runs */
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
*/
		if (q->guc->id != index ||
		    !drm_sched_is_stopped(&q->guc->sched.base))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 *
 * Each queue's scheduler is restarted and the queue is then killed.
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		guc_exec_queue_kill(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Translate a guc_id received in a G2H message into its exec queue.
 *
 * Returns NULL (after logging an error) if @guc_id is out of range or no
 * queue is stored under it.
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	/* The id must fall within [q->guc->id, q->guc->id + q->width) */
	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

/*
 * Deregister a destroyed queue's context. The queue must be destroyed and
 * registered, with no enable or disable pending (all asserted).
 */
static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc),
!exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	/*
	 * Secondary queues of a multi queue group send no deregister action;
	 * the deregister-done step is run directly instead.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/*
 * Process a scheduling-done notification for @q.
 *
 * @runnable_state 1 completes a pending enable; any other value is asserted
 * to be 0 and completes a pending disable.
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		/* Record the resume time, clear the pending bits, wake waiters */
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			/* The disable was issued for a suspend: signal its waiter */
			clear_exec_queue_pending_disable(q);
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
*/
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

/* Variant of handle_sched_done() that takes the CT lock around the call. */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take the CT lock here as handle_sched_done() may send an H2G message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

/**
 * xe_guc_sched_done_handler - Handle a scheduling-done G2H message.
 * @guc: the &xe_guc struct instance
 * @msg: message payload; msg[0] is the guc_id, msg[1] the runnable state
 * @len: payload length in dwords
 *
 * Return: 0 on success, -EPROTO if the message is too short, the guc_id does
 * not resolve to a queue, or the queue has neither an enable nor a disable
 * pending.
 */
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

/* Complete a deregistration: clear the registered bit and destroy the queue. */
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}

/**
 * xe_guc_deregister_done_handler - Handle a deregister-done G2H message.
 * @guc: the &xe_guc struct instance
 * @msg: message payload; msg[0] is the guc_id
 * @len: payload length in dwords
 *
 * Return: 0 on success, -EPROTO if the message is empty, the guc_id does not
 * resolve to a queue, or the queue is not in the expected state (destroyed,
 * not enabled, no enable or disable pending).
 */
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

/**
 * xe_guc_exec_queue_reset_handler - Handle an engine-reset G2H message.
 * @guc: the &xe_guc struct instance
 * @msg: message payload; msg[0] is the guc_id
 * @len: payload length in dwords
 *
 * Return: 0 on success, -EPROTO if the message is empty or the guc_id does
 * not resolve to a queue.
 */
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/* The reset is not logged for queues that were already killed */
	if (!exec_queue_killed(q))
		xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
			   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
			   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
	 * jobs by setting timeout of the job to the minimum value kicking
	 * guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Pointer to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send the message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to the host. This function is
 * called first to check the status before processing the data that comes
 * with the message.
 *
 * Returns: error code.
0 if success 2962 */ 2963 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2964 { 2965 u32 status; 2966 2967 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2968 return -EPROTO; 2969 2970 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2971 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2972 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2973 2974 xe_guc_capture_process(guc); 2975 2976 return 0; 2977 } 2978 2979 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2980 u32 len) 2981 { 2982 struct xe_gt *gt = guc_to_gt(guc); 2983 struct xe_exec_queue *q; 2984 u32 guc_id; 2985 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2986 2987 if (unlikely(!len || len > 2)) 2988 return -EPROTO; 2989 2990 guc_id = msg[0]; 2991 2992 if (len == 2) 2993 type = msg[1]; 2994 2995 if (guc_id == GUC_ID_UNKNOWN) { 2996 /* 2997 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 2998 * context. In such case only PF will be notified about that fault. 2999 */ 3000 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 3001 return 0; 3002 } 3003 3004 q = g2h_exec_queue_lookup(guc, guc_id); 3005 if (unlikely(!q)) 3006 return -EPROTO; 3007 3008 /* 3009 * The type is HW-defined and changes based on platform, so we don't 3010 * decode it in the kernel and only check if it is valid. 3011 * See bspec 54047 and 72187 for details. 
3012 */ 3013 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 3014 xe_gt_info(gt, 3015 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 3016 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3017 else 3018 xe_gt_info(gt, 3019 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 3020 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 3021 3022 trace_xe_exec_queue_memory_cat_error(q); 3023 3024 /* Treat the same as engine reset */ 3025 xe_guc_exec_queue_reset_trigger_cleanup(q); 3026 3027 return 0; 3028 } 3029 3030 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 3031 { 3032 struct xe_gt *gt = guc_to_gt(guc); 3033 u8 guc_class, instance; 3034 u32 reason; 3035 3036 if (unlikely(len != 3)) 3037 return -EPROTO; 3038 3039 guc_class = msg[0]; 3040 instance = msg[1]; 3041 reason = msg[2]; 3042 3043 /* Unexpected failure of a hardware feature, log an actual error */ 3044 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3045 guc_class, instance, reason); 3046 3047 xe_gt_reset_async(gt); 3048 3049 return 0; 3050 } 3051 3052 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg, 3053 u32 len) 3054 { 3055 struct xe_gt *gt = guc_to_gt(guc); 3056 struct xe_device *xe = guc_to_xe(guc); 3057 struct xe_exec_queue *q; 3058 u32 guc_id = msg[2]; 3059 3060 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) { 3061 drm_err(&xe->drm, "Invalid length %u", len); 3062 return -EPROTO; 3063 } 3064 3065 q = g2h_exec_queue_lookup(guc, guc_id); 3066 if (unlikely(!q)) 3067 return -EPROTO; 3068 3069 xe_gt_dbg(gt, 3070 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x", 3071 msg[0] & 1 ? 
"uc" : "kmd", msg[1], msg[2], msg[3], msg[4]); 3072 3073 trace_xe_exec_queue_cgp_context_error(q); 3074 3075 /* Treat the same as engine reset */ 3076 xe_guc_exec_queue_reset_trigger_cleanup(q); 3077 3078 return 0; 3079 } 3080 3081 /** 3082 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3083 * @guc: guc 3084 * @msg: message indicating CGP sync done 3085 * @len: length of message 3086 * 3087 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3088 * for CGP synchronization to complete. 3089 * 3090 * Return: 0 on success, -EPROTO for malformed messages. 3091 */ 3092 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3093 { 3094 struct xe_device *xe = guc_to_xe(guc); 3095 struct xe_exec_queue *q; 3096 u32 guc_id = msg[0]; 3097 3098 if (unlikely(len < 1)) { 3099 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3100 return -EPROTO; 3101 } 3102 3103 q = g2h_exec_queue_lookup(guc, guc_id); 3104 if (unlikely(!q)) 3105 return -EPROTO; 3106 3107 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3108 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3109 return -EPROTO; 3110 } 3111 3112 /* Wakeup the serialized cgp update wait */ 3113 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3114 xe_guc_ct_wake_waiters(&guc->ct); 3115 3116 return 0; 3117 } 3118 3119 static void 3120 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3121 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3122 { 3123 struct xe_guc *guc = exec_queue_to_guc(q); 3124 struct xe_device *xe = guc_to_xe(guc); 3125 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3126 int i; 3127 3128 snapshot->guc.wqi_head = q->guc->wqi_head; 3129 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3130 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3131 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3132 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 
3133 wq_desc.wq_status); 3134 3135 if (snapshot->parallel.wq_desc.head != 3136 snapshot->parallel.wq_desc.tail) { 3137 for (i = snapshot->parallel.wq_desc.head; 3138 i != snapshot->parallel.wq_desc.tail; 3139 i = (i + sizeof(u32)) % WQ_SIZE) 3140 snapshot->parallel.wq[i / sizeof(u32)] = 3141 parallel_read(xe, map, wq[i / sizeof(u32)]); 3142 } 3143 } 3144 3145 static void 3146 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3147 struct drm_printer *p) 3148 { 3149 int i; 3150 3151 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3152 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 3153 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 3154 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 3155 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 3156 3157 if (snapshot->parallel.wq_desc.head != 3158 snapshot->parallel.wq_desc.tail) { 3159 for (i = snapshot->parallel.wq_desc.head; 3160 i != snapshot->parallel.wq_desc.tail; 3161 i = (i + sizeof(u32)) % WQ_SIZE) 3162 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 3163 snapshot->parallel.wq[i / sizeof(u32)]); 3164 } 3165 } 3166 3167 /** 3168 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 3169 * @q: faulty exec queue 3170 * 3171 * This can be printed out in a later stage like during dev_coredump 3172 * analysis. 3173 * 3174 * Returns: a GuC Submit Engine snapshot object that must be freed by the 3175 * caller, using `xe_guc_exec_queue_snapshot_free`. 
3176 */ 3177 struct xe_guc_submit_exec_queue_snapshot * 3178 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 3179 { 3180 struct xe_gpu_scheduler *sched = &q->guc->sched; 3181 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3182 int i; 3183 3184 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC); 3185 3186 if (!snapshot) 3187 return NULL; 3188 3189 snapshot->guc.id = q->guc->id; 3190 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 3191 snapshot->class = q->class; 3192 snapshot->logical_mask = q->logical_mask; 3193 snapshot->width = q->width; 3194 snapshot->refcount = kref_read(&q->refcount); 3195 snapshot->sched_timeout = sched->base.timeout; 3196 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 3197 snapshot->sched_props.preempt_timeout_us = 3198 q->sched_props.preempt_timeout_us; 3199 3200 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width, 3201 GFP_ATOMIC); 3202 3203 if (snapshot->lrc) { 3204 for (i = 0; i < q->width; ++i) { 3205 struct xe_lrc *lrc = q->lrc[i]; 3206 3207 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 3208 } 3209 } 3210 3211 snapshot->schedule_state = atomic_read(&q->guc->state); 3212 snapshot->exec_queue_flags = q->flags; 3213 3214 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 3215 if (snapshot->parallel_execution) 3216 guc_exec_queue_wq_snapshot_capture(q, snapshot); 3217 3218 if (xe_exec_queue_is_multi_queue(q)) { 3219 snapshot->multi_queue.valid = true; 3220 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id; 3221 snapshot->multi_queue.pos = q->multi_queue.pos; 3222 } 3223 3224 return snapshot; 3225 } 3226 3227 /** 3228 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 3229 * @snapshot: Previously captured snapshot of job. 3230 * 3231 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 
3232 */ 3233 void 3234 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3235 { 3236 int i; 3237 3238 if (!snapshot || !snapshot->lrc) 3239 return; 3240 3241 for (i = 0; i < snapshot->width; ++i) 3242 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 3243 } 3244 3245 /** 3246 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 3247 * @snapshot: GuC Submit Engine snapshot object. 3248 * @p: drm_printer where it will be printed out. 3249 * 3250 * This function prints out a given GuC Submit Engine snapshot object. 3251 */ 3252 void 3253 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3254 struct drm_printer *p) 3255 { 3256 int i; 3257 3258 if (!snapshot) 3259 return; 3260 3261 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id); 3262 drm_printf(p, "\tName: %s\n", snapshot->name); 3263 drm_printf(p, "\tClass: %d\n", snapshot->class); 3264 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); 3265 drm_printf(p, "\tWidth: %d\n", snapshot->width); 3266 drm_printf(p, "\tRef: %d\n", snapshot->refcount); 3267 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); 3268 drm_printf(p, "\tTimeslice: %u (us)\n", 3269 snapshot->sched_props.timeslice_us); 3270 drm_printf(p, "\tPreempt timeout: %u (us)\n", 3271 snapshot->sched_props.preempt_timeout_us); 3272 3273 for (i = 0; snapshot->lrc && i < snapshot->width; ++i) 3274 xe_lrc_snapshot_print(snapshot->lrc[i], p); 3275 3276 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); 3277 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); 3278 3279 if (snapshot->parallel_execution) 3280 guc_exec_queue_wq_snapshot_print(snapshot, p); 3281 3282 if (snapshot->multi_queue.valid) { 3283 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary); 3284 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos); 3285 } 3286 } 3287 3288 /** 3289 * 
xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3290 * snapshot. 3291 * @snapshot: GuC Submit Engine snapshot object. 3292 * 3293 * This function free all the memory that needed to be allocated at capture 3294 * time. 3295 */ 3296 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3297 { 3298 int i; 3299 3300 if (!snapshot) 3301 return; 3302 3303 if (snapshot->lrc) { 3304 for (i = 0; i < snapshot->width; i++) 3305 xe_lrc_snapshot_free(snapshot->lrc[i]); 3306 kfree(snapshot->lrc); 3307 } 3308 kfree(snapshot); 3309 } 3310 3311 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3312 { 3313 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3314 3315 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3316 xe_guc_exec_queue_snapshot_print(snapshot, p); 3317 xe_guc_exec_queue_snapshot_free(snapshot); 3318 } 3319 3320 /** 3321 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3322 * @q: Execution queue 3323 * @ctx_type: Type of the context 3324 * 3325 * This function registers the execution queue with the guc. Special context 3326 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3327 * are only applicable for IGPU and in the VF. 3328 * Submits the execution queue to GUC after registering it. 3329 * 3330 * Returns - None. 
3331 */ 3332 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3333 { 3334 struct xe_guc *guc = exec_queue_to_guc(q); 3335 struct xe_device *xe = guc_to_xe(guc); 3336 struct xe_gt *gt = guc_to_gt(guc); 3337 3338 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3339 xe_gt_assert(gt, !IS_DGFX(xe)); 3340 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3341 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3342 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3343 3344 register_exec_queue(q, ctx_type); 3345 enable_scheduling(q); 3346 } 3347 3348 /** 3349 * xe_guc_submit_print - GuC Submit Print. 3350 * @guc: GuC. 3351 * @p: drm_printer where it will be printed out. 3352 * 3353 * This function capture and prints snapshots of **all** GuC Engines. 3354 */ 3355 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3356 { 3357 struct xe_exec_queue *q; 3358 unsigned long index; 3359 3360 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3361 return; 3362 3363 mutex_lock(&guc->submission_state.lock); 3364 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3365 guc_exec_queue_print(q, p); 3366 mutex_unlock(&guc->submission_state.lock); 3367 } 3368 3369 /** 3370 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues 3371 * registered with the GuC 3372 * @guc: GuC. 3373 * 3374 * Return: true if any MLRC queue is registered with the GuC, false otherwise. 3375 */ 3376 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) 3377 { 3378 struct xe_exec_queue *q; 3379 unsigned long index; 3380 3381 guard(mutex)(&guc->submission_state.lock); 3382 3383 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3384 if (q->width > 1) 3385 return true; 3386 3387 return false; 3388 } 3389 3390 /** 3391 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3392 * exec queues registered to given GuC. 
3393 * @guc: the &xe_guc struct instance 3394 * @scratch: scratch buffer to be used as temporary storage 3395 * 3396 * Returns: zero on success, negative error code on failure. 3397 */ 3398 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3399 { 3400 struct xe_exec_queue *q; 3401 unsigned long index; 3402 int err = 0; 3403 3404 mutex_lock(&guc->submission_state.lock); 3405 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3406 /* Prevent redundant attempts to stop parallel queues */ 3407 if (q->guc->id != index) 3408 continue; 3409 3410 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3411 if (err) 3412 break; 3413 } 3414 mutex_unlock(&guc->submission_state.lock); 3415 3416 return err; 3417 } 3418