// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

/* Resolve the GuC instance owning @q via the GT the queue was created on. */
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as schedule
 * engine done being processed.
61 */ 62 #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) 63 #define EXEC_QUEUE_STATE_ENABLED (1 << 1) 64 #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) 65 #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) 66 #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) 67 #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) 68 #define EXEC_QUEUE_STATE_RESET (1 << 6) 69 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 70 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 71 #define EXEC_QUEUE_STATE_BANNED (1 << 9) 72 #define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) 73 #define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) 74 #define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12) 75 #define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13) 76 77 static bool exec_queue_registered(struct xe_exec_queue *q) 78 { 79 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; 80 } 81 82 static void set_exec_queue_registered(struct xe_exec_queue *q) 83 { 84 atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 85 } 86 87 static void clear_exec_queue_registered(struct xe_exec_queue *q) 88 { 89 atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); 90 } 91 92 static bool exec_queue_enabled(struct xe_exec_queue *q) 93 { 94 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; 95 } 96 97 static void set_exec_queue_enabled(struct xe_exec_queue *q) 98 { 99 atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 100 } 101 102 static void clear_exec_queue_enabled(struct xe_exec_queue *q) 103 { 104 atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); 105 } 106 107 static bool exec_queue_pending_enable(struct xe_exec_queue *q) 108 { 109 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; 110 } 111 112 static void set_exec_queue_pending_enable(struct xe_exec_queue *q) 113 { 114 atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 115 } 116 117 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) 118 { 119 atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); 120 } 121 122 static bool 
exec_queue_pending_disable(struct xe_exec_queue *q) 123 { 124 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; 125 } 126 127 static void set_exec_queue_pending_disable(struct xe_exec_queue *q) 128 { 129 atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 130 } 131 132 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) 133 { 134 atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); 135 } 136 137 static bool exec_queue_destroyed(struct xe_exec_queue *q) 138 { 139 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 140 } 141 142 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 143 { 144 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 145 } 146 147 static void clear_exec_queue_destroyed(struct xe_exec_queue *q) 148 { 149 atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 150 } 151 152 static bool exec_queue_banned(struct xe_exec_queue *q) 153 { 154 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 155 } 156 157 static void set_exec_queue_banned(struct xe_exec_queue *q) 158 { 159 atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 160 } 161 162 static bool exec_queue_suspended(struct xe_exec_queue *q) 163 { 164 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; 165 } 166 167 static void set_exec_queue_suspended(struct xe_exec_queue *q) 168 { 169 atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 170 } 171 172 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 173 { 174 atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); 175 } 176 177 static bool exec_queue_reset(struct xe_exec_queue *q) 178 { 179 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 180 } 181 182 static void set_exec_queue_reset(struct xe_exec_queue *q) 183 { 184 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 185 } 186 187 static bool exec_queue_killed(struct xe_exec_queue *q) 188 { 189 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; 
190 } 191 192 static void set_exec_queue_killed(struct xe_exec_queue *q) 193 { 194 atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); 195 } 196 197 static bool exec_queue_wedged(struct xe_exec_queue *q) 198 { 199 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; 200 } 201 202 static void set_exec_queue_wedged(struct xe_exec_queue *q) 203 { 204 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 205 } 206 207 static bool exec_queue_check_timeout(struct xe_exec_queue *q) 208 { 209 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; 210 } 211 212 static void set_exec_queue_check_timeout(struct xe_exec_queue *q) 213 { 214 atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 215 } 216 217 static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) 218 { 219 atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 220 } 221 222 static bool exec_queue_extra_ref(struct xe_exec_queue *q) 223 { 224 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; 225 } 226 227 static void set_exec_queue_extra_ref(struct xe_exec_queue *q) 228 { 229 atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); 230 } 231 232 static void clear_exec_queue_extra_ref(struct xe_exec_queue *q) 233 { 234 atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); 235 } 236 237 static bool exec_queue_pending_resume(struct xe_exec_queue *q) 238 { 239 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME; 240 } 241 242 static void set_exec_queue_pending_resume(struct xe_exec_queue *q) 243 { 244 atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 245 } 246 247 static void clear_exec_queue_pending_resume(struct xe_exec_queue *q) 248 { 249 atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state); 250 } 251 252 static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q) 253 { 254 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT; 255 } 256 257 static void set_exec_queue_pending_tdr_exit(struct 
xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
}

static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
}

/* True if the queue can no longer make forward progress: wedged, killed or banned. */
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

/*
 * drmm release action (registered in xe_guc_submit_init()): wait for all exec
 * queues to drop out of the GuC ID lookup, flush destruction work, then tear
 * down the lookup xarray.
 */
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	/* Bounded wait for the lookup table to go empty (5s). */
	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	/* Timing out here means some exec queue never got released. */
	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

/*
 * Drop the extra reference held on every queue that was marked wedged.
 * The submission lock is dropped around each put since the final put may
 * trigger teardown paths that must not run under this lock.
 */
static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

/*
 * Teach lockdep that the submission lock may be taken in reclaim context so
 * that it can flag inverted ordering at init time rather than at runtime.
 */
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	/* guc_submit_fini() runs on device teardown to drain this state. */
	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
/* Append the render/compute yield KLV to @emit; returns the advanced cursor. */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
/*
 * Send the global scheduling-policy KLVs to the GuC. Only supported on
 * GuC submission interface 1.1.0+; older firmware silently skips this.
 */
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	/* Yield policy only matters when compute engines are present. */
	if (CCS_MASK(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	/* count == 1 means only the action header was emitted: nothing to send. */
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

/* Enable GuC submission after firmware load; pushes global policies first. */
int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

/*
 * Release @q's GuC IDs. @xa_count is the number of lookup entries actually
 * stored (may be < q->width when unwinding a partial alloc_guc_id()).
 * Caller holds the submission lock.
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	/* Last queue gone: let guc_submit_fini() finish. */
	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc,
struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worse case user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	/* Reserve q->width contiguous IDs; one per LRC of a parallel queue. */
	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	/* Unwind only the i entries that were actually stored. */
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

/* Accumulator for a CONTEXT_POLICIES H2G message: header plus @count KLVs. */
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

/* Size in dwords of the populated portion of the policy H2G. */
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

/* Generate one __guc_exec_queue_policy_add_<func>() appender per policy KLV. */
#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

/* Map xe exec queue priorities onto GuC client priorities. */
static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

/*
 * Push @q's scheduling properties (priority, timeslice, preemption timeout,
 * SLPC frequency request) to the GuC. Must not be called on a multi-queue
 * secondary: those are governed by their group's primary queue.
 */
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* Drop @q's preemption timeout to 1us so a pending disable lands quickly. */
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

/* True while the GT is recovering from VF migration. */
static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

/*
 * Kick the appropriate cleanup path for @q: the LR TDR worker for
 * long-running queues, otherwise an immediate scheduler TDR.
 */
static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up xe_wait_user_fence ioctl waiters if exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

/*
 * Mark @q reset and trigger cleanup. For a multi-queue group the reset fans
 * out to the primary and every secondary on the group list, since they share
 * the same GuC context.
 */
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary) && !exec_queue_check_timeout(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq) && !exec_queue_check_timeout(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

/* Accessors for fields of the parallel scratch page mapped at offset 0. */
#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

#define CGP_VERSION_MAJOR_SHIFT 8

/*
 * Write @q's slot into the group's Context Group Page: LRC descriptor and
 * GuC ID at dwords 32+pos*2 / 33+pos*2, plus an update mask in dwords 16/17
 * (one bit per queue, dword chosen by pos / 32).
 */
static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

/*
 * Update the CGP for @q and send @action, which the GuC answers with a
 * CGP_SYNC_DONE G2H (tracked via group->sync_pending).
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	long ret;

	/*
	 * As all queues of a multi queue group use single drm scheduler
	 * submit workqueue, CGP synchronization with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending CGP_SYNC message.
671 * 672 * FIXME: Support VF migration 673 */ 674 ret = wait_event_timeout(guc->ct.wq, 675 !READ_ONCE(group->sync_pending) || 676 xe_guc_read_stopped(guc), HZ); 677 if (!ret || xe_guc_read_stopped(guc)) { 678 xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n"); 679 return; 680 } 681 682 xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); 683 xe_guc_exec_queue_group_cgp_update(xe, q); 684 685 WRITE_ONCE(group->sync_pending, true); 686 xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1); 687 } 688 689 static void __register_exec_queue_group(struct xe_guc *guc, 690 struct xe_exec_queue *q, 691 struct guc_ctxt_registration_info *info) 692 { 693 #define MAX_MULTI_QUEUE_REG_SIZE (8) 694 u32 action[MAX_MULTI_QUEUE_REG_SIZE]; 695 int len = 0; 696 697 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE; 698 action[len++] = info->flags; 699 action[len++] = info->context_idx; 700 action[len++] = info->engine_class; 701 action[len++] = info->engine_submit_mask; 702 action[len++] = 0; /* Reserved */ 703 action[len++] = info->cgp_lo; 704 action[len++] = info->cgp_hi; 705 706 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE); 707 #undef MAX_MULTI_QUEUE_REG_SIZE 708 709 /* 710 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a 711 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response 712 * from guc. 
713 */ 714 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 715 } 716 717 static void xe_guc_exec_queue_group_add(struct xe_guc *guc, 718 struct xe_exec_queue *q) 719 { 720 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2) 721 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE]; 722 int len = 0; 723 724 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q)); 725 726 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC; 727 action[len++] = q->multi_queue.group->primary->guc->id; 728 729 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE); 730 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE 731 732 /* 733 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a 734 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response 735 * from guc. 736 */ 737 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len); 738 } 739 740 static void __register_mlrc_exec_queue(struct xe_guc *guc, 741 struct xe_exec_queue *q, 742 struct guc_ctxt_registration_info *info) 743 { 744 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 745 u32 action[MAX_MLRC_REG_SIZE]; 746 int len = 0; 747 int i; 748 749 xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); 750 751 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 752 action[len++] = info->flags; 753 action[len++] = info->context_idx; 754 action[len++] = info->engine_class; 755 action[len++] = info->engine_submit_mask; 756 action[len++] = info->wq_desc_lo; 757 action[len++] = info->wq_desc_hi; 758 action[len++] = info->wq_base_lo; 759 action[len++] = info->wq_base_hi; 760 action[len++] = info->wq_size; 761 action[len++] = q->width; 762 action[len++] = info->hwlrca_lo; 763 action[len++] = info->hwlrca_hi; 764 765 for (i = 1; i < q->width; ++i) { 766 struct xe_lrc *lrc = q->lrc[i]; 767 768 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 769 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 770 } 771 772 /* explicitly checks some fields that we might fixup later */ 773 
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

/* Register a single-LRC context with the GuC. */
static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

/*
 * Build the registration info for @q and register it with the GuC, picking
 * the multi-queue, multi-LRC or single-LRC flavour as appropriate.
 * @ctx_type is one of the GUC_CONTEXT_* types (asserted < GUC_CONTEXT_COUNT).
 */
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		/* Context Group Page lives in GGTT; hi dword unused. */
		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		/* Work queue descriptor and buffer live in the parallel scratch page. */
		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines if engine is registered with
	 * the GuC as jobs signal immediately and can't destroy an engine if the
	 * GuC has a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	/* Secondaries inherit scheduling policies from the group primary. */
	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

/* Bytes left in the work queue before the write pointer wraps to 0. */
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

/*
 * Wait (with exponential backoff, up to ~2s) until the GuC has consumed
 * enough of the work queue to fit @wqi_size bytes. On timeout a GT reset is
 * triggered and -ENODEV returned. Skipped during VF recovery.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		/* Refresh our cached head from the GuC-written descriptor. */
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_total_ms += sleep_period_ms;
			if (sleep_period_ms < 64)
				sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

/* Fill the remainder of the WQ with a NOOP item so the next item starts at 0. */
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc),
		     FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

/*
 * Append a MULTI_LRC work item for a parallel submission: header, lrc[0]
 * descriptor, guc_id + lrc[0] ring tail, then the ring tail of every
 * remaining LRC. Finally publish the new tail to the GuC WQ descriptor.
 */
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	/* Ensure the WQ item is visible before the tail update below. */
	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
/*
 * Hand @job to the GuC: update ring tail / WQ, then either enable scheduling
 * on the context (first submission) or poke it with a SCHED_CONTEXT action.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
u32 num_g2h = 0; 993 int len = 0; 994 bool extra_submit = false; 995 996 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 997 998 if (!job->restore_replay || job->last_replay) { 999 if (xe_exec_queue_is_parallel(q)) 1000 wq_item_append(q); 1001 else 1002 xe_lrc_set_ring_tail(lrc, lrc->ring.tail); 1003 job->last_replay = false; 1004 } 1005 1006 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 1007 return; 1008 1009 /* 1010 * All queues in a multi-queue group will use the primary queue 1011 * of the group to interface with GuC. 1012 */ 1013 q = xe_exec_queue_multi_queue_primary(q); 1014 1015 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 1016 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 1017 action[len++] = q->guc->id; 1018 action[len++] = GUC_CONTEXT_ENABLE; 1019 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 1020 num_g2h = 1; 1021 if (xe_exec_queue_is_parallel(q)) 1022 extra_submit = true; 1023 1024 q->guc->resume_time = RESUME_PENDING; 1025 set_exec_queue_pending_enable(q); 1026 set_exec_queue_enabled(q); 1027 trace_xe_exec_queue_scheduling_enable(q); 1028 } else { 1029 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1030 action[len++] = q->guc->id; 1031 trace_xe_exec_queue_submit(q); 1032 } 1033 1034 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 1035 1036 if (extra_submit) { 1037 len = 0; 1038 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 1039 action[len++] = q->guc->id; 1040 trace_xe_exec_queue_submit(q); 1041 1042 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 1043 } 1044 } 1045 1046 static struct dma_fence * 1047 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 1048 { 1049 struct xe_sched_job *job = to_xe_sched_job(drm_job); 1050 struct xe_exec_queue *q = job->q; 1051 struct xe_guc *guc = exec_queue_to_guc(q); 1052 bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged = 1053 exec_queue_killed_or_banned_or_wedged(q); 1054 1055 xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || 
exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		/*
		 * Secondary queues of a multi-queue group submit through the
		 * group's primary, so the primary must be healthy and
		 * registered before this queue can be.
		 */
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		/* On a restore replay the ring already contains the commands. */
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:
	/*
	 * We don't care about job-fence ordering in LR VMs because these fences
	 * are never exported; they are used solely to keep jobs on the pending
	 * list. Once a queue enters an error state, there's no need to track
	 * them.
	 */
	if (killed_or_banned_or_wedged && lr)
		xe_sched_job_set_error(job, -ECANCELED);

	return job->fence;
}

/* DRM scheduler free_job hook: drop the scheduler's reference to the job. */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

/*
 * Disable scheduling for @q, mark it destroyed, and request context
 * deregistration from the GuC. Any in-flight pending enable/disable is waited
 * out first; if the GuC fails to respond within 5s a GT reset is triggered
 * instead.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve
G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save exec queue's registered with GuC state by taking a ref to each queue.
 * Register a DRMM handler to drop refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
			  "Although device is wedged.\n");
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * In wedged.mode == 2, a hang hint wedges the whole device. Returns true if
 * the device is (now) wedged, false when mode 2 is not active.
 */
static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != 2)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

/*
 * TDR worker for long-running (LR) queues: stop submission, disable
 * scheduling / deregister the context if needed, then cancel all jobs still
 * on the pending list.
 */
static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gpu_scheduler *sched = &ge->sched;
	struct xe_sched_job *job;
	bool wedged = false;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));

	if (vf_recovery(guc))
		return;

	trace_xe_exec_queue_lr_cleanup(q);

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			return;

		if (!ret) {
			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
				   q->guc->id);
			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
				       q->guc->id);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	/* Capture state if the LR queue hung (ring not idle) rather than exited. */
	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);

	xe_hw_fence_irq_stop(q->fence_irq);

	xe_sched_submission_start(sched);

	/* Cancel everything still on the pending list. */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(job, -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	xe_hw_fence_irq_start(q->fence_irq);
}

/* Allow 5% slack on top of a measured running time. */
#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

/*
 * Compare the job's accumulated LRC run time against the queue's job timeout
 * (plus 5% slack for GuC scheduling latency). Returns true if genuinely
 * timed out.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
*/
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for a GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

/*
 * Send a scheduling-enable to the GuC for @q and wait (5s) for the ack. On a
 * timeout or a stopped GuC, ban the queue and trigger a GT reset.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

/*
 * Send a scheduling-disable to the GuC for @q. @immediate additionally drops
 * the preemption timeout to its minimum first. Does not wait for the ack;
 * completion is observed via the pending_disable state bit.
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

/*
 * Mark @q destroyed and ask the GuC to deregister the context. Scheduling
 * must already be fully disabled (asserted below).
 */
static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_deregister(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	int i = 0;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc),
!xe_exec_queue_is_lr(q));

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read timestamp. We could read the
	 * timestamp from a register to accumulate current running time but this
	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
	 * genuine timeouts.
	 */
	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Engine state now stable, disable scheduling to check timestamp */
	if (!wedged && exec_queue_registered(q)) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (!exec_queue_destroyed(q)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			/*
			 * Flag communicates to G2H handler that schedule
			 * disable originated from a timeout check. The G2H then
			 * avoid triggering cleanup or deregistering the exec
			 * queue.
			 */
			set_exec_queue_check_timeout(q);
			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			set_exec_queue_extra_ref(q);
			xe_exec_queue_get(q);	/* GT reset owns this */
			set_exec_queue_banned(q);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
	    !exec_queue_reset(q) && exec_queue_registered(q)) {
		clear_exec_queue_check_timeout(q);
		goto sched_enable;
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs — if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			clear_exec_queue_check_timeout(q);
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Finish cleaning up exec queue via deregister */
	set_exec_queue_banned(q);
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		set_exec_queue_extra_ref(q);
		xe_exec_queue_get(q);
		__deregister_exec_queue(guc, q);
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);

	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

	return DRM_GPU_SCHED_STAT_RESET;

sched_enable:
	set_exec_queue_pending_tdr_exit(q);
	enable_scheduling(q);
rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do in DRM scheduler. With
	 * some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

/* Final teardown of the GuC-side queue state once all references are gone. */
static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to the sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	guard(xe_pm_runtime)(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	/* Secondary queues must be unlinked from their group before fini. */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		mutex_lock(&group->list_lock);
		list_del(&q->multi_queue.link);
		mutex_unlock(&group->list_lock);
	}

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue
*q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	/*
	 * Expected state transitions for cleanup:
	 * - If the exec queue is registered and GuC firmware is running, we must first
	 *   disable scheduling and deregister the queue to ensure proper teardown and
	 *   resource release in the GuC, then destroy the exec queue on driver side.
	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
	 *   we cannot expect a response for the deregister request. In this case,
	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
	 *   will not process further requests and all resources must be cleaned up locally.
	 */
	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

/* Scheduling-state changes are only allowed on healthy, registered queues. */
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

/* Clear suspend_pending and wake any waiter in guc_exec_queue_suspend_wait(). */
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);

	/*
	 * We use a GuC shared wait queue for VFs because the VF resfix start
	 * interrupt must be able to wake all instances of suspend_wait. This
	 * prevents the VF migration worker from being starved during
	 * scheduling.
	 */
	if (IS_SRIOV_VF(xe))
		wake_up_all(&guc->ct.wq);
	else
		wake_up(&q->guc->suspend_wait);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		/* Let any in-flight resume / pending disable settle first. */
		wait_event(guc->ct.wq, vf_recovery(guc) ||
			   ((q->guc->resume_time != RESUME_PENDING ||
			     xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));

		if (!xe_guc_read_stopped(guc)) {
			/* Honor the VM's minimum run period before suspending. */
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			q->guc->resume_time = RESUME_PENDING;
			set_exec_queue_pending_resume(q);
			enable_scheduling(q);
		}
	} else {
		clear_exec_queue_suspended(q);
	}
}

static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
{
	struct
xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
		struct xe_guc *guc = exec_queue_to_guc(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
		int len = 0;

		/* CGP sync is addressed to the group's primary context. */
		action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
		action[len++] = group->primary->guc->id;

		xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

		xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
	}

	kfree(msg);
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4
#define SET_MULTI_QUEUE_PRIORITY 5
#define OPCODE_MASK	0xf
#define MSG_LOCKED	BIT(8)
#define MSG_HEAD	BIT(9)

/* Dispatch a queued scheduler message; drops the PM ref taken at add time. */
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	case SET_MULTI_QUEUE_PRIORITY:
		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

/* Allocate and initialize the GuC backend state for a new exec queue. */
static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct workqueue_struct *submit_wq = NULL;
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	/* LR queues never time out; others use the configured job timeout. */
	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		msecs_to_jiffies(q->sched_props.job_timeout_ms);

	/*
	 * Use primary queue's submit_wq for all secondary queues of a
	 * multi queue group. This serialization avoids any locking around
	 * CGP synchronization with GuC.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

		submit_wq = primary->guc->sched.base.submit_wq;
	}

	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	/* Mirror a GuC stop / VF recovery in progress into the new scheduler. */
	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	/*
	 * Maintain secondary queues of the multi queue group in a list
	 * for handling dependencies across the queues in the group.
 */
	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		/* Secondary queues are tracked on the primary's group list */
		INIT_LIST_HEAD(&q->multi_queue.link);
		mutex_lock(&group->list_lock);
		list_add_tail(&q->multi_queue.link, &group->list);
		mutex_unlock(&group->list_lock);
	}

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

/*
 * Mark the queue killed, signal any waiter blocked on a suspend fence, and
 * kick the asynchronous cleanup machinery.
 */
static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	__suspend_fence_signal(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}

/*
 * Queue a message for the GPU scheduler to process. Takes a runtime PM
 * reference which is expected to be released once the message has been
 * processed. The MSG_HEAD / MSG_LOCKED bits in @opcode only select the
 * insertion method and are masked out of the opcode stored in @msg.
 */
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode & OPCODE_MASK;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);
	if (opcode & MSG_HEAD)
		xe_sched_add_msg_head(&q->guc->sched, msg);
	else if (opcode & MSG_LOCKED)
		xe_sched_add_msg_locked(&q->guc->sched, msg);
	else
		xe_sched_add_msg(&q->guc->sched, msg);
}

/*
 * Add @msg at the head of the message queue unless it is already queued
 * (non-empty link). Caller holds the scheduler message lock.
 */
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
					    struct xe_sched_msg *msg,
					    u32 opcode)
{
	if (!list_empty(&msg->link))
		return;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}

/*
 * Add @msg to the message queue unless it is already queued. Caller holds the
 * scheduler message lock. Returns true if the message was added.
 */
static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
				       struct xe_sched_msg *msg,
				       u32 opcode)
{
	if (!list_empty(&msg->link))
		return false;

	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);

	return true;
}

/* Indices into q->guc->static_msgs of the preallocated scheduler messages */
#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2

static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

	/* Permanent or wedged queues are torn down synchronously */
	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
		guc_exec_queue_add_msg(q, msg, CLEANUP);
	else
		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}

static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
				       enum xe_exec_queue_priority priority)
{
	struct xe_sched_msg *msg;

	/* No-op if nothing changes or the queue can no longer be scheduled */
	if (q->sched_props.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
						   enum xe_multi_queue_priority priority)
{
	struct xe_sched_msg *msg;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));

	if (q->multi_queue.priority == priority ||
	    exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	q->multi_queue.priority = priority;
	guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);

	return 0;
}

static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return -EINVAL;

	/* suspend_pending is only set if the SUSPEND message was newly queued */
	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);

	return 0;
}

static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	/*
	 * Likely don't need to check exec_queue_killed() as we clear
	 * suspend_pending upon kill, but to guard against races in which
	 * suspend_pending is set after kill, also check kill here.
	 */
#define WAIT_COND \
	(!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
	 xe_guc_read_stopped(guc))

retry:
	/* VFs additionally wake up on recovery; see the vf_recovery() checks */
	if (IS_SRIOV_VF(xe))
		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
						       vf_recovery(guc),
						       HZ * 5);
	else
		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
						       WAIT_COND, HZ * 5);

	if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
		return -EAGAIN;

	if (!ret) {
		xe_gt_warn(guc_to_gt(guc),
			   "Suspend fence, guc_id=%d, failed to respond",
			   q->guc->id);
		/* XXX: Trigger GT reset?
 */
		return -ETIME;
	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
		/* Corner case on RESFIX DONE where vf_recovery() changes */
		goto retry;
	}

#undef WAIT_COND

	return ret < 0 ? ret : 0;
}

/* Queue a RESUME message; a no-op if one is already queued */
static void guc_exec_queue_resume(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);

	xe_sched_msg_lock(sched);
	guc_exec_queue_try_add_msg(q, msg, RESUME);
	xe_sched_msg_unlock(sched);
}

/* A multi-queue secondary also reflects the reset status of its primary */
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
		return true;

	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}

/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

/*
 * Quiesce a single queue during GT reset: flush the scheduler, drop references
 * tied to lost G2H messages, and reset the queue's state bits, keeping only
 * the "terminal" bits (wedged/banned/killed/destroyed/suspended).
 */
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_destroy(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	/* Keep only the terminal state bits, clear all pending/transient ones */
	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
		   EXEC_QUEUE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		bool ban = false;

		if (job) {
			if ((xe_sched_job_started(job) &&
			    !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				ban = true;
			}
		} else if (xe_exec_queue_is_lr(q) &&
			   !xe_lrc_ring_is_idle(q->lrc[0])) {
			/* LR queue with a non-idle ring but no pending job */
			ban = true;
		}

		if (ban) {
			set_exec_queue_banned(q);
			xe_guc_exec_queue_trigger_cleanup(q);
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
		return 0;

	if (!guc->submission_state.initialized)
		return 0;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to prevent against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	/* Non-zero return means a reset was already in flight */
	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}

void xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_stop(guc, q);
	}

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

}

/*
 * Undo any in-flight (pending) GuC state transitions so they can be replayed
 * after VF recovery. Each reverted transition is recorded either by restoring
 * the pre-transition state bits or by setting a needs_* flag consumed later by
 * guc_exec_queue_replay_pending_state_change().
 */
static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
						       struct xe_exec_queue *q)
{
	bool pending_enable, pending_disable, pending_resume;

	pending_enable = exec_queue_pending_enable(q);
	pending_resume = exec_queue_pending_resume(q);

	if (pending_enable && pending_resume) {
		q->guc->needs_resume = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
			  q->guc->id);
	}

	/* An enable that was part of the initial register must re-register */
	if (pending_enable && !pending_resume &&
	    !exec_queue_pending_tdr_exit(q)) {
		clear_exec_queue_registered(q);
		if (xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
			  q->guc->id);
	}

	if (pending_enable) {
		clear_exec_queue_enabled(q);
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_tdr_exit(q);
		clear_exec_queue_pending_enable(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
			  q->guc->id);
	}

	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
		clear_exec_queue_destroyed(q);
		if (exec_queue_extra_ref(q))
			xe_exec_queue_put(q);
		else
			q->guc->needs_cleanup = true;
		clear_exec_queue_extra_ref(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
			  q->guc->id);
	}

	pending_disable = exec_queue_pending_disable(q);

	if (pending_disable && exec_queue_suspended(q)) {
		clear_exec_queue_suspended(q);
		q->guc->needs_suspend = true;
		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
			  q->guc->id);
	}

	if (pending_disable) {
		if (!pending_enable)
			set_exec_queue_enabled(q);
		clear_exec_queue_pending_disable(q);
		clear_exec_queue_check_timeout(q);
		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
			  q->guc->id);
	}

	q->guc->resume_time = 0;
}

/* Overwrite the whole parallel work queue with NOP entries */
static void lrc_parallel_clear(struct xe_lrc *lrc)
{
	struct xe_device *xe = gt_to_xe(lrc->gt);
	struct iosys_map map = xe_lrc_parallel_map(lrc);
	int i;

	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
		parallel_write(xe, map, wq[i],
			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
			       FIELD_PREP(WQ_LEN_MASK, 0));
}

/*
 * This function is quite complex but only real way to ensure no state is lost
 * during VF resume flows. The function scans the queue state, makes adjustments
 * as needed, and queues jobs / messages which are replayed upon unpause.
 */
static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job;
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);
	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&q->guc->lr_tdr);
	else
		cancel_delayed_work_sync(&sched->base.work_tdr);

	guc_exec_queue_revert_pending_state_change(guc, q);

	if (xe_exec_queue_is_parallel(q)) {
		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);

		/*
		 * NOP existing WQ commands that may contain stale GGTT
		 * addresses. These will be replayed upon unpause. The hardware
		 * seems to get confused if the WQ head/tail pointers are
		 * adjusted.
		 */
		if (lrc)
			lrc_parallel_clear(lrc);
	}

	job = xe_sched_first_pending_job(sched);
	if (job) {
		job->restore_replay = true;

		/*
		 * Adjust software tail so jobs submitted overwrite previous
		 * position in ring buffer with new GGTT addresses.
		 */
		for (i = 0; i < q->width; ++i)
			q->lrc[i]->ring.tail = job->ptrs[i].head;
	}
}

/**
 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_stop(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
 */
void xe_guc_submit_pause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_pause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Restart a single queue after GT reset: rewind ring/LRC tails so pending
 * jobs are rewritten at their original placement, then resubmit and restart
 * the scheduler.
 */
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned_or_wedged(q)) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
		int i;

		trace_xe_exec_queue_resubmit(q);
		if (job) {
			for (i = 0; i < q->width; ++i) {
				/*
				 * The GuC context is unregistered at this point
				 * time, adjusting software ring tail ensures
				 * jobs are rewritten in original placement,
				 * adjusting LRC tail ensures the newly loaded
				 * GuC / contexts only view the LRC tail
				 * increasing as jobs are written out.
				 */
				q->lrc[i]->ring.tail = job->ptrs[i].head;
				xe_lrc_set_ring_tail(q->lrc[i],
						     xe_lrc_ring_head(q->lrc[i]));
			}
		}
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
	xe_sched_submission_resume_tdr(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to start parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_start(q);
	}
	mutex_unlock(&guc->submission_state.lock);

	/* Release anyone waiting on guc->submission_state.stopped */
	wake_up_all(&guc->ct.wq);

	return 0;
}

/*
 * Re-emit ring commands for all pending jobs from the first job flagged for
 * replay onwards; their previously-written commands may reference stale GGTT
 * addresses. The last job visited is tagged so the replay knows where to stop.
 */
static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
					   struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_job *job = NULL, *__job;
	bool restore_replay = false;

	list_for_each_entry(__job, &sched->base.pending_list, drm.list) {
		job = __job;
		restore_replay |= job->restore_replay;
		if (restore_replay) {
			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
				  q->guc->id, xe_sched_job_seqno(job));

			q->ring_ops->emit_job(job);
			job->restore_replay = true;
		}
	}

	if (job)
		job->last_replay = true;
}

/**
 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
 */
void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		guc_exec_queue_unpause_prepare(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Re-queue the messages recorded by
 * guc_exec_queue_revert_pending_state_change() so the reverted state
 * transitions are replayed against the newly loaded GuC.
 */
static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_sched_msg *msg;

	if (q->guc->needs_cleanup) {
		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;

		guc_exec_queue_add_msg(q, msg, CLEANUP);
		q->guc->needs_cleanup = false;
	}

	if (q->guc->needs_suspend) {
		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
		xe_sched_msg_unlock(sched);

		q->guc->needs_suspend = false;
	}

	/*
	 * The resume must be in the message queue before the suspend as it is
	 * not possible for a resume to be issued if a suspend pending is, but
	 * the inverse is possible.
	 */
	if (q->guc->needs_resume) {
		msg = q->guc->static_msgs + STATIC_MSG_RESUME;

		xe_sched_msg_lock(sched);
		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
		xe_sched_msg_unlock(sched);

		q->guc->needs_resume = false;
	}
}

static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);

	lockdep_assert_held(&guc->submission_state.lock);

	xe_sched_resubmit_jobs(sched);
	guc_exec_queue_replay_pending_state_change(q);
	xe_sched_submission_start(sched);
	if (needs_tdr)
		xe_guc_exec_queue_trigger_cleanup(q);
	xe_sched_submission_resume_tdr(sched);
}

/**
 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		xe_sched_submission_start(&q->guc->sched);
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
 */
void xe_guc_submit_unpause_vf(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		/*
		 * Prevent redundant attempts to stop parallel queues, or queues
		 * created after resfix done.
		 */
		if (q->guc->id != index ||
		    !READ_ONCE(q->guc->sched.base.pause_submit))
			continue;

		guc_exec_queue_unpause(guc, q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/**
 * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
 */
void xe_guc_submit_pause_abort(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		/* Prevent redundant attempts to stop parallel queues */
		if (q->guc->id != index)
			continue;

		xe_sched_submission_start(sched);
		if (exec_queue_killed_or_banned_or_wedged(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

/*
 * Map a guc_id from a G2H message to its exec queue, validating the id range.
 * Returns NULL (after logging) on an invalid or unknown id.
 */
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
		return NULL;
	}

	/* Parallel queues occupy a contiguous range of guc_ids */
	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	trace_xe_exec_queue_deregister(q);

	/* Secondaries have no GuC context of their own; complete locally */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_deregister_done(guc, q);
	else
		xe_guc_ct_send_g2h_handler(&guc->ct, action,
					   ARRAY_SIZE(action));
}

/*
 * Process a SCHED_DONE transition: @runnable_state == 1 completes a pending
 * enable, 0 completes a pending disable (possibly chaining into a deregister).
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_tdr_exit(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		bool check_timeout = exec_queue_check_timeout(q);

		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));

		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
			clear_exec_queue_pending_disable(q);
		} else {
			if (exec_queue_banned(q) || check_timeout) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			if (!check_timeout && exec_queue_destroyed(q)) {
				/*
				 * Make sure to clear the pending_disable only
				 * after sampling the destroyed state. We want
				 * to ensure we don't trigger the unregister too
				 * early with something intending to only
				 * disable scheduling. The caller doing the
				 * destroy must wait for an ongoing
				 * pending_disable before marking as destroyed.
				 */
				clear_exec_queue_pending_disable(q);
				deregister_exec_queue(guc, q);
			} else {
				clear_exec_queue_pending_disable(q);
			}
		}
	}
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	/* Take CT lock here as handle_sched_done() do send a h2g message */
	mutex_lock(&guc->ct.lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&guc->ct.lock);
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id, runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	guc_id = msg[0];
	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/* SCHED_DONE is only valid while an enable or disable is in flight */
	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}

static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	clear_exec_queue_registered(q);

	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
		xe_exec_queue_put(q);
	else
		__guc_exec_queue_destroy(guc, q);
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/* A deregister is only legal once destroyed, disabled, and idle */
	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}

int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

	trace_xe_exec_queue_reset(q);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel
	 * jobs by setting timeout of the job to the minimum value kicking
	 * guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}

/*
 * xe_guc_error_capture_handler - Handler of GuC captured message
 * @guc: The GuC object
 * @msg: Point to the message
 * @len: The message length
 *
 * When GuC captured data is ready, GuC will send message
 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
 * called 1st to check status before process the data comes with the message.
 *
 * Returns: error code.
0 if success 2922 */ 2923 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) 2924 { 2925 u32 status; 2926 2927 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2928 return -EPROTO; 2929 2930 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2931 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 2932 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space"); 2933 2934 xe_guc_capture_process(guc); 2935 2936 return 0; 2937 } 2938 2939 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 2940 u32 len) 2941 { 2942 struct xe_gt *gt = guc_to_gt(guc); 2943 struct xe_exec_queue *q; 2944 u32 guc_id; 2945 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; 2946 2947 if (unlikely(!len || len > 2)) 2948 return -EPROTO; 2949 2950 guc_id = msg[0]; 2951 2952 if (len == 2) 2953 type = msg[1]; 2954 2955 if (guc_id == GUC_ID_UNKNOWN) { 2956 /* 2957 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 2958 * context. In such case only PF will be notified about that fault. 2959 */ 2960 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 2961 return 0; 2962 } 2963 2964 q = g2h_exec_queue_lookup(guc, guc_id); 2965 if (unlikely(!q)) 2966 return -EPROTO; 2967 2968 /* 2969 * The type is HW-defined and changes based on platform, so we don't 2970 * decode it in the kernel and only check if it is valid. 2971 * See bspec 54047 and 72187 for details. 
2972 */ 2973 if (type != XE_GUC_CAT_ERR_TYPE_INVALID) 2974 xe_gt_dbg(gt, 2975 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", 2976 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2977 else 2978 xe_gt_dbg(gt, 2979 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", 2980 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); 2981 2982 trace_xe_exec_queue_memory_cat_error(q); 2983 2984 /* Treat the same as engine reset */ 2985 xe_guc_exec_queue_reset_trigger_cleanup(q); 2986 2987 return 0; 2988 } 2989 2990 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 2991 { 2992 struct xe_gt *gt = guc_to_gt(guc); 2993 u8 guc_class, instance; 2994 u32 reason; 2995 2996 if (unlikely(len != 3)) 2997 return -EPROTO; 2998 2999 guc_class = msg[0]; 3000 instance = msg[1]; 3001 reason = msg[2]; 3002 3003 /* Unexpected failure of a hardware feature, log an actual error */ 3004 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 3005 guc_class, instance, reason); 3006 3007 xe_gt_reset_async(gt); 3008 3009 return 0; 3010 } 3011 3012 /** 3013 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler 3014 * @guc: guc 3015 * @msg: message indicating CGP sync done 3016 * @len: length of message 3017 * 3018 * Set multi queue group's sync_pending flag to false and wakeup anyone waiting 3019 * for CGP synchronization to complete. 3020 * 3021 * Return: 0 on success, -EPROTO for malformed messages. 
3022 */ 3023 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 3024 { 3025 struct xe_device *xe = guc_to_xe(guc); 3026 struct xe_exec_queue *q; 3027 u32 guc_id = msg[0]; 3028 3029 if (unlikely(len < 1)) { 3030 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len); 3031 return -EPROTO; 3032 } 3033 3034 q = g2h_exec_queue_lookup(guc, guc_id); 3035 if (unlikely(!q)) 3036 return -EPROTO; 3037 3038 if (!xe_exec_queue_is_multi_queue_primary(q)) { 3039 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response"); 3040 return -EPROTO; 3041 } 3042 3043 /* Wakeup the serialized cgp update wait */ 3044 WRITE_ONCE(q->multi_queue.group->sync_pending, false); 3045 xe_guc_ct_wake_waiters(&guc->ct); 3046 3047 return 0; 3048 } 3049 3050 static void 3051 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 3052 struct xe_guc_submit_exec_queue_snapshot *snapshot) 3053 { 3054 struct xe_guc *guc = exec_queue_to_guc(q); 3055 struct xe_device *xe = guc_to_xe(guc); 3056 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); 3057 int i; 3058 3059 snapshot->guc.wqi_head = q->guc->wqi_head; 3060 snapshot->guc.wqi_tail = q->guc->wqi_tail; 3061 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 3062 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 3063 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 3064 wq_desc.wq_status); 3065 3066 if (snapshot->parallel.wq_desc.head != 3067 snapshot->parallel.wq_desc.tail) { 3068 for (i = snapshot->parallel.wq_desc.head; 3069 i != snapshot->parallel.wq_desc.tail; 3070 i = (i + sizeof(u32)) % WQ_SIZE) 3071 snapshot->parallel.wq[i / sizeof(u32)] = 3072 parallel_read(xe, map, wq[i / sizeof(u32)]); 3073 } 3074 } 3075 3076 static void 3077 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 3078 struct drm_printer *p) 3079 { 3080 int i; 3081 3082 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 3083 snapshot->guc.wqi_head, 
		   snapshot->parallel.wq_desc.head);
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);

	/* Print only the occupied [head, tail) region of the circular WQ */
	if (snapshot->parallel.wq_desc.head !=
	    snapshot->parallel.wq_desc.tail) {
		for (i = snapshot->parallel.wq_desc.head;
		     i != snapshot->parallel.wq_desc.tail;
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
				   snapshot->parallel.wq[i / sizeof(u32)]);
	}
}

/**
 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
 * @q: faulty exec queue
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a GuC Submit Engine snapshot object that must be freed by the
 * caller, using `xe_guc_exec_queue_snapshot_free`.
 */
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc_submit_exec_queue_snapshot *snapshot;
	int i;

	/* GFP_ATOMIC: may be called from a fence-signaling/atomic context */
	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	snapshot->guc.id = q->guc->id;
	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
	snapshot->class = q->class;
	snapshot->logical_mask = q->logical_mask;
	snapshot->width = q->width;
	snapshot->refcount = kref_read(&q->refcount);
	snapshot->sched_timeout = sched->base.timeout;
	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
	snapshot->sched_props.preempt_timeout_us =
		q->sched_props.preempt_timeout_us;

	/* Allocation failure here is tolerated; LRC data is simply omitted */
	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
				      GFP_ATOMIC);

	if (snapshot->lrc) {
		for (i = 0; i < q->width; ++i) {
			struct xe_lrc *lrc = q->lrc[i];

			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
		}
	}

	snapshot->schedule_state = atomic_read(&q->guc->state);
	snapshot->exec_queue_flags = q->flags;

	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_capture(q, snapshot);

	if (xe_exec_queue_is_multi_queue(q)) {
		snapshot->multi_queue.valid = true;
		snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
		snapshot->multi_queue.pos = q->multi_queue.pos;
	}
	spin_lock(&sched->base.job_list_lock);
	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
					       sizeof(struct pending_list_snapshot),
					       GFP_ATOMIC);

	if (snapshot->pending_list) {
		struct xe_sched_job *job_iter;

		i = 0;
		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job_iter);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
 * @snapshot: Previously captured snapshot of job.
 *
 * This captures some data that requires taking some locks, so it cannot be done in signaling path.
 */
void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot || !snapshot->lrc)
		return;

	for (i = 0; i < snapshot->width; ++i)
		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
		xe_lrc_snapshot_print(snapshot->lrc[i], p);

	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	if (snapshot->multi_queue.valid) {
		drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
		drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
	}

	for (i = 0;
snapshot->pending_list && i < snapshot->pending_list_size; 3242 i++) 3243 drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", 3244 snapshot->pending_list[i].seqno, 3245 snapshot->pending_list[i].fence, 3246 snapshot->pending_list[i].finished); 3247 } 3248 3249 /** 3250 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given 3251 * snapshot. 3252 * @snapshot: GuC Submit Engine snapshot object. 3253 * 3254 * This function free all the memory that needed to be allocated at capture 3255 * time. 3256 */ 3257 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) 3258 { 3259 int i; 3260 3261 if (!snapshot) 3262 return; 3263 3264 if (snapshot->lrc) { 3265 for (i = 0; i < snapshot->width; i++) 3266 xe_lrc_snapshot_free(snapshot->lrc[i]); 3267 kfree(snapshot->lrc); 3268 } 3269 kfree(snapshot->pending_list); 3270 kfree(snapshot); 3271 } 3272 3273 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) 3274 { 3275 struct xe_guc_submit_exec_queue_snapshot *snapshot; 3276 3277 snapshot = xe_guc_exec_queue_snapshot_capture(q); 3278 xe_guc_exec_queue_snapshot_print(snapshot, p); 3279 xe_guc_exec_queue_snapshot_free(snapshot); 3280 } 3281 3282 /** 3283 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. 3284 * @q: Execution queue 3285 * @ctx_type: Type of the context 3286 * 3287 * This function registers the execution queue with the guc. Special context 3288 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE 3289 * are only applicable for IGPU and in the VF. 3290 * Submits the execution queue to GUC after registering it. 3291 * 3292 * Returns - None. 
3293 */ 3294 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) 3295 { 3296 struct xe_guc *guc = exec_queue_to_guc(q); 3297 struct xe_device *xe = guc_to_xe(guc); 3298 struct xe_gt *gt = guc_to_gt(guc); 3299 3300 xe_gt_assert(gt, IS_SRIOV_VF(xe)); 3301 xe_gt_assert(gt, !IS_DGFX(xe)); 3302 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || 3303 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); 3304 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); 3305 3306 register_exec_queue(q, ctx_type); 3307 enable_scheduling(q); 3308 } 3309 3310 /** 3311 * xe_guc_submit_print - GuC Submit Print. 3312 * @guc: GuC. 3313 * @p: drm_printer where it will be printed out. 3314 * 3315 * This function capture and prints snapshots of **all** GuC Engines. 3316 */ 3317 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) 3318 { 3319 struct xe_exec_queue *q; 3320 unsigned long index; 3321 3322 if (!xe_device_uc_enabled(guc_to_xe(guc))) 3323 return; 3324 3325 mutex_lock(&guc->submission_state.lock); 3326 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) 3327 guc_exec_queue_print(q, p); 3328 mutex_unlock(&guc->submission_state.lock); 3329 } 3330 3331 /** 3332 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all 3333 * exec queues registered to given GuC. 3334 * @guc: the &xe_guc struct instance 3335 * @scratch: scratch buffer to be used as temporary storage 3336 * 3337 * Returns: zero on success, negative error code on failure. 
3338 */ 3339 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) 3340 { 3341 struct xe_exec_queue *q; 3342 unsigned long index; 3343 int err = 0; 3344 3345 mutex_lock(&guc->submission_state.lock); 3346 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { 3347 /* Prevent redundant attempts to stop parallel queues */ 3348 if (q->guc->id != index) 3349 continue; 3350 3351 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); 3352 if (err) 3353 break; 3354 } 3355 mutex_unlock(&guc->submission_state.lock); 3356 3357 return err; 3358 } 3359