// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done event being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue
*q) 124 { 125 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; 126 } 127 128 static void set_exec_queue_destroyed(struct xe_exec_queue *q) 129 { 130 atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); 131 } 132 133 static bool exec_queue_banned(struct xe_exec_queue *q) 134 { 135 return (q->flags & EXEC_QUEUE_FLAG_BANNED); 136 } 137 138 static void set_exec_queue_banned(struct xe_exec_queue *q) 139 { 140 q->flags |= EXEC_QUEUE_FLAG_BANNED; 141 } 142 143 static bool exec_queue_suspended(struct xe_exec_queue *q) 144 { 145 return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; 146 } 147 148 static void set_exec_queue_suspended(struct xe_exec_queue *q) 149 { 150 atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); 151 } 152 153 static void clear_exec_queue_suspended(struct xe_exec_queue *q) 154 { 155 atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); 156 } 157 158 static bool exec_queue_reset(struct xe_exec_queue *q) 159 { 160 return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; 161 } 162 163 static void set_exec_queue_reset(struct xe_exec_queue *q) 164 { 165 atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); 166 } 167 168 static bool exec_queue_killed(struct xe_exec_queue *q) 169 { 170 return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; 171 } 172 173 static void set_exec_queue_killed(struct xe_exec_queue *q) 174 { 175 atomic_or(ENGINE_STATE_KILLED, &q->guc->state); 176 } 177 178 static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) 179 { 180 return exec_queue_killed(q) || exec_queue_banned(q); 181 } 182 183 #ifdef CONFIG_PROVE_LOCKING 184 static int alloc_submit_wq(struct xe_guc *guc) 185 { 186 int i; 187 188 for (i = 0; i < NUM_SUBMIT_WQ; ++i) { 189 guc->submission_state.submit_wq_pool[i] = 190 alloc_ordered_workqueue("submit_wq", 0); 191 if (!guc->submission_state.submit_wq_pool[i]) 192 goto err_free; 193 } 194 195 return 0; 196 197 err_free: 198 while (i) 199 destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); 200 201 return -ENOMEM; 202 } 203 204 static void free_submit_wq(struct xe_guc *guc) 205 { 206 int i; 207 208 for (i = 0; i < NUM_SUBMIT_WQ; ++i) 209 destroy_workqueue(guc->submission_state.submit_wq_pool[i]); 210 } 211 212 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) 213 { 214 int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; 215 216 return guc->submission_state.submit_wq_pool[idx]; 217 } 218 #else 219 static int alloc_submit_wq(struct xe_guc *guc) 220 { 221 return 0; 222 } 223 224 static void free_submit_wq(struct xe_guc *guc) 225 { 226 227 } 228 229 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) 230 { 231 return NULL; 232 } 233 #endif 234 235 static void guc_submit_fini(struct drm_device *drm, void *arg) 236 { 237 struct xe_guc *guc = arg; 238 239 xa_destroy(&guc->submission_state.exec_queue_lookup); 240 free_submit_wq(guc); 241 } 242 243 static const struct xe_exec_queue_ops guc_exec_queue_ops; 244 245 static void primelockdep(struct xe_guc *guc) 246 { 247 if (!IS_ENABLED(CONFIG_LOCKDEP)) 248 return; 249 250 fs_reclaim_acquire(GFP_KERNEL); 251 252 mutex_lock(&guc->submission_state.lock); 253 might_lock(&guc->submission_state.suspend.lock); 254 mutex_unlock(&guc->submission_state.lock); 255 256 fs_reclaim_release(GFP_KERNEL); 257 } 258 259 int xe_guc_submit_init(struct xe_guc *guc) 260 { 261 struct xe_device *xe = guc_to_xe(guc); 262 struct xe_gt *gt = guc_to_gt(guc); 263 int err; 264 265 err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); 266 if (err) 267 
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0);
	if (err)
		return err;

	err = alloc_submit_wq(guc);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path;
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
			       q->guc->id + i, q, GFP_NOWAIT);
		if (IS_ERR(ptr)) {
			ret = PTR_ERR(ptr);
			goto err_release;
		}
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
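	/* Kernel-owned queues map to the GuC KMD high priority class below */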
[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 393 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 394 }; 395 396 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 397 { 398 struct exec_queue_policy policy; 399 struct xe_device *xe = guc_to_xe(guc); 400 enum xe_exec_queue_priority prio = q->sched_props.priority; 401 u32 timeslice_us = q->sched_props.timeslice_us; 402 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 403 404 xe_assert(xe, exec_queue_registered(q)); 405 406 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 407 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 408 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 409 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 410 411 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 412 __guc_exec_queue_policy_action_size(&policy), 0, 0); 413 } 414 415 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 416 { 417 struct exec_queue_policy policy; 418 419 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 420 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 421 422 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 423 __guc_exec_queue_policy_action_size(&policy), 0, 0); 424 } 425 426 #define parallel_read(xe_, map_, field_) \ 427 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 428 field_) 429 #define parallel_write(xe_, map_, field_, val_) \ 430 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 431 field_, val_) 432 433 static void __register_mlrc_engine(struct xe_guc *guc, 434 struct xe_exec_queue *q, 435 struct guc_ctxt_registration_info *info) 436 { 437 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 438 struct xe_device *xe = guc_to_xe(guc); 439 u32 action[MAX_MLRC_REG_SIZE]; 440 int len = 0; 441 int i; 442 443 xe_assert(xe, xe_exec_queue_is_parallel(q)); 444 445 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 446 action[len++] = info->flags; 447 action[len++] = info->context_idx; 448 action[len++] = info->engine_class; 449 action[len++] = info->engine_submit_mask; 450 action[len++] = info->wq_desc_lo; 451 action[len++] = info->wq_desc_hi; 452 action[len++] = info->wq_base_lo; 453 action[len++] = info->wq_base_hi; 454 action[len++] = info->wq_size; 455 action[len++] = q->width; 456 action[len++] = info->hwlrca_lo; 457 action[len++] = info->hwlrca_hi; 458 459 for (i = 1; i < q->width; ++i) { 460 struct xe_lrc *lrc = q->lrc + i; 461 462 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 463 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 464 } 465 466 xe_assert(xe, len <= MAX_MLRC_REG_SIZE); 467 #undef MAX_MLRC_REG_SIZE 468 469 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 470 } 471 472 static void __register_engine(struct xe_guc *guc, 473 struct guc_ctxt_registration_info *info) 474 { 475 u32 action[] = { 476 XE_GUC_ACTION_REGISTER_CONTEXT, 477 info->flags, 478 info->context_idx, 479 info->engine_class, 480 info->engine_submit_mask, 481 info->wq_desc_lo, 482 info->wq_desc_hi, 483 info->wq_base_lo, 484 info->wq_base_hi, 485 info->wq_size, 486 info->hwlrca_lo, 487 info->hwlrca_hi, 488 }; 489 490 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 491 } 492 493 static void register_engine(struct xe_exec_queue *q) 494 { 495 struct xe_guc *guc = exec_queue_to_guc(q); 496 struct xe_device *xe = guc_to_xe(guc); 497 struct xe_lrc *lrc = q->lrc; 498 struct guc_ctxt_registration_info 
info; 499 500 xe_assert(xe, !exec_queue_registered(q)); 501 502 memset(&info, 0, sizeof(info)); 503 info.context_idx = q->guc->id; 504 info.engine_class = xe_engine_class_to_guc_class(q->class); 505 info.engine_submit_mask = q->logical_mask; 506 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 507 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 508 info.flags = CONTEXT_REGISTRATION_FLAG_KMD; 509 510 if (xe_exec_queue_is_parallel(q)) { 511 u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 512 struct iosys_map map = xe_lrc_parallel_map(lrc); 513 514 info.wq_desc_lo = lower_32_bits(ggtt_addr + 515 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 516 info.wq_desc_hi = upper_32_bits(ggtt_addr + 517 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 518 info.wq_base_lo = lower_32_bits(ggtt_addr + 519 offsetof(struct guc_submit_parallel_scratch, wq[0])); 520 info.wq_base_hi = upper_32_bits(ggtt_addr + 521 offsetof(struct guc_submit_parallel_scratch, wq[0])); 522 info.wq_size = WQ_SIZE; 523 524 q->guc->wqi_head = 0; 525 q->guc->wqi_tail = 0; 526 xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); 527 parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); 528 } 529 530 /* 531 * We must keep a reference for LR engines if engine is registered with 532 * the GuC as jobs signal immediately and can't destroy an engine if the 533 * GuC has a reference to it. 534 */ 535 if (xe_exec_queue_is_lr(q)) 536 xe_exec_queue_get(q); 537 538 set_exec_queue_registered(q); 539 trace_xe_exec_queue_register(q); 540 if (xe_exec_queue_is_parallel(q)) 541 __register_mlrc_engine(guc, q, &info); 542 else 543 __register_engine(guc, &info); 544 init_policies(guc, q); 545 } 546 547 static u32 wq_space_until_wrap(struct xe_exec_queue *q) 548 { 549 return (WQ_SIZE - q->guc->wqi_tail); 550 } 551 552 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) 553 { 554 struct xe_guc *guc = exec_queue_to_guc(q); 555 struct xe_device *xe = guc_to_xe(guc); 556 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 557 unsigned int sleep_period_ms = 1; 558 559 #define AVAILABLE_SPACE \ 560 CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) 561 if (wqi_size > AVAILABLE_SPACE) { 562 try_again: 563 q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); 564 if (wqi_size > AVAILABLE_SPACE) { 565 if (sleep_period_ms == 1024) { 566 xe_gt_reset_async(q->gt); 567 return -ENODEV; 568 } 569 570 msleep(sleep_period_ms); 571 sleep_period_ms <<= 1; 572 goto try_again; 573 } 574 } 575 #undef AVAILABLE_SPACE 576 577 return 0; 578 } 579 580 static int wq_noop_append(struct xe_exec_queue *q) 581 { 582 struct xe_guc *guc = exec_queue_to_guc(q); 583 struct xe_device *xe = guc_to_xe(guc); 584 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 585 u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; 586 587 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 588 return -ENODEV; 589 590 xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); 591 592 parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], 593 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 594 FIELD_PREP(WQ_LEN_MASK, len_dw)); 595 q->guc->wqi_tail = 0; 596 597 return 0; 598 } 599 600 static void wq_item_append(struct xe_exec_queue *q) 601 { 602 struct xe_guc *guc = exec_queue_to_guc(q); 603 struct xe_device *xe = guc_to_xe(guc); 604 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 605 #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ 606 u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; 607 u32 wqi_size = (q->width + 
(WQ_HEADER_SIZE - 1)) * sizeof(u32); 608 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 609 int i = 0, j; 610 611 if (wqi_size > wq_space_until_wrap(q)) { 612 if (wq_noop_append(q)) 613 return; 614 } 615 if (wq_wait_for_space(q, wqi_size)) 616 return; 617 618 wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 619 FIELD_PREP(WQ_LEN_MASK, len_dw); 620 wqi[i++] = xe_lrc_descriptor(q->lrc); 621 wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | 622 FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); 623 wqi[i++] = 0; 624 for (j = 1; j < q->width; ++j) { 625 struct xe_lrc *lrc = q->lrc + j; 626 627 wqi[i++] = lrc->ring.tail / sizeof(u64); 628 } 629 630 xe_assert(xe, i == wqi_size / sizeof(u32)); 631 632 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 633 wq[q->guc->wqi_tail / sizeof(u32)])); 634 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 635 q->guc->wqi_tail += wqi_size; 636 xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); 637 638 xe_device_wmb(xe); 639 640 map = xe_lrc_parallel_map(q->lrc); 641 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 642 } 643 644 #define RESUME_PENDING ~0x0ull 645 static void submit_exec_queue(struct xe_exec_queue *q) 646 { 647 struct xe_guc *guc = exec_queue_to_guc(q); 648 struct xe_device *xe = guc_to_xe(guc); 649 struct xe_lrc *lrc = q->lrc; 650 u32 action[3]; 651 u32 g2h_len = 0; 652 u32 num_g2h = 0; 653 int len = 0; 654 bool extra_submit = false; 655 656 xe_assert(xe, exec_queue_registered(q)); 657 658 if (xe_exec_queue_is_parallel(q)) 659 wq_item_append(q); 660 else 661 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 662 663 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 664 return; 665 666 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 667 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 668 action[len++] = q->guc->id; 669 action[len++] = GUC_CONTEXT_ENABLE; 670 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 671 num_g2h = 1; 672 if (xe_exec_queue_is_parallel(q)) 673 extra_submit = true; 674 675 q->guc->resume_time = RESUME_PENDING; 676 set_exec_queue_pending_enable(q); 677 set_exec_queue_enabled(q); 678 trace_xe_exec_queue_scheduling_enable(q); 679 } else { 680 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 681 action[len++] = q->guc->id; 682 trace_xe_exec_queue_submit(q); 683 } 684 685 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 686 687 if (extra_submit) { 688 len = 0; 689 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 690 action[len++] = q->guc->id; 691 trace_xe_exec_queue_submit(q); 692 693 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 694 } 695 } 696 697 static struct dma_fence * 698 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 699 { 700 struct xe_sched_job *job = to_xe_sched_job(drm_job); 701 struct xe_exec_queue *q = job->q; 702 struct xe_guc *guc = exec_queue_to_guc(q); 703 struct xe_device *xe = guc_to_xe(guc); 704 bool lr = xe_exec_queue_is_lr(q); 705 706 xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 707 exec_queue_banned(q) || exec_queue_suspended(q)); 708 709 trace_xe_sched_job_run(job); 710 711 if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { 712 if (!exec_queue_registered(q)) 713 register_engine(q); 714 if (!lr) /* LR jobs are emitted in the exec IOCTL */ 715 q->ring_ops->emit_job(job); 716 submit_exec_queue(q); 717 } 718 719 if (lr) { 720 xe_sched_job_set_error(job, -EOPNOTSUPP); 721 return NULL; 722 } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { 723 return job->fence; 724 } else { 
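		/* First submission of this job: hand the scheduler its own fence reference */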
		return dma_fence_get(job->fence);
	}
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
	u32 action[] = { \
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
		q->guc->id, \
		GUC_CONTEXT_##enable_disable, \
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_device *xe = guc_to_xe(guc);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct drm_printer p = drm_err_printer(&xe->drm, NULL);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL))
			xe_gt_info(guc_to_gt(guc),
				   "failed to get forcewake for error capture");
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
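		/* LR queues are torn down via the LR cleanup worker, not the scheduler TDR */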
queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); 843 else 844 xe_sched_tdr_queue_imm(&q->guc->sched); 845 } 846 847 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) 848 { 849 struct xe_guc_exec_queue *ge = 850 container_of(w, struct xe_guc_exec_queue, lr_tdr); 851 struct xe_exec_queue *q = ge->q; 852 struct xe_guc *guc = exec_queue_to_guc(q); 853 struct xe_device *xe = guc_to_xe(guc); 854 struct xe_gpu_scheduler *sched = &ge->sched; 855 856 xe_assert(xe, xe_exec_queue_is_lr(q)); 857 trace_xe_exec_queue_lr_cleanup(q); 858 859 /* Kill the run_job / process_msg entry points */ 860 xe_sched_submission_stop(sched); 861 862 /* 863 * Engine state now mostly stable, disable scheduling / deregister if 864 * needed. This cleanup routine might be called multiple times, where 865 * the actual async engine deregister drops the final engine ref. 866 * Calling disable_scheduling_deregister will mark the engine as 867 * destroyed and fire off the CT requests to disable scheduling / 868 * deregister, which we only want to do once. We also don't want to mark 869 * the engine as pending_disable again as this may race with the 870 * xe_guc_deregister_done_handler() which treats it as an unexpected 871 * state. 872 */ 873 if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { 874 struct xe_guc *guc = exec_queue_to_guc(q); 875 int ret; 876 877 set_exec_queue_banned(q); 878 disable_scheduling_deregister(guc, q); 879 880 /* 881 * Must wait for scheduling to be disabled before signalling 882 * any fences, if GT broken the GT reset code should signal us. 883 */ 884 ret = wait_event_timeout(guc->ct.wq, 885 !exec_queue_pending_disable(q) || 886 guc_read_stopped(guc), HZ * 5); 887 if (!ret) { 888 drm_warn(&xe->drm, "Schedule disable failed to respond"); 889 xe_sched_submission_start(sched); 890 xe_gt_reset_async(q->gt); 891 return; 892 } 893 } 894 895 xe_sched_submission_start(sched); 896 } 897 898 static enum drm_gpu_sched_stat 899 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 900 { 901 struct xe_sched_job *job = to_xe_sched_job(drm_job); 902 struct xe_sched_job *tmp_job; 903 struct xe_exec_queue *q = job->q; 904 struct xe_gpu_scheduler *sched = &q->guc->sched; 905 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); 906 int err = -ETIME; 907 int i = 0; 908 909 /* 910 * TDR has fired before free job worker. Common if exec queue 911 * immediately closed after last fence signaled. 
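	 * In that case simply free the job and report a nominal status to the
	 * scheduler.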
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		guc_exec_queue_free_job(drm_job);

		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
		   xe_sched_job_seqno(job), q->guc->id, q->flags);
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");

	simple_error_capture(q);
	xe_devcoredump(job);

	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_exec_queue_fini_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_fini(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
		  guc_read_stopped(guc));
	xe_assert(xe, q->guc->suspend_pending);

	q->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&q->guc->suspend_wait);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
q->guc->resume_time); 1108 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1109 since_resume_ms; 1110 1111 if (wait_ms > 0 && q->guc->resume_time) 1112 msleep(wait_ms); 1113 1114 set_exec_queue_suspended(q); 1115 clear_exec_queue_enabled(q); 1116 set_exec_queue_pending_disable(q); 1117 trace_xe_exec_queue_scheduling_disable(q); 1118 1119 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1120 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1121 } 1122 } else if (q->guc->suspend_pending) { 1123 set_exec_queue_suspended(q); 1124 suspend_fence_signal(q); 1125 } 1126 } 1127 1128 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1129 { 1130 struct xe_exec_queue *q = msg->private_data; 1131 struct xe_guc *guc = exec_queue_to_guc(q); 1132 1133 if (guc_exec_queue_allowed_to_change_state(q)) { 1134 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1135 1136 q->guc->resume_time = RESUME_PENDING; 1137 clear_exec_queue_suspended(q); 1138 set_exec_queue_pending_enable(q); 1139 set_exec_queue_enabled(q); 1140 trace_xe_exec_queue_scheduling_enable(q); 1141 1142 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1143 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1144 } else { 1145 clear_exec_queue_suspended(q); 1146 } 1147 } 1148 1149 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1150 #define SET_SCHED_PROPS 2 1151 #define SUSPEND 3 1152 #define RESUME 4 1153 1154 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1155 { 1156 trace_xe_sched_msg_recv(msg); 1157 1158 switch (msg->opcode) { 1159 case CLEANUP: 1160 __guc_exec_queue_process_msg_cleanup(msg); 1161 break; 1162 case SET_SCHED_PROPS: 1163 __guc_exec_queue_process_msg_set_sched_props(msg); 1164 break; 1165 case SUSPEND: 1166 __guc_exec_queue_process_msg_suspend(msg); 1167 break; 1168 case RESUME: 1169 __guc_exec_queue_process_msg_resume(msg); 1170 break; 1171 default: 1172 XE_WARN_ON("Unknown message type"); 1173 } 1174 } 1175 1176 static const struct drm_sched_backend_ops drm_sched_ops = { 1177 .run_job = guc_exec_queue_run_job, 1178 .free_job = guc_exec_queue_free_job, 1179 .timedout_job = guc_exec_queue_timedout_job, 1180 }; 1181 1182 static const struct xe_sched_backend_ops xe_sched_ops = { 1183 .process_msg = guc_exec_queue_process_msg, 1184 }; 1185 1186 static int guc_exec_queue_init(struct xe_exec_queue *q) 1187 { 1188 struct xe_gpu_scheduler *sched; 1189 struct xe_guc *guc = exec_queue_to_guc(q); 1190 struct xe_device *xe = guc_to_xe(guc); 1191 struct xe_guc_exec_queue *ge; 1192 long timeout; 1193 int err; 1194 1195 xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); 1196 1197 ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1198 if (!ge) 1199 return -ENOMEM; 1200 1201 q->guc = ge; 1202 ge->q = q; 1203 init_waitqueue_head(&ge->suspend_wait); 1204 1205 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : 1206 msecs_to_jiffies(q->sched_props.job_timeout_ms); 1207 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1208 get_submit_wq(guc), 1209 q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, 1210 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1211 q->name, gt_to_xe(q->gt)->drm.dev); 1212 if (err) 1213 goto err_free; 1214 1215 sched = &ge->sched; 1216 err = xe_sched_entity_init(&ge->entity, sched); 1217 if (err) 1218 goto err_sched; 1219 1220 if (xe_exec_queue_is_lr(q)) 1221 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); 1222 1223 mutex_lock(&guc->submission_state.lock); 1224 1225 err = alloc_guc_id(guc, q); 1226 if (err) 1227 goto err_entity; 1228 1229 q->entity = &ge->entity; 1230 1231 if (guc_read_stopped(guc)) 1232 xe_sched_stop(sched); 1233 1234 mutex_unlock(&guc->submission_state.lock); 1235 1236 xe_exec_queue_assign_name(q, q->guc->id); 1237 1238 trace_xe_exec_queue_create(q); 1239 1240 return 0; 1241 1242 err_entity: 1243 mutex_unlock(&guc->submission_state.lock); 1244 xe_sched_entity_fini(&ge->entity); 1245 err_sched: 1246 xe_sched_fini(&ge->sched); 1247 err_free: 1248 kfree(ge); 1249 1250 return err; 1251 } 1252 1253 static void guc_exec_queue_kill(struct xe_exec_queue *q) 1254 { 1255 trace_xe_exec_queue_kill(q); 1256 set_exec_queue_killed(q); 1257 xe_guc_exec_queue_trigger_cleanup(q); 1258 } 1259 1260 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 1261 u32 opcode) 1262 { 1263 INIT_LIST_HEAD(&msg->link); 1264 msg->opcode = opcode; 1265 msg->private_data = q; 1266 1267 trace_xe_sched_msg_add(msg); 1268 xe_sched_add_msg(&q->guc->sched, msg); 1269 } 1270 1271 #define STATIC_MSG_CLEANUP 0 1272 #define STATIC_MSG_SUSPEND 1 1273 #define STATIC_MSG_RESUME 2 1274 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1275 { 1276 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 1277 1278 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) 1279 guc_exec_queue_add_msg(q, msg, CLEANUP); 1280 else 1281 __guc_exec_queue_fini(exec_queue_to_guc(q), q); 1282 } 1283 1284 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 1285 enum xe_exec_queue_priority priority) 1286 { 1287 struct xe_sched_msg *msg; 1288 1289 if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) 1290 return 0; 1291 1292 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1293 if (!msg) 1294 return -ENOMEM; 1295 1296 q->sched_props.priority = priority; 1297 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1298 1299 return 0; 1300 } 1301 1302 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 1303 { 1304 struct xe_sched_msg *msg; 1305 1306 if (q->sched_props.timeslice_us == timeslice_us || 1307 exec_queue_killed_or_banned(q)) 1308 return 0; 1309 1310 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1311 if (!msg) 1312 return -ENOMEM; 1313 1314 q->sched_props.timeslice_us = timeslice_us; 1315 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1316 1317 return 0; 1318 } 1319 1320 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 1321 u32 preempt_timeout_us) 1322 { 1323 struct xe_sched_msg *msg; 1324 1325 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 1326 exec_queue_killed_or_banned(q)) 1327 return 0; 1328 1329 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1330 if (!msg) 1331 return -ENOMEM; 1332 1333 q->sched_props.preempt_timeout_us = preempt_timeout_us; 1334 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1335 1336 return 0; 1337 } 1338 1339 static int 
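/* Queues a SUSPEND message; completion is awaited separately via guc_exec_queue_suspend_wait() */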
guc_exec_queue_suspend(struct xe_exec_queue *q) 1340 { 1341 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 1342 1343 if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) 1344 return -EINVAL; 1345 1346 q->guc->suspend_pending = true; 1347 guc_exec_queue_add_msg(q, msg, SUSPEND); 1348 1349 return 0; 1350 } 1351 1352 static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 1353 { 1354 struct xe_guc *guc = exec_queue_to_guc(q); 1355 1356 wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || 1357 guc_read_stopped(guc)); 1358 } 1359 1360 static void guc_exec_queue_resume(struct xe_exec_queue *q) 1361 { 1362 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 1363 struct xe_guc *guc = exec_queue_to_guc(q); 1364 struct xe_device *xe = guc_to_xe(guc); 1365 1366 xe_assert(xe, !q->guc->suspend_pending); 1367 1368 guc_exec_queue_add_msg(q, msg, RESUME); 1369 } 1370 1371 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 1372 { 1373 return exec_queue_reset(q); 1374 } 1375 1376 /* 1377 * All of these functions are an abstraction layer which other parts of XE can 1378 * use to trap into the GuC backend. All of these functions, aside from init, 1379 * really shouldn't do much other than trap into the DRM scheduler which 1380 * synchronizes these operations. 1381 */ 1382 static const struct xe_exec_queue_ops guc_exec_queue_ops = { 1383 .init = guc_exec_queue_init, 1384 .kill = guc_exec_queue_kill, 1385 .fini = guc_exec_queue_fini, 1386 .set_priority = guc_exec_queue_set_priority, 1387 .set_timeslice = guc_exec_queue_set_timeslice, 1388 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, 1389 .suspend = guc_exec_queue_suspend, 1390 .suspend_wait = guc_exec_queue_suspend_wait, 1391 .resume = guc_exec_queue_resume, 1392 .reset_status = guc_exec_queue_reset_status, 1393 }; 1394 1395 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) 1396 { 1397 struct xe_gpu_scheduler *sched = &q->guc->sched; 1398 1399 /* Stop scheduling + flush any DRM scheduler operations */ 1400 xe_sched_submission_stop(sched); 1401 1402 /* Clean up lost G2H + reset engine state */ 1403 if (exec_queue_registered(q)) { 1404 if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || 1405 xe_exec_queue_is_lr(q)) 1406 xe_exec_queue_put(q); 1407 else if (exec_queue_destroyed(q)) 1408 __guc_exec_queue_fini(guc, q); 1409 } 1410 if (q->guc->suspend_pending) { 1411 set_exec_queue_suspended(q); 1412 suspend_fence_signal(q); 1413 } 1414 atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, 1415 &q->guc->state); 1416 q->guc->resume_time = 0; 1417 trace_xe_exec_queue_stop(q); 1418 1419 /* 1420 * Ban any engine (aside from kernel and engines used for VM ops) with a 1421 * started but not complete job or if a job has gone through a GT reset 1422 * more than twice. 1423 */ 1424 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { 1425 struct xe_sched_job *job = xe_sched_first_pending_job(sched); 1426 1427 if (job) { 1428 if ((xe_sched_job_started(job) && 1429 !xe_sched_job_completed(job)) || 1430 xe_sched_invalidate_job(job, 2)) { 1431 trace_xe_sched_job_ban(job); 1432 xe_sched_tdr_queue_imm(&q->guc->sched); 1433 set_exec_queue_banned(q); 1434 } 1435 } 1436 } 1437 } 1438 1439 int xe_guc_submit_reset_prepare(struct xe_guc *guc) 1440 { 1441 int ret; 1442 1443 /* 1444 * Using an atomic here rather than submission_state.lock as this 1445 * function can be called while holding the CT lock (engine reset 1446 * failure). 
submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against a concurrent
	 * reset and to release any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_stop(guc, q);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned(q)) {
		int i;

		trace_xe_exec_queue_resubmit(q);
		for (i = 0; i < q->width; ++i)
			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_start(q);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	xe_assert(xe, guc_id >= q->guc->id);
	xe_assert(xe, guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_scheduling_done(q);

	if (exec_queue_pending_enable(q)) {
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_exec_queue_pending_disable(q);
		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_exec_queue(guc, q);
		}
1575 } 1576 } 1577 1578 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1579 { 1580 struct xe_device *xe = guc_to_xe(guc); 1581 struct xe_exec_queue *q; 1582 u32 guc_id = msg[0]; 1583 1584 if (unlikely(len < 2)) { 1585 drm_err(&xe->drm, "Invalid length %u", len); 1586 return -EPROTO; 1587 } 1588 1589 q = g2h_exec_queue_lookup(guc, guc_id); 1590 if (unlikely(!q)) 1591 return -EPROTO; 1592 1593 if (unlikely(!exec_queue_pending_enable(q) && 1594 !exec_queue_pending_disable(q))) { 1595 drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1596 atomic_read(&q->guc->state)); 1597 return -EPROTO; 1598 } 1599 1600 handle_sched_done(guc, q); 1601 1602 return 0; 1603 } 1604 1605 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) 1606 { 1607 trace_xe_exec_queue_deregister_done(q); 1608 1609 clear_exec_queue_registered(q); 1610 1611 if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) 1612 xe_exec_queue_put(q); 1613 else 1614 __guc_exec_queue_fini(guc, q); 1615 } 1616 1617 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1618 { 1619 struct xe_device *xe = guc_to_xe(guc); 1620 struct xe_exec_queue *q; 1621 u32 guc_id = msg[0]; 1622 1623 if (unlikely(len < 1)) { 1624 drm_err(&xe->drm, "Invalid length %u", len); 1625 return -EPROTO; 1626 } 1627 1628 q = g2h_exec_queue_lookup(guc, guc_id); 1629 if (unlikely(!q)) 1630 return -EPROTO; 1631 1632 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 1633 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 1634 drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1635 atomic_read(&q->guc->state)); 1636 return -EPROTO; 1637 } 1638 1639 handle_deregister_done(guc, q); 1640 1641 return 0; 1642 } 1643 1644 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 1645 { 1646 struct xe_device *xe = guc_to_xe(guc); 1647 struct xe_exec_queue *q; 1648 u32 guc_id = msg[0]; 1649 1650 if (unlikely(len < 1)) { 1651 drm_err(&xe->drm, "Invalid length %u", len); 1652 return -EPROTO; 1653 } 1654 1655 q = g2h_exec_queue_lookup(guc, guc_id); 1656 if (unlikely(!q)) 1657 return -EPROTO; 1658 1659 drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); 1660 1661 /* FIXME: Do error capture, most likely async */ 1662 1663 trace_xe_exec_queue_reset(q); 1664 1665 /* 1666 * A banned engine is a NOP at this point (came from 1667 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 1668 * jobs by setting timeout of the job to the minimum value kicking 1669 * guc_exec_queue_timedout_job. 
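	 * The TDR will then either ban the queue or escalate to a full GT reset.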
1670 */ 1671 set_exec_queue_reset(q); 1672 if (!exec_queue_banned(q)) 1673 xe_guc_exec_queue_trigger_cleanup(q); 1674 1675 return 0; 1676 } 1677 1678 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 1679 u32 len) 1680 { 1681 struct xe_device *xe = guc_to_xe(guc); 1682 struct xe_exec_queue *q; 1683 u32 guc_id = msg[0]; 1684 1685 if (unlikely(len < 1)) { 1686 drm_err(&xe->drm, "Invalid length %u", len); 1687 return -EPROTO; 1688 } 1689 1690 q = g2h_exec_queue_lookup(guc, guc_id); 1691 if (unlikely(!q)) 1692 return -EPROTO; 1693 1694 drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); 1695 trace_xe_exec_queue_memory_cat_error(q); 1696 1697 /* Treat the same as engine reset */ 1698 set_exec_queue_reset(q); 1699 if (!exec_queue_banned(q)) 1700 xe_guc_exec_queue_trigger_cleanup(q); 1701 1702 return 0; 1703 } 1704 1705 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 1706 { 1707 struct xe_device *xe = guc_to_xe(guc); 1708 u8 guc_class, instance; 1709 u32 reason; 1710 1711 if (unlikely(len != 3)) { 1712 drm_err(&xe->drm, "Invalid length %u", len); 1713 return -EPROTO; 1714 } 1715 1716 guc_class = msg[0]; 1717 instance = msg[1]; 1718 reason = msg[2]; 1719 1720 /* Unexpected failure of a hardware feature, log an actual error */ 1721 drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", 1722 guc_class, instance, reason); 1723 1724 xe_gt_reset_async(guc_to_gt(guc)); 1725 1726 return 0; 1727 } 1728 1729 static void 1730 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 1731 struct xe_guc_submit_exec_queue_snapshot *snapshot) 1732 { 1733 struct xe_guc *guc = exec_queue_to_guc(q); 1734 struct xe_device *xe = guc_to_xe(guc); 1735 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 1736 int i; 1737 1738 snapshot->guc.wqi_head = q->guc->wqi_head; 1739 snapshot->guc.wqi_tail = q->guc->wqi_tail; 1740 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 1741 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 1742 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 1743 wq_desc.wq_status); 1744 1745 if (snapshot->parallel.wq_desc.head != 1746 snapshot->parallel.wq_desc.tail) { 1747 for (i = snapshot->parallel.wq_desc.head; 1748 i != snapshot->parallel.wq_desc.tail; 1749 i = (i + sizeof(u32)) % WQ_SIZE) 1750 snapshot->parallel.wq[i / sizeof(u32)] = 1751 parallel_read(xe, map, wq[i / sizeof(u32)]); 1752 } 1753 } 1754 1755 static void 1756 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 1757 struct drm_printer *p) 1758 { 1759 int i; 1760 1761 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 1762 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 1763 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 1764 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 1765 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 1766 1767 if (snapshot->parallel.wq_desc.head != 1768 snapshot->parallel.wq_desc.tail) { 1769 for (i = snapshot->parallel.wq_desc.head; 1770 i != snapshot->parallel.wq_desc.tail; 1771 i = (i + sizeof(u32)) % WQ_SIZE) 1772 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 1773 snapshot->parallel.wq[i / sizeof(u32)]); 1774 } 1775 } 1776 1777 /** 1778 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 1779 * @q: faulty exec queue 1780 * 1781 * This can be printed out in a later stage like during dev_coredump 1782 * analysis. 
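 * State that cannot be captured from the fence signalling path is filled in
 * later by xe_guc_exec_queue_snapshot_capture_delayed().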
1783 * 1784 * Returns: a GuC Submit Engine snapshot object that must be freed by the 1785 * caller, using `xe_guc_exec_queue_snapshot_free`. 1786 */ 1787 struct xe_guc_submit_exec_queue_snapshot * 1788 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 1789 { 1790 struct xe_gpu_scheduler *sched = &q->guc->sched; 1791 struct xe_guc_submit_exec_queue_snapshot *snapshot; 1792 int i; 1793 1794 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 1795 1796 if (!snapshot) 1797 return NULL; 1798 1799 snapshot->guc.id = q->guc->id; 1800 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 1801 snapshot->class = q->class; 1802 snapshot->logical_mask = q->logical_mask; 1803 snapshot->width = q->width; 1804 snapshot->refcount = kref_read(&q->refcount); 1805 snapshot->sched_timeout = sched->base.timeout; 1806 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 1807 snapshot->sched_props.preempt_timeout_us = 1808 q->sched_props.preempt_timeout_us; 1809 1810 snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), 1811 GFP_ATOMIC); 1812 1813 if (snapshot->lrc) { 1814 for (i = 0; i < q->width; ++i) { 1815 struct xe_lrc *lrc = q->lrc + i; 1816 1817 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); 1818 } 1819 } 1820 1821 snapshot->schedule_state = atomic_read(&q->guc->state); 1822 snapshot->exec_queue_flags = q->flags; 1823 1824 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 1825 if (snapshot->parallel_execution) 1826 guc_exec_queue_wq_snapshot_capture(q, snapshot); 1827 1828 spin_lock(&sched->base.job_list_lock); 1829 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); 1830 snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, 1831 sizeof(struct pending_list_snapshot), 1832 GFP_ATOMIC); 1833 1834 if (snapshot->pending_list) { 1835 struct xe_sched_job *job_iter; 1836 1837 i = 0; 1838 list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { 1839 snapshot->pending_list[i].seqno = 1840 xe_sched_job_seqno(job_iter); 1841 snapshot->pending_list[i].fence = 1842 dma_fence_is_signaled(job_iter->fence) ? 1 : 0; 1843 snapshot->pending_list[i].finished = 1844 dma_fence_is_signaled(&job_iter->drm.s_fence->finished) 1845 ? 1 : 0; 1846 i++; 1847 } 1848 } 1849 1850 spin_unlock(&sched->base.job_list_lock); 1851 1852 return snapshot; 1853 } 1854 1855 /** 1856 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. 1857 * @snapshot: Previously captured snapshot of job. 1858 * 1859 * This captures some data that requires taking some locks, so it cannot be done in signaling path. 1860 */ 1861 void 1862 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) 1863 { 1864 int i; 1865 1866 if (!snapshot || !snapshot->lrc) 1867 return; 1868 1869 for (i = 0; i < snapshot->width; ++i) 1870 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); 1871 } 1872 1873 /** 1874 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. 1875 * @snapshot: GuC Submit Engine snapshot object. 1876 * @p: drm_printer where it will be printed out. 1877 * 1878 * This function prints out a given GuC Submit Engine snapshot object. 
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
		xe_lrc_snapshot_print(snapshot->lrc[i], p);

	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	int i;

	if (!snapshot)
		return;

	if (snapshot->lrc) {
		for (i = 0; i < snapshot->width; i++)
			xe_lrc_snapshot_free(snapshot->lrc[i]);
		kfree(snapshot->lrc);
	}
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}
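
/*
 * Usage sketch (illustrative only, not part of the driver): a debugfs show
 * callback could dump every registered exec queue with something like:
 *
 *	struct drm_printer p = drm_seq_file_printer(m);
 *
 *	xe_guc_submit_print(guc, &p);
 */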