// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED			(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED			(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define ENGINE_STATE_KILLED			(1 << 7)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	q->flags |= EXEC_QUEUE_FLAG_BANNED;
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(ENGINE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_killed_or_banned(struct xe_exec_queue *q)
{
	return exec_queue_killed(q) || exec_queue_banned(q);
}

#ifdef CONFIG_PROVE_LOCKING
static int alloc_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
		guc->submission_state.submit_wq_pool[i] =
			alloc_ordered_workqueue("submit_wq", 0);
		if (!guc->submission_state.submit_wq_pool[i])
			goto err_free;
	}

	return 0;

err_free:
	while (i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);

	return -ENOMEM;
}

static void free_submit_wq(struct xe_guc *guc)
{
	int i;

	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;

	return guc->submission_state.submit_wq_pool[idx];
}
#else
static int alloc_submit_wq(struct xe_guc *guc)
{
	return 0;
}

static void free_submit_wq(struct xe_guc *guc)
{

}

static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
	return NULL;
}
#endif

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.exec_queue_lookup);
	ida_destroy(&guc->submission_state.guc_ids);
	bitmap_free(guc->submission_state.guc_ids_bitmap);
	free_submit_wq(guc);
	mutex_destroy(&guc->submission_state.lock);
}

#define GUC_ID_MAX		65535
#define GUC_ID_NUMBER_MLRC	4096
#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC

static const struct xe_exec_queue_ops guc_exec_queue_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);
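
	/*
	 * The dummy lock/unlock above never protects any data; together with
	 * the surrounding fs_reclaim_acquire()/release() it only records for
	 * lockdep that submission_state.lock (and, via might_lock(), the
	 * suspend lock) may be taken on the memory-reclaim path. Any later
	 * attempt to allocate with GFP_KERNEL while holding these locks is
	 * then flagged as a potential deadlock immediately, rather than only
	 * when reclaim actually hits the inversion.
	 */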
	fs_reclaim_release(GFP_KERNEL);
}

int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	err = alloc_submit_wq(guc);
	if (err) {
		bitmap_free(guc->submission_state.guc_ids_bitmap);
		return err;
	}

	gt->exec_queue_ops = &guc_exec_queue_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.exec_queue_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	if (xe_exec_queue_is_parallel(q))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      q->guc->id - GUC_ID_START_MLRC,
				      order_base_2(q->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	void *ptr;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
	 * failure.
329 */ 330 lockdep_assert_held(&guc->submission_state.lock); 331 332 if (xe_exec_queue_is_parallel(q)) { 333 void *bitmap = guc->submission_state.guc_ids_bitmap; 334 335 ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, 336 order_base_2(q->width)); 337 } else { 338 ret = ida_simple_get(&guc->submission_state.guc_ids, 0, 339 GUC_ID_NUMBER_SLRC, GFP_NOWAIT); 340 } 341 if (ret < 0) 342 return ret; 343 344 q->guc->id = ret; 345 if (xe_exec_queue_is_parallel(q)) 346 q->guc->id += GUC_ID_START_MLRC; 347 348 for (i = 0; i < q->width; ++i) { 349 ptr = xa_store(&guc->submission_state.exec_queue_lookup, 350 q->guc->id + i, q, GFP_NOWAIT); 351 if (IS_ERR(ptr)) { 352 ret = PTR_ERR(ptr); 353 goto err_release; 354 } 355 } 356 357 return 0; 358 359 err_release: 360 __release_guc_id(guc, q, i); 361 362 return ret; 363 } 364 365 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) 366 { 367 mutex_lock(&guc->submission_state.lock); 368 __release_guc_id(guc, q, q->width); 369 mutex_unlock(&guc->submission_state.lock); 370 } 371 372 struct exec_queue_policy { 373 u32 count; 374 struct guc_update_exec_queue_policy h2g; 375 }; 376 377 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) 378 { 379 size_t bytes = sizeof(policy->h2g.header) + 380 (sizeof(policy->h2g.klv[0]) * policy->count); 381 382 return bytes / sizeof(u32); 383 } 384 385 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, 386 u16 guc_id) 387 { 388 policy->h2g.header.action = 389 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 390 policy->h2g.header.guc_id = guc_id; 391 policy->count = 0; 392 } 393 394 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ 395 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ 396 u32 data) \ 397 { \ 398 XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 399 \ 400 policy->h2g.klv[policy->count].kl = \ 401 FIELD_PREP(GUC_KLV_0_KEY, \ 402 GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 403 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 404 policy->h2g.klv[policy->count].value = data; \ 405 policy->count++; \ 406 } 407 408 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 409 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 410 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) 411 #undef MAKE_EXEC_QUEUE_POLICY_ADD 412 413 static const int xe_exec_queue_prio_to_guc[] = { 414 [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, 415 [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, 416 [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, 417 [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, 418 }; 419 420 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 421 { 422 struct exec_queue_policy policy; 423 struct xe_device *xe = guc_to_xe(guc); 424 enum xe_exec_queue_priority prio = q->sched_props.priority; 425 u32 timeslice_us = q->sched_props.timeslice_us; 426 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 427 428 xe_assert(xe, exec_queue_registered(q)); 429 430 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 431 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); 432 __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); 433 __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); 434 435 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 436 __guc_exec_queue_policy_action_size(&policy), 0, 0); 437 } 438 439 static void 
set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) 440 { 441 struct exec_queue_policy policy; 442 443 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 444 __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); 445 446 xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, 447 __guc_exec_queue_policy_action_size(&policy), 0, 0); 448 } 449 450 #define parallel_read(xe_, map_, field_) \ 451 xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 452 field_) 453 #define parallel_write(xe_, map_, field_, val_) \ 454 xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ 455 field_, val_) 456 457 static void __register_mlrc_engine(struct xe_guc *guc, 458 struct xe_exec_queue *q, 459 struct guc_ctxt_registration_info *info) 460 { 461 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 462 struct xe_device *xe = guc_to_xe(guc); 463 u32 action[MAX_MLRC_REG_SIZE]; 464 int len = 0; 465 int i; 466 467 xe_assert(xe, xe_exec_queue_is_parallel(q)); 468 469 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 470 action[len++] = info->flags; 471 action[len++] = info->context_idx; 472 action[len++] = info->engine_class; 473 action[len++] = info->engine_submit_mask; 474 action[len++] = info->wq_desc_lo; 475 action[len++] = info->wq_desc_hi; 476 action[len++] = info->wq_base_lo; 477 action[len++] = info->wq_base_hi; 478 action[len++] = info->wq_size; 479 action[len++] = q->width; 480 action[len++] = info->hwlrca_lo; 481 action[len++] = info->hwlrca_hi; 482 483 for (i = 1; i < q->width; ++i) { 484 struct xe_lrc *lrc = q->lrc + i; 485 486 action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); 487 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 488 } 489 490 xe_assert(xe, len <= MAX_MLRC_REG_SIZE); 491 #undef MAX_MLRC_REG_SIZE 492 493 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 494 } 495 496 static void __register_engine(struct xe_guc *guc, 497 struct guc_ctxt_registration_info *info) 498 { 499 u32 action[] = { 500 XE_GUC_ACTION_REGISTER_CONTEXT, 501 info->flags, 502 info->context_idx, 503 info->engine_class, 504 info->engine_submit_mask, 505 info->wq_desc_lo, 506 info->wq_desc_hi, 507 info->wq_base_lo, 508 info->wq_base_hi, 509 info->wq_size, 510 info->hwlrca_lo, 511 info->hwlrca_hi, 512 }; 513 514 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 515 } 516 517 static void register_engine(struct xe_exec_queue *q) 518 { 519 struct xe_guc *guc = exec_queue_to_guc(q); 520 struct xe_device *xe = guc_to_xe(guc); 521 struct xe_lrc *lrc = q->lrc; 522 struct guc_ctxt_registration_info info; 523 524 xe_assert(xe, !exec_queue_registered(q)); 525 526 memset(&info, 0, sizeof(info)); 527 info.context_idx = q->guc->id; 528 info.engine_class = xe_engine_class_to_guc_class(q->class); 529 info.engine_submit_mask = q->logical_mask; 530 info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); 531 info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); 532 info.flags = CONTEXT_REGISTRATION_FLAG_KMD; 533 534 if (xe_exec_queue_is_parallel(q)) { 535 u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); 536 struct iosys_map map = xe_lrc_parallel_map(lrc); 537 538 info.wq_desc_lo = lower_32_bits(ggtt_addr + 539 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 540 info.wq_desc_hi = upper_32_bits(ggtt_addr + 541 offsetof(struct guc_submit_parallel_scratch, wq_desc)); 542 info.wq_base_lo = lower_32_bits(ggtt_addr + 543 offsetof(struct guc_submit_parallel_scratch, wq[0])); 544 info.wq_base_hi = upper_32_bits(ggtt_addr + 545 offsetof(struct 
			guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines if the engine is registered
	 * with the GuC, as jobs signal immediately and we can't destroy an
	 * engine while the GuC still holds a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_engine(guc, q, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, q);
}

static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_assert(xe, i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
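
	/*
	 * The WQ item copied above must be visible to the GuC before the tail
	 * pointer is updated, since the GuC may start parsing the item as soon
	 * as it observes the new tail. The write barrier below provides that
	 * publish ordering ahead of the wq_desc.tail write.
	 */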
661 662 xe_device_wmb(xe); 663 664 map = xe_lrc_parallel_map(q->lrc); 665 parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); 666 } 667 668 #define RESUME_PENDING ~0x0ull 669 static void submit_exec_queue(struct xe_exec_queue *q) 670 { 671 struct xe_guc *guc = exec_queue_to_guc(q); 672 struct xe_device *xe = guc_to_xe(guc); 673 struct xe_lrc *lrc = q->lrc; 674 u32 action[3]; 675 u32 g2h_len = 0; 676 u32 num_g2h = 0; 677 int len = 0; 678 bool extra_submit = false; 679 680 xe_assert(xe, exec_queue_registered(q)); 681 682 if (xe_exec_queue_is_parallel(q)) 683 wq_item_append(q); 684 else 685 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 686 687 if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) 688 return; 689 690 if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { 691 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 692 action[len++] = q->guc->id; 693 action[len++] = GUC_CONTEXT_ENABLE; 694 g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 695 num_g2h = 1; 696 if (xe_exec_queue_is_parallel(q)) 697 extra_submit = true; 698 699 q->guc->resume_time = RESUME_PENDING; 700 set_exec_queue_pending_enable(q); 701 set_exec_queue_enabled(q); 702 trace_xe_exec_queue_scheduling_enable(q); 703 } else { 704 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 705 action[len++] = q->guc->id; 706 trace_xe_exec_queue_submit(q); 707 } 708 709 xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); 710 711 if (extra_submit) { 712 len = 0; 713 action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; 714 action[len++] = q->guc->id; 715 trace_xe_exec_queue_submit(q); 716 717 xe_guc_ct_send(&guc->ct, action, len, 0, 0); 718 } 719 } 720 721 static struct dma_fence * 722 guc_exec_queue_run_job(struct drm_sched_job *drm_job) 723 { 724 struct xe_sched_job *job = to_xe_sched_job(drm_job); 725 struct xe_exec_queue *q = job->q; 726 struct xe_guc *guc = exec_queue_to_guc(q); 727 struct xe_device *xe = guc_to_xe(guc); 728 bool lr = xe_exec_queue_is_lr(q); 729 730 xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 731 exec_queue_banned(q) || exec_queue_suspended(q)); 732 733 trace_xe_sched_job_run(job); 734 735 if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { 736 if (!exec_queue_registered(q)) 737 register_engine(q); 738 if (!lr) /* LR jobs are emitted in the exec IOCTL */ 739 q->ring_ops->emit_job(job); 740 submit_exec_queue(q); 741 } 742 743 if (lr) { 744 xe_sched_job_set_error(job, -EOPNOTSUPP); 745 return NULL; 746 } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { 747 return job->fence; 748 } else { 749 return dma_fence_get(job->fence); 750 } 751 } 752 753 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) 754 { 755 struct xe_sched_job *job = to_xe_sched_job(drm_job); 756 757 trace_xe_sched_job_free(job); 758 xe_sched_job_put(job); 759 } 760 761 static int guc_read_stopped(struct xe_guc *guc) 762 { 763 return atomic_read(&guc->submission_state.stopped); 764 } 765 766 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ 767 u32 action[] = { \ 768 XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ 769 q->guc->id, \ 770 GUC_CONTEXT_##enable_disable, \ 771 } 772 773 static void disable_scheduling_deregister(struct xe_guc *guc, 774 struct xe_exec_queue *q) 775 { 776 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 777 struct xe_device *xe = guc_to_xe(guc); 778 int ret; 779 780 set_min_preemption_timeout(guc, q); 781 smp_rmb(); 782 ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || 783 guc_read_stopped(guc), HZ * 5); 784 if (!ret) { 
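		/*
		 * Timed out waiting for the pending enable to be processed;
		 * most likely the GuC / CT channel is no longer making
		 * progress. Restart the scheduler so jobs are not left stuck
		 * behind the dead request, then fall back to a full GT reset
		 * and kick the TDR so this queue is cleaned up afterwards.
		 */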
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		drm_warn(&xe->drm, "Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = q->logical_mask;
	u32 width_mask = (0x1 << q->width) - 1;
	int i;
	bool cookie;

	if (q->vm && !q->vm->error_capture.capture_once) {
		q->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += q->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p, true);
		guc_exec_queue_print(q, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != q->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print(hwe, &p);
		}
		xe_analyze_vm(&p, q->vm, q->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_exec_queue *q)
{
}
#endif

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gpu_scheduler *sched = &ge->sched;

	xe_assert(xe, xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to
	 * mark the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	xe_sched_submission_start(sched);
}

static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), q->guc->id, q->flags);
		simple_error_capture(q);
		xe_devcoredump(q);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), q->guc->id, q->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
	    (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (exec_queue_registered(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;
		set_exec_queue_banned(q);
		if (!exec_queue_destroyed(q)) {
			xe_exec_queue_get(q);
			disable_scheduling_deregister(guc, q);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret || guc_read_stopped(guc)) {
			drm_warn(&xe->drm, "Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, fini_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
		xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q);
	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	kfree(ge);
	xe_exec_queue_fini(q);
}

static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
{
	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT)
		__guc_exec_queue_fini_async(&q->guc->fini_async);
	else
		queue_work(system_wq, &q->guc->fini_async);
}

static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
1060 */ 1061 guc_exec_queue_fini_async(q); 1062 } 1063 1064 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) 1065 { 1066 struct xe_exec_queue *q = msg->private_data; 1067 struct xe_guc *guc = exec_queue_to_guc(q); 1068 struct xe_device *xe = guc_to_xe(guc); 1069 1070 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1071 trace_xe_exec_queue_cleanup_entity(q); 1072 1073 if (exec_queue_registered(q)) 1074 disable_scheduling_deregister(guc, q); 1075 else 1076 __guc_exec_queue_fini(guc, q); 1077 } 1078 1079 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) 1080 { 1081 return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); 1082 } 1083 1084 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) 1085 { 1086 struct xe_exec_queue *q = msg->private_data; 1087 struct xe_guc *guc = exec_queue_to_guc(q); 1088 1089 if (guc_exec_queue_allowed_to_change_state(q)) 1090 init_policies(guc, q); 1091 kfree(msg); 1092 } 1093 1094 static void suspend_fence_signal(struct xe_exec_queue *q) 1095 { 1096 struct xe_guc *guc = exec_queue_to_guc(q); 1097 struct xe_device *xe = guc_to_xe(guc); 1098 1099 xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || 1100 guc_read_stopped(guc)); 1101 xe_assert(xe, q->guc->suspend_pending); 1102 1103 q->guc->suspend_pending = false; 1104 smp_wmb(); 1105 wake_up(&q->guc->suspend_wait); 1106 } 1107 1108 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) 1109 { 1110 struct xe_exec_queue *q = msg->private_data; 1111 struct xe_guc *guc = exec_queue_to_guc(q); 1112 1113 if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && 1114 exec_queue_enabled(q)) { 1115 wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || 1116 guc_read_stopped(guc)); 1117 1118 if (!guc_read_stopped(guc)) { 1119 MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1120 s64 since_resume_ms = 1121 ktime_ms_delta(ktime_get(), 1122 q->guc->resume_time); 1123 s64 wait_ms = q->vm->preempt.min_run_period_ms - 1124 since_resume_ms; 1125 1126 if (wait_ms > 0 && q->guc->resume_time) 1127 msleep(wait_ms); 1128 1129 set_exec_queue_suspended(q); 1130 clear_exec_queue_enabled(q); 1131 set_exec_queue_pending_disable(q); 1132 trace_xe_exec_queue_scheduling_disable(q); 1133 1134 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1135 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1136 } 1137 } else if (q->guc->suspend_pending) { 1138 set_exec_queue_suspended(q); 1139 suspend_fence_signal(q); 1140 } 1141 } 1142 1143 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1144 { 1145 struct xe_exec_queue *q = msg->private_data; 1146 struct xe_guc *guc = exec_queue_to_guc(q); 1147 1148 if (guc_exec_queue_allowed_to_change_state(q)) { 1149 MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1150 1151 q->guc->resume_time = RESUME_PENDING; 1152 clear_exec_queue_suspended(q); 1153 set_exec_queue_pending_enable(q); 1154 set_exec_queue_enabled(q); 1155 trace_xe_exec_queue_scheduling_enable(q); 1156 1157 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1158 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1159 } else { 1160 clear_exec_queue_suspended(q); 1161 } 1162 } 1163 1164 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ 1165 #define SET_SCHED_PROPS 2 1166 #define SUSPEND 3 1167 #define RESUME 4 1168 1169 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) 1170 { 1171 trace_xe_sched_msg_recv(msg); 1172 1173 switch (msg->opcode) { 1174 case CLEANUP: 1175 
__guc_exec_queue_process_msg_cleanup(msg); 1176 break; 1177 case SET_SCHED_PROPS: 1178 __guc_exec_queue_process_msg_set_sched_props(msg); 1179 break; 1180 case SUSPEND: 1181 __guc_exec_queue_process_msg_suspend(msg); 1182 break; 1183 case RESUME: 1184 __guc_exec_queue_process_msg_resume(msg); 1185 break; 1186 default: 1187 XE_WARN_ON("Unknown message type"); 1188 } 1189 } 1190 1191 static const struct drm_sched_backend_ops drm_sched_ops = { 1192 .run_job = guc_exec_queue_run_job, 1193 .free_job = guc_exec_queue_free_job, 1194 .timedout_job = guc_exec_queue_timedout_job, 1195 }; 1196 1197 static const struct xe_sched_backend_ops xe_sched_ops = { 1198 .process_msg = guc_exec_queue_process_msg, 1199 }; 1200 1201 static int guc_exec_queue_init(struct xe_exec_queue *q) 1202 { 1203 struct xe_gpu_scheduler *sched; 1204 struct xe_guc *guc = exec_queue_to_guc(q); 1205 struct xe_device *xe = guc_to_xe(guc); 1206 struct xe_guc_exec_queue *ge; 1207 long timeout; 1208 int err; 1209 1210 xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); 1211 1212 ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1213 if (!ge) 1214 return -ENOMEM; 1215 1216 q->guc = ge; 1217 ge->q = q; 1218 init_waitqueue_head(&ge->suspend_wait); 1219 1220 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : 1221 q->hwe->eclass->sched_props.job_timeout_ms; 1222 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, 1223 get_submit_wq(guc), 1224 q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, 1225 timeout, guc_to_gt(guc)->ordered_wq, NULL, 1226 q->name, gt_to_xe(q->gt)->drm.dev); 1227 if (err) 1228 goto err_free; 1229 1230 sched = &ge->sched; 1231 err = xe_sched_entity_init(&ge->entity, sched); 1232 if (err) 1233 goto err_sched; 1234 1235 if (xe_exec_queue_is_lr(q)) 1236 INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); 1237 1238 mutex_lock(&guc->submission_state.lock); 1239 1240 err = alloc_guc_id(guc, q); 1241 if (err) 1242 goto err_entity; 1243 1244 q->entity = &ge->entity; 1245 1246 if (guc_read_stopped(guc)) 1247 xe_sched_stop(sched); 1248 1249 mutex_unlock(&guc->submission_state.lock); 1250 1251 xe_exec_queue_assign_name(q, q->guc->id); 1252 1253 trace_xe_exec_queue_create(q); 1254 1255 return 0; 1256 1257 err_entity: 1258 xe_sched_entity_fini(&ge->entity); 1259 err_sched: 1260 xe_sched_fini(&ge->sched); 1261 err_free: 1262 kfree(ge); 1263 1264 return err; 1265 } 1266 1267 static void guc_exec_queue_kill(struct xe_exec_queue *q) 1268 { 1269 trace_xe_exec_queue_kill(q); 1270 set_exec_queue_killed(q); 1271 xe_guc_exec_queue_trigger_cleanup(q); 1272 } 1273 1274 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, 1275 u32 opcode) 1276 { 1277 INIT_LIST_HEAD(&msg->link); 1278 msg->opcode = opcode; 1279 msg->private_data = q; 1280 1281 trace_xe_sched_msg_add(msg); 1282 xe_sched_add_msg(&q->guc->sched, msg); 1283 } 1284 1285 #define STATIC_MSG_CLEANUP 0 1286 #define STATIC_MSG_SUSPEND 1 1287 #define STATIC_MSG_RESUME 2 1288 static void guc_exec_queue_fini(struct xe_exec_queue *q) 1289 { 1290 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; 1291 1292 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) 1293 guc_exec_queue_add_msg(q, msg, CLEANUP); 1294 else 1295 __guc_exec_queue_fini(exec_queue_to_guc(q), q); 1296 } 1297 1298 static int guc_exec_queue_set_priority(struct xe_exec_queue *q, 1299 enum xe_exec_queue_priority priority) 1300 { 1301 struct xe_sched_msg *msg; 1302 1303 if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) 1304 return 0; 1305 
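
	/*
	 * The new priority is not pushed to the GuC here; it is queued as a
	 * SET_SCHED_PROPS message on the queue's scheduler, so the update is
	 * serialized with job submission and the actual
	 * UPDATE_CONTEXT_POLICIES H2G is sent from the scheduler's ordered
	 * work (see __guc_exec_queue_process_msg_set_sched_props() and
	 * init_policies()).
	 */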
1306 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1307 if (!msg) 1308 return -ENOMEM; 1309 1310 q->sched_props.priority = priority; 1311 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1312 1313 return 0; 1314 } 1315 1316 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 1317 { 1318 struct xe_sched_msg *msg; 1319 1320 if (q->sched_props.timeslice_us == timeslice_us || 1321 exec_queue_killed_or_banned(q)) 1322 return 0; 1323 1324 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1325 if (!msg) 1326 return -ENOMEM; 1327 1328 q->sched_props.timeslice_us = timeslice_us; 1329 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1330 1331 return 0; 1332 } 1333 1334 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 1335 u32 preempt_timeout_us) 1336 { 1337 struct xe_sched_msg *msg; 1338 1339 if (q->sched_props.preempt_timeout_us == preempt_timeout_us || 1340 exec_queue_killed_or_banned(q)) 1341 return 0; 1342 1343 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 1344 if (!msg) 1345 return -ENOMEM; 1346 1347 q->sched_props.preempt_timeout_us = preempt_timeout_us; 1348 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); 1349 1350 return 0; 1351 } 1352 1353 static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) 1354 { 1355 struct xe_gpu_scheduler *sched = &q->guc->sched; 1356 struct xe_guc *guc = exec_queue_to_guc(q); 1357 struct xe_device *xe = guc_to_xe(guc); 1358 1359 xe_assert(xe, !exec_queue_registered(q)); 1360 xe_assert(xe, !exec_queue_banned(q)); 1361 xe_assert(xe, !exec_queue_killed(q)); 1362 1363 sched->base.timeout = job_timeout_ms; 1364 1365 return 0; 1366 } 1367 1368 static int guc_exec_queue_suspend(struct xe_exec_queue *q) 1369 { 1370 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; 1371 1372 if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) 1373 return -EINVAL; 1374 1375 q->guc->suspend_pending = true; 1376 guc_exec_queue_add_msg(q, msg, SUSPEND); 1377 1378 return 0; 1379 } 1380 1381 static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) 1382 { 1383 struct xe_guc *guc = exec_queue_to_guc(q); 1384 1385 wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || 1386 guc_read_stopped(guc)); 1387 } 1388 1389 static void guc_exec_queue_resume(struct xe_exec_queue *q) 1390 { 1391 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 1392 struct xe_guc *guc = exec_queue_to_guc(q); 1393 struct xe_device *xe = guc_to_xe(guc); 1394 1395 xe_assert(xe, !q->guc->suspend_pending); 1396 1397 guc_exec_queue_add_msg(q, msg, RESUME); 1398 } 1399 1400 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) 1401 { 1402 return exec_queue_reset(q); 1403 } 1404 1405 /* 1406 * All of these functions are an abstraction layer which other parts of XE can 1407 * use to trap into the GuC backend. All of these functions, aside from init, 1408 * really shouldn't do much other than trap into the DRM scheduler which 1409 * synchronizes these operations. 
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_job_timeout = guc_exec_queue_set_job_timeout,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
};

static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (exec_queue_registered(q)) {
		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
		    xe_exec_queue_is_lr(q))
			xe_exec_queue_put(q);
		else if (exec_queue_destroyed(q))
			__guc_exec_queue_fini(guc, q);
	}
	if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
	atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &q->guc->state);
	q->guc->resume_time = 0;
	trace_xe_exec_queue_stop(q);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			     !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&q->guc->sched);
				set_exec_queue_banned(q);
			}
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against concurrent reset
	 * and releasing any TDRs waiting on guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_stop(guc, q);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

static void guc_exec_queue_start(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	if (!exec_queue_killed_or_banned(q)) {
		int i;

		trace_xe_exec_queue_resubmit(q);
		for (i = 0; i < q->width; ++i)
			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;
	struct xe_device *xe = guc_to_xe(guc);

	xe_assert(xe, guc_read_stopped(guc) == 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_start(q);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
	if (unlikely(!q)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	xe_assert(xe, guc_id >= q->guc->id);
	xe_assert(xe, guc_id < (q->guc->id + q->width));

	return q;
}

static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!exec_queue_pending_enable(q) &&
		     !exec_queue_pending_disable(q))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&q->guc->state));
		return -EPROTO;
	}

	trace_xe_exec_queue_scheduling_done(q);

	if (exec_queue_pending_enable(q)) {
		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_exec_queue_pending_disable(q);
		if (q->guc->suspend_pending) {
			suspend_fence_signal(q);
		} else {
			if (exec_queue_banned(q)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_exec_queue(guc, q);
		}
	}

	return 0;
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 1647 drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1648 atomic_read(&q->guc->state)); 1649 return -EPROTO; 1650 } 1651 1652 trace_xe_exec_queue_deregister_done(q); 1653 1654 clear_exec_queue_registered(q); 1655 1656 if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) 1657 xe_exec_queue_put(q); 1658 else 1659 __guc_exec_queue_fini(guc, q); 1660 1661 return 0; 1662 } 1663 1664 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 1665 { 1666 struct xe_device *xe = guc_to_xe(guc); 1667 struct xe_exec_queue *q; 1668 u32 guc_id = msg[0]; 1669 1670 if (unlikely(len < 1)) { 1671 drm_err(&xe->drm, "Invalid length %u", len); 1672 return -EPROTO; 1673 } 1674 1675 q = g2h_exec_queue_lookup(guc, guc_id); 1676 if (unlikely(!q)) 1677 return -EPROTO; 1678 1679 drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); 1680 1681 /* FIXME: Do error capture, most likely async */ 1682 1683 trace_xe_exec_queue_reset(q); 1684 1685 /* 1686 * A banned engine is a NOP at this point (came from 1687 * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel 1688 * jobs by setting timeout of the job to the minimum value kicking 1689 * guc_exec_queue_timedout_job. 1690 */ 1691 set_exec_queue_reset(q); 1692 if (!exec_queue_banned(q)) 1693 xe_guc_exec_queue_trigger_cleanup(q); 1694 1695 return 0; 1696 } 1697 1698 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, 1699 u32 len) 1700 { 1701 struct xe_device *xe = guc_to_xe(guc); 1702 struct xe_exec_queue *q; 1703 u32 guc_id = msg[0]; 1704 1705 if (unlikely(len < 1)) { 1706 drm_err(&xe->drm, "Invalid length %u", len); 1707 return -EPROTO; 1708 } 1709 1710 q = g2h_exec_queue_lookup(guc, guc_id); 1711 if (unlikely(!q)) 1712 return -EPROTO; 1713 1714 drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); 1715 trace_xe_exec_queue_memory_cat_error(q); 1716 1717 /* Treat the same as engine reset */ 1718 set_exec_queue_reset(q); 1719 if (!exec_queue_banned(q)) 1720 xe_guc_exec_queue_trigger_cleanup(q); 1721 1722 return 0; 1723 } 1724 1725 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 1726 { 1727 struct xe_device *xe = guc_to_xe(guc); 1728 u8 guc_class, instance; 1729 u32 reason; 1730 1731 if (unlikely(len != 3)) { 1732 drm_err(&xe->drm, "Invalid length %u", len); 1733 return -EPROTO; 1734 } 1735 1736 guc_class = msg[0]; 1737 instance = msg[1]; 1738 reason = msg[2]; 1739 1740 /* Unexpected failure of a hardware feature, log an actual error */ 1741 drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", 1742 guc_class, instance, reason); 1743 1744 xe_gt_reset_async(guc_to_gt(guc)); 1745 1746 return 0; 1747 } 1748 1749 static void 1750 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, 1751 struct xe_guc_submit_exec_queue_snapshot *snapshot) 1752 { 1753 struct xe_guc *guc = exec_queue_to_guc(q); 1754 struct xe_device *xe = guc_to_xe(guc); 1755 struct iosys_map map = xe_lrc_parallel_map(q->lrc); 1756 int i; 1757 1758 snapshot->guc.wqi_head = q->guc->wqi_head; 1759 snapshot->guc.wqi_tail = q->guc->wqi_tail; 1760 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); 1761 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); 1762 snapshot->parallel.wq_desc.status = parallel_read(xe, map, 1763 wq_desc.wq_status); 1764 1765 if (snapshot->parallel.wq_desc.head != 1766 snapshot->parallel.wq_desc.tail) { 1767 for (i = snapshot->parallel.wq_desc.head; 1768 i != 
snapshot->parallel.wq_desc.tail; 1769 i = (i + sizeof(u32)) % WQ_SIZE) 1770 snapshot->parallel.wq[i / sizeof(u32)] = 1771 parallel_read(xe, map, wq[i / sizeof(u32)]); 1772 } 1773 } 1774 1775 static void 1776 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, 1777 struct drm_printer *p) 1778 { 1779 int i; 1780 1781 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", 1782 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); 1783 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", 1784 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); 1785 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); 1786 1787 if (snapshot->parallel.wq_desc.head != 1788 snapshot->parallel.wq_desc.tail) { 1789 for (i = snapshot->parallel.wq_desc.head; 1790 i != snapshot->parallel.wq_desc.tail; 1791 i = (i + sizeof(u32)) % WQ_SIZE) 1792 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), 1793 snapshot->parallel.wq[i / sizeof(u32)]); 1794 } 1795 } 1796 1797 /** 1798 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. 1799 * @q: Xe exec queue. 1800 * 1801 * This can be printed out in a later stage like during dev_coredump 1802 * analysis. 1803 * 1804 * Returns: a GuC Submit Engine snapshot object that must be freed by the 1805 * caller, using `xe_guc_exec_queue_snapshot_free`. 1806 */ 1807 struct xe_guc_submit_exec_queue_snapshot * 1808 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) 1809 { 1810 struct xe_guc *guc = exec_queue_to_guc(q); 1811 struct xe_device *xe = guc_to_xe(guc); 1812 struct xe_gpu_scheduler *sched = &q->guc->sched; 1813 struct xe_sched_job *job; 1814 struct xe_guc_submit_exec_queue_snapshot *snapshot; 1815 int i; 1816 1817 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); 1818 1819 if (!snapshot) { 1820 drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); 1821 return NULL; 1822 } 1823 1824 snapshot->guc.id = q->guc->id; 1825 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); 1826 snapshot->class = q->class; 1827 snapshot->logical_mask = q->logical_mask; 1828 snapshot->width = q->width; 1829 snapshot->refcount = kref_read(&q->refcount); 1830 snapshot->sched_timeout = sched->base.timeout; 1831 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; 1832 snapshot->sched_props.preempt_timeout_us = 1833 q->sched_props.preempt_timeout_us; 1834 1835 snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), 1836 GFP_ATOMIC); 1837 1838 if (!snapshot->lrc) { 1839 drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); 1840 } else { 1841 for (i = 0; i < q->width; ++i) { 1842 struct xe_lrc *lrc = q->lrc + i; 1843 1844 snapshot->lrc[i].context_desc = 1845 lower_32_bits(xe_lrc_ggtt_addr(lrc)); 1846 snapshot->lrc[i].head = xe_lrc_ring_head(lrc); 1847 snapshot->lrc[i].tail.internal = lrc->ring.tail; 1848 snapshot->lrc[i].tail.memory = 1849 xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); 1850 snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); 1851 snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); 1852 } 1853 } 1854 1855 snapshot->schedule_state = atomic_read(&q->guc->state); 1856 snapshot->exec_queue_flags = q->flags; 1857 1858 snapshot->parallel_execution = xe_exec_queue_is_parallel(q); 1859 if (snapshot->parallel_execution) 1860 guc_exec_queue_wq_snapshot_capture(q, snapshot); 1861 1862 spin_lock(&sched->base.job_list_lock); 1863 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); 1864 snapshot->pending_list = 
		kmalloc_array(snapshot->pending_list_size,
			      sizeof(struct pending_list_snapshot),
			      GFP_ATOMIC);

	if (!snapshot->pending_list) {
		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
	} else {
		i = 0;
		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
			snapshot->pending_list[i].seqno =
				xe_sched_job_seqno(job);
			snapshot->pending_list[i].fence =
				dma_fence_is_signaled(job->fence) ? 1 : 0;
			snapshot->pending_list[i].finished =
				dma_fence_is_signaled(&job->drm.s_fence->finished)
				? 1 : 0;
			i++;
		}
	}

	spin_unlock(&sched->base.job_list_lock);

	return snapshot;
}

/**
 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given GuC Submit Engine snapshot object.
 */
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
				 struct drm_printer *p)
{
	int i;

	if (!snapshot)
		return;

	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
	drm_printf(p, "\tName: %s\n", snapshot->name);
	drm_printf(p, "\tClass: %d\n", snapshot->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
	drm_printf(p, "\tWidth: %d\n", snapshot->width);
	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n",
		   snapshot->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   snapshot->sched_props.preempt_timeout_us);

	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   snapshot->lrc[i].context_desc);
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   snapshot->lrc[i].head);
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   snapshot->lrc[i].tail.internal,
			   snapshot->lrc[i].tail.memory);
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   snapshot->lrc[i].start_seqno);
		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);

	if (snapshot->parallel_execution)
		guc_exec_queue_wq_snapshot_print(snapshot, p);

	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
	     i++)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   snapshot->pending_list[i].seqno,
			   snapshot->pending_list[i].fence,
			   snapshot->pending_list[i].finished);
}

/**
 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
 * snapshot.
 * @snapshot: GuC Submit Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->lrc);
	kfree(snapshot->pending_list);
	kfree(snapshot);
}

static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snapshot;

	snapshot = xe_guc_exec_queue_snapshot_capture(q);
	xe_guc_exec_queue_snapshot_print(snapshot, p);
	xe_guc_exec_queue_snapshot_free(snapshot);
}

/**
 * xe_guc_submit_print - GuC Submit Print.
 * @guc: GuC.
 * @p: drm_printer where it will be printed out.
 *
 * This function captures and prints snapshots of **all** GuC Engines.
 */
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_exec_queue *q;
	unsigned long index;

	if (!xe_device_uc_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		guc_exec_queue_print(q, p);
	mutex_unlock(&guc->submission_state.lock);
}