// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>

#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_engine.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_engine_types.h"
#include "xe_guc_submit.h"
#include "xe_gt.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

#include "gt/intel_lrc_reg.h"

static struct xe_gt *
guc_to_gt(struct xe_guc *guc)
{
	return container_of(guc, struct xe_gt, uc.guc);
}

static struct xe_device *
guc_to_xe(struct xe_guc *guc)
{
	return gt_to_xe(guc_to_gt(guc));
}

static struct xe_guc *
engine_to_guc(struct xe_engine *e)
{
	return &e->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule done G2H for the engine is being processed).
 */
#define ENGINE_STATE_REGISTERED		(1 << 0)
#define ENGINE_STATE_ENABLED		(1 << 1)
#define ENGINE_STATE_PENDING_ENABLE	(1 << 2)
#define ENGINE_STATE_PENDING_DISABLE	(1 << 3)
#define ENGINE_STATE_DESTROYED		(1 << 4)
#define ENGINE_STATE_SUSPENDED		(1 << 5)
#define ENGINE_STATE_RESET		(1 << 6)
#define ENGINE_STATE_KILLED		(1 << 7)
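
/*
 * Summary of the state bits above:
 *
 * REGISTERED	   - the context is registered with the GuC
 * ENABLED	   - GuC scheduling is enabled for the context
 * PENDING_ENABLE  - a schedule enable H2G is in flight, waiting on the G2H ack
 * PENDING_DISABLE - a schedule disable H2G is in flight, waiting on the G2H ack
 * DESTROYED	   - teardown has started, a deregister follows once scheduling
 *		     is disabled
 * SUSPENDED	   - the context has been suspended via a SUSPEND message
 * RESET	   - the GuC reported an engine reset (or CAT error) for the
 *		     context
 * KILLED	   - the engine has been killed, the TDR performs the cleanup
 */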

static bool engine_registered(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
}

static void set_engine_registered(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
}

static void clear_engine_registered(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
}

static bool engine_enabled(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
}

static void set_engine_enabled(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
}

static void clear_engine_enabled(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
}

static bool engine_pending_enable(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
}

static void set_engine_pending_enable(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}

static void clear_engine_pending_enable(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}

static bool engine_pending_disable(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
}

static void set_engine_pending_disable(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}

static void clear_engine_pending_disable(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}

static bool engine_destroyed(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
}

static void set_engine_destroyed(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
}

static bool engine_banned(struct xe_engine *e)
{
	return (e->flags & ENGINE_FLAG_BANNED);
}

static void set_engine_banned(struct xe_engine *e)
{
	e->flags |= ENGINE_FLAG_BANNED;
}

static bool engine_suspended(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
}

static void set_engine_suspended(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
}

static void clear_engine_suspended(struct xe_engine *e)
{
	atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
}

static bool engine_reset(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
}

static void set_engine_reset(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_RESET, &e->guc->state);
}

static bool engine_killed(struct xe_engine *e)
{
	return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
}

static void set_engine_killed(struct xe_engine *e)
{
	atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
}

static bool engine_killed_or_banned(struct xe_engine *e)
{
	return engine_killed(e) || engine_banned(e);
}

static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;

	xa_destroy(&guc->submission_state.engine_lookup);
	ida_destroy(&guc->submission_state.guc_ids);
	bitmap_free(guc->submission_state.guc_ids_bitmap);
}

#define GUC_ID_MAX		65535
#define GUC_ID_NUMBER_MLRC	4096
#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC

static const struct xe_engine_ops guc_engine_ops;

static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	might_lock(&guc->submission_state.suspend.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

int xe_guc_submit_init(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;

	gt->engine_ops = &guc_engine_ops;

	mutex_init(&guc->submission_state.lock);
	xa_init(&guc->submission_state.engine_lookup);
	ida_init(&guc->submission_state.guc_ids);

	spin_lock_init(&guc->submission_state.suspend.lock);
	guc->submission_state.suspend.context = dma_fence_context_alloc(1);

	primelockdep(guc);

	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
	if (err)
		return err;

	return 0;
}

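/*
 * The GuC context ID space is split in two: single-LRC engines get an ID from
 * an ida in [0, GUC_ID_NUMBER_SLRC), while parallel (multi-LRC) engines get a
 * power-of-two block of IDs, sized by the engine width, from a bitmap whose
 * indices are offset by GUC_ID_START_MLRC.
 */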
static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
	int ret;
	void *ptr;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc w/ GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	if (xe_engine_is_parallel(e)) {
		void *bitmap = guc->submission_state.guc_ids_bitmap;

		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
					      order_base_2(e->width));
	} else {
		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
	}
	if (ret < 0)
		return ret;

	e->guc->id = ret;
	if (xe_engine_is_parallel(e))
		e->guc->id += GUC_ID_START_MLRC;

	ptr = xa_store(&guc->submission_state.engine_lookup,
		       e->guc->id, e, GFP_NOWAIT);
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);
		goto err_release;
	}

	return 0;

err_release:
	/* Release the ID from the allocator it actually came from */
	if (xe_engine_is_parallel(e))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      e->guc->id - GUC_ID_START_MLRC,
				      order_base_2(e->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
	mutex_lock(&guc->submission_state.lock);
	xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
	if (xe_engine_is_parallel(e))
		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
				      e->guc->id - GUC_ID_START_MLRC,
				      order_base_2(e->width));
	else
		ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
	mutex_unlock(&guc->submission_state.lock);
}

struct engine_policy {
	u32 count;
	struct guc_update_engine_policy h2g;
};

static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_engine_policy_start_klv(struct engine_policy *policy,
					  u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_ENGINE_POLICY_ADD(func, id) \
static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
					   u32 data) \
{ \
	XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_ENGINE_POLICY_ADD

static const int xe_engine_prio_to_guc[] = {
	[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_engine *e)
{
	struct engine_policy policy;
	enum xe_engine_priority prio = e->priority;
	u32 timeslice_us = e->sched_props.timeslice_us;
	u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;

	XE_BUG_ON(!engine_registered(e));

	__guc_engine_policy_start_klv(&policy, e->guc->id);
	__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
	__guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_engine_policy_action_size(&policy), 0, 0);
}

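/*
 * Drop the preemption timeout to the minimum (1 us) so the context is
 * preempted off the hardware as quickly as possible, e.g. ahead of a schedule
 * disable on the teardown path.
 */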
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
{
	struct engine_policy policy;

	__guc_engine_policy_start_klv(&policy, e->guc->id);
	__guc_engine_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_engine_policy_action_size(&policy), 0, 0);
}

#define PARALLEL_SCRATCH_SIZE	2048
#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
#define CACHELINE_BYTES		64

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

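/*
 * Layout of the per-engine parallel scratch page: the first half holds the
 * GuC work queue descriptor plus the go/join semaphores (each padded to a
 * cacheline), the second half (WQ_SIZE bytes at WQ_OFFSET) is the work queue
 * ring itself.
 */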
struct parallel_scratch {
	struct guc_sched_wq_desc wq_desc;

	struct sync_semaphore go;
	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];

	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
		  sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)

static void __register_mlrc_engine(struct xe_guc *guc,
				   struct xe_engine *e,
				   struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE	(13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	XE_BUG_ON(!xe_engine_is_parallel(e));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = e->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < e->width; ++i) {
		struct xe_lrc *lrc = e->lrc + i;

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_engine(struct xe_guc *guc,
			      struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = e->lrc;
	struct guc_ctxt_registration_info info;

	XE_BUG_ON(engine_registered(e));

	memset(&info, 0, sizeof(info));
	info.context_idx = e->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(e->class);
	info.engine_submit_mask = e->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;

	if (xe_engine_is_parallel(e)) {
		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		e->guc->wqi_head = 0;
		e->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_engine_registered(e);
	trace_xe_engine_register(e);
	if (xe_engine_is_parallel(e))
		__register_mlrc_engine(guc, e, &info);
	else
		__register_engine(guc, &info);
	init_policies(guc, e);
}

static u32 wq_space_until_wrap(struct xe_engine *e)
{
	return (WQ_SIZE - e->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(e->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;

	if (wq_wait_for_space(e, wq_space_until_wrap(e)))
		return -ENODEV;

	XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	e->guc->wqi_tail = 0;

	return 0;
}

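/*
 * A MULTI_LRC work queue item is (width + 3) dwords: a header dword with the
 * type and length, the parent LRC descriptor, a dword with the guc_id and the
 * parent ring tail (in qwords), a zeroed dword, then one ring tail dword
 * (in qwords) per child LRC.
 */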
static void wq_item_append(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
	u32 wqi_size = (e->width + 3) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(e)) {
		if (wq_noop_append(e))
			return;
	}
	if (wq_wait_for_space(e, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(e->lrc);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < e->width; ++j) {
		struct xe_lrc *lrc = e->lrc + j;

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	XE_BUG_ON(i != wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct parallel_scratch,
				      wq[e->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	e->guc->wqi_tail += wqi_size;
	XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(e->lrc);
	parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
static void submit_engine(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_lrc *lrc = e->lrc;
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	XE_BUG_ON(!engine_registered(e));

	if (xe_engine_is_parallel(e))
		wq_item_append(e);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);

	if (engine_suspended(e) && !xe_engine_is_parallel(e))
		return;

	if (!engine_enabled(e) && !engine_suspended(e)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = e->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_engine_is_parallel(e))
			extra_submit = true;

		e->guc->resume_time = RESUME_PENDING;
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = e->guc->id;
		trace_xe_engine_submit(e);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_engine_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_engine *e = job->engine;

	XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
		  !engine_banned(e) && !engine_suspended(e));

	trace_xe_sched_job_run(job);

	if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
		if (!engine_registered(e))
			register_engine(e);
		e->ring_ops->emit_job(job);
		submit_engine(e);
	}

	if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
		return job->fence;
	else
		return dma_fence_get(job->fence);
}

static void guc_engine_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

static int guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		e->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

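/*
 * Disable scheduling on the context and deregister it: drop the preemption
 * timeout to the minimum, wait for any in-flight schedule enable to be acked,
 * then send the schedule disable. The deregister itself is issued from the
 * G2H handler once the disable completes, which is why space for two G2H
 * credits is reserved below.
 */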
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_engine *e)
{
	MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, e);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
				 guc_read_stopped(guc), HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &e->guc->sched;

		XE_WARN_ON("Pending enable failed to respond");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(e->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_engine_enabled(e);
	set_engine_pending_disable(e);
	set_engine_destroyed(e);
	trace_xe_engine_scheduling_disable(e);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);

#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct drm_printer p = drm_err_printer("");
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 adj_logical_mask = e->logical_mask;
	u32 width_mask = (0x1 << e->width) - 1;
	int i;
	bool cookie;

	if (e->vm && !e->vm->error_capture.capture_once) {
		e->vm->error_capture.capture_once = true;
		cookie = dma_fence_begin_signalling();
		for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
			if (adj_logical_mask & BIT(i)) {
				adj_logical_mask |= width_mask << i;
				i += e->width;
			} else {
				++i;
			}
		}

		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		xe_guc_ct_print(&guc->ct, &p);
		guc_engine_print(e, &p);
		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
			if (hwe->class != e->hwe->class ||
			    !(BIT(hwe->logical_instance) & adj_logical_mask))
				continue;
			xe_hw_engine_print_state(hwe, &p);
		}
		xe_analyze_vm(&p, e->vm, e->gt->info.id);
		xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
		dma_fence_end_signalling(cookie);
	}
}
#else
static void simple_error_capture(struct xe_engine *e)
{
}
#endif

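/*
 * TDR entry point. Kernel and VM-bound engines normally get a full GT reset
 * instead of being banned. For everything else the engine is banned:
 * scheduling is disabled, hw fence signaling is stopped while the pending
 * jobs are marked with an error, and the scheduler is restarted so the
 * errored jobs can be completed and freed.
 */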
static enum drm_gpu_sched_stat
guc_engine_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_engine *e = job->engine;
	struct xe_gpu_scheduler *sched = &e->guc->sched;
	struct xe_device *xe = guc_to_xe(engine_to_guc(e));
	int err = -ETIME;
	int i = 0;

	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
		XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
		XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));

		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
			   xe_sched_job_seqno(job), e->guc->id, e->flags);
		simple_error_capture(e);
	} else {
		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
			xe_sched_job_seqno(job), e->guc->id, e->flags);
	}
	trace_xe_sched_job_timedout(job);

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	if (e->flags & ENGINE_FLAG_KERNEL ||
	    (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			goto out;
		}
	}

	/* Engine state now stable, disable scheduling if needed */
	if (engine_enabled(e)) {
		struct xe_guc *guc = engine_to_guc(e);
		int ret;

		if (engine_reset(e))
			err = -EIO;
		set_engine_banned(e);
		xe_engine_get(e);
		disable_scheduling_deregister(engine_to_guc(e), e);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !engine_pending_disable(e) ||
					 guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			XE_WARN_ON("Schedule disable failed to respond");
			xe_sched_add_pending_job(sched, job);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(e->gt);
			xe_sched_tdr_queue_imm(sched);
			goto out;
		}
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(e->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);
	xe_sched_tdr_queue_imm(&e->guc->sched);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(e->fence_irq);

out:
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void __guc_engine_fini_async(struct work_struct *w)
{
	struct xe_guc_engine *ge =
		container_of(w, struct xe_guc_engine, fini_async);
	struct xe_engine *e = ge->engine;
	struct xe_guc *guc = engine_to_guc(e);

	trace_xe_engine_destroy(e);

	if (e->flags & ENGINE_FLAG_PERSISTENT)
		xe_device_remove_persitent_engines(gt_to_xe(e->gt), e);
	release_guc_id(guc, e);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	if (!(e->flags & ENGINE_FLAG_KERNEL)) {
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void guc_engine_fini_async(struct xe_engine *e)
{
	bool kernel = e->flags & ENGINE_FLAG_KERNEL;

	INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
	queue_work(system_unbound_wq, &e->guc->fini_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (kernel) {
		struct xe_guc_engine *ge = e->guc;

		flush_work(&ge->fini_async);
		kfree(ge);
		xe_engine_fini(e);
	}
}

static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
{
	/*
	 * Might be done from within the GPU scheduler, need to do this async
	 * as we fini the scheduler when the engine is fini'd and the scheduler
	 * can't complete its own fini from within itself (circular
	 * dependency). Async resolves this and we don't really care when
	 * everything is fini'd, just that it is.
	 */
	guc_engine_fini_async(e);
}

static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
	trace_xe_engine_cleanup_entity(e);

	if (engine_registered(e))
		disable_scheduling_deregister(guc, e);
	else
		__guc_engine_fini(guc, e);
}

static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
{
	return !engine_killed_or_banned(e) && engine_registered(e);
}

static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e))
		init_policies(guc, e);
	kfree(msg);
}

static void suspend_fence_signal(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);

	XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
		  !guc_read_stopped(guc));
	XE_BUG_ON(!e->guc->suspend_pending);

	e->guc->suspend_pending = false;
	smp_wmb();
	wake_up(&e->guc->suspend_wait);
}

static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
	    engine_enabled(e)) {
		wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
			   guc_read_stopped(guc));

		if (!guc_read_stopped(guc)) {
			MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       e->guc->resume_time);
			s64 wait_ms = e->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && e->guc->resume_time)
				msleep(wait_ms);

			set_engine_suspended(e);
			clear_engine_enabled(e);
			set_engine_pending_disable(e);
			trace_xe_engine_scheduling_disable(e);

			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
		}
	} else if (e->guc->suspend_pending) {
		set_engine_suspended(e);
		suspend_fence_signal(e);
	}
}

static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_engine *e = msg->private_data;
	struct xe_guc *guc = engine_to_guc(e);

	if (guc_engine_allowed_to_change_state(e)) {
		MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);

		e->guc->resume_time = RESUME_PENDING;
		clear_engine_suspended(e);
		set_engine_pending_enable(e);
		set_engine_enabled(e);
		trace_xe_engine_scheduling_enable(e);

		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
	} else {
		clear_engine_suspended(e);
	}
}

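/*
 * Opcodes for the messages processed by guc_engine_process_msg(). Messages
 * are posted to the engine's xe_gpu_scheduler via guc_engine_add_msg() and
 * handled from the scheduler, which serializes them against job submission.
 */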
#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4

static void guc_engine_process_msg(struct xe_sched_msg *msg)
{
	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_engine_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_engine_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_engine_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_engine_process_msg_resume(msg);
		break;
	default:
		XE_BUG_ON("Unknown message type");
	}
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_engine_run_job,
	.free_job = guc_engine_free_job,
	.timedout_job = guc_engine_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_engine_process_msg,
};

static int guc_engine_init(struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_guc_engine *ge;
	long timeout;
	int err;

	XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	e->guc = ge;
	ge->engine = e;
	init_waitqueue_head(&ge->suspend_wait);

	timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
			    e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
			    64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    e->name, gt_to_xe(e->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;
	e->priority = XE_ENGINE_PRIORITY_NORMAL;

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, e);
	if (err)
		goto err_entity;

	e->entity = &ge->entity;

	if (guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	switch (e->class) {
	case XE_ENGINE_CLASS_RENDER:
		sprintf(e->name, "rcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		sprintf(e->name, "vcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		sprintf(e->name, "vecs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_COPY:
		sprintf(e->name, "bcs%d", e->guc->id);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		sprintf(e->name, "ccs%d", e->guc->id);
		break;
	default:
		XE_WARN_ON(e->class);
	}

	trace_xe_engine_create(e);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}

static void guc_engine_kill(struct xe_engine *e)
{
	trace_xe_engine_kill(e);
	set_engine_killed(e);
	xe_sched_tdr_queue_imm(&e->guc->sched);
}

static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
			       u32 opcode)
{
	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode;
	msg->private_data = e;

	trace_xe_sched_msg_add(msg);
	xe_sched_add_msg(&e->guc->sched, msg);
}

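/*
 * Cleanup, suspend and resume use messages that are statically embedded in
 * struct xe_guc_engine (static_msgs), so these paths cannot fail with
 * -ENOMEM; SET_SCHED_PROPS messages are allocated per request and freed by
 * the message handler.
 */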
#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
static void guc_engine_fini(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;

	if (!(e->flags & ENGINE_FLAG_KERNEL))
		guc_engine_add_msg(e, msg, CLEANUP);
	else
		__guc_engine_fini(engine_to_guc(e), e);
}

static int guc_engine_set_priority(struct xe_engine *e,
				   enum xe_engine_priority priority)
{
	struct xe_sched_msg *msg;

	if (e->priority == priority || engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
	e->priority = priority;

	return 0;
}

static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (e->sched_props.timeslice_us == timeslice_us ||
	    engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	e->sched_props.timeslice_us = timeslice_us;
	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_engine_set_preempt_timeout(struct xe_engine *e,
					  u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
	    engine_killed_or_banned(e))
		return 0;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	e->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_engine_add_msg(e, msg, SET_SCHED_PROPS);

	return 0;
}

static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	XE_BUG_ON(engine_registered(e));
	XE_BUG_ON(engine_banned(e));
	XE_BUG_ON(engine_killed(e));

	sched->base.timeout = job_timeout_ms;

	return 0;
}

static int guc_engine_suspend(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;

	if (engine_killed_or_banned(e) || e->guc->suspend_pending)
		return -EINVAL;

	e->guc->suspend_pending = true;
	guc_engine_add_msg(e, msg, SUSPEND);

	return 0;
}

static void guc_engine_suspend_wait(struct xe_engine *e)
{
	struct xe_guc *guc = engine_to_guc(e);

	wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
		   guc_read_stopped(guc));
}

static void guc_engine_resume(struct xe_engine *e)
{
	struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;

	XE_BUG_ON(e->guc->suspend_pending);

	xe_mocs_init_engine(e);
	guc_engine_add_msg(e, msg, RESUME);
}

/*
 * All of these functions are an abstraction layer which other parts of XE can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler which
 * synchronizes these operations.
 */
static const struct xe_engine_ops guc_engine_ops = {
	.init = guc_engine_init,
	.kill = guc_engine_kill,
	.fini = guc_engine_fini,
	.set_priority = guc_engine_set_priority,
	.set_timeslice = guc_engine_set_timeslice,
	.set_preempt_timeout = guc_engine_set_preempt_timeout,
	.set_job_timeout = guc_engine_set_job_timeout,
	.suspend = guc_engine_suspend,
	.suspend_wait = guc_engine_suspend_wait,
	.resume = guc_engine_resume,
};

static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	/* Stop scheduling + flush any DRM scheduler operations */
	xe_sched_submission_stop(sched);

	/* Clean up lost G2H + reset engine state */
	if (engine_destroyed(e) && engine_registered(e)) {
		if (engine_banned(e))
			xe_engine_put(e);
		else
			__guc_engine_fini(guc, e);
	}
	if (e->guc->suspend_pending) {
		set_engine_suspended(e);
		suspend_fence_signal(e);
	}
	atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
		   &e->guc->state);
	e->guc->resume_time = 0;
	trace_xe_engine_stop(e);

	/*
	 * Ban any engine (aside from kernel and engines used for VM ops) with a
	 * started but not complete job or if a job has gone through a GT reset
	 * more than twice.
	 */
	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
		struct xe_sched_job *job = xe_sched_first_pending_job(sched);

		if (job) {
			if ((xe_sched_job_started(job) &&
			    !xe_sched_job_completed(job)) ||
			    xe_sched_invalidate_job(job, 2)) {
				trace_xe_sched_job_ban(job);
				xe_sched_tdr_queue_imm(&e->guc->sched);
				set_engine_banned(e);
			}
		}
	}
}

int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
	int ret;

	/*
	 * Using an atomic here rather than submission_state.lock as this
	 * function can be called while holding the CT lock (engine reset
	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
	 * Atomic is not ideal, but it works to protect against a concurrent
	 * reset and to release any TDRs waiting on
	 * guc->submission_state.stopped.
	 */
	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
	smp_wmb();
	wake_up_all(&guc->ct.wq);

	return ret;
}

void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, !guc_read_stopped(guc));
}

int xe_guc_submit_stop(struct xe_guc *guc)
{
	struct xe_engine *e;
	unsigned long index;

	XE_BUG_ON(guc_read_stopped(guc) != 1);

	mutex_lock(&guc->submission_state.lock);

	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_stop(guc, e);

	mutex_unlock(&guc->submission_state.lock);

	/*
	 * No one can enter the backend at this point, aside from new engine
	 * creation which is protected by guc->submission_state.lock.
	 */

	return 0;
}

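/*
 * On reset recovery, point each ring's head at its tail so the ring reads as
 * empty, then have the DRM scheduler re-emit and resubmit the pending jobs.
 * Killed or banned engines are not resubmitted; they only get submission
 * restarted so their cleanup can run.
 */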
static void guc_engine_start(struct xe_engine *e)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;

	if (!engine_killed_or_banned(e)) {
		int i;

		trace_xe_engine_resubmit(e);
		for (i = 0; i < e->width; ++i)
			xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
		xe_sched_resubmit_jobs(sched);
	}

	xe_sched_submission_start(sched);
}

int xe_guc_submit_start(struct xe_guc *guc)
{
	struct xe_engine *e;
	unsigned long index;

	XE_BUG_ON(guc_read_stopped(guc) != 1);

	mutex_lock(&guc->submission_state.lock);
	atomic_dec(&guc->submission_state.stopped);
	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_start(e);
	mutex_unlock(&guc->submission_state.lock);

	wake_up_all(&guc->ct.wq);

	return 0;
}

static struct xe_engine *
g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;

	if (unlikely(guc_id >= GUC_ID_MAX)) {
		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
		return NULL;
	}

	e = xa_load(&guc->submission_state.engine_lookup, guc_id);
	if (unlikely(!e)) {
		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
		return NULL;
	}

	XE_BUG_ON(e->guc->id != guc_id);

	return e;
}

static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		e->guc->id,
	};

	trace_xe_engine_deregister(e);

	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}

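/*
 * G2H handler for the schedule enable/disable done response. For a pending
 * enable, record the resume time and wake any waiters. For a pending disable,
 * either signal a pending suspend fence or go on to deregister the context.
 */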
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 2)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	if (unlikely(!engine_pending_enable(e) &&
		     !engine_pending_disable(e))) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&e->guc->state));
		return -EPROTO;
	}

	trace_xe_engine_scheduling_done(e);

	if (engine_pending_enable(e)) {
		e->guc->resume_time = ktime_get();
		clear_engine_pending_enable(e);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	} else {
		clear_engine_pending_disable(e);
		if (e->guc->suspend_pending) {
			suspend_fence_signal(e);
		} else {
			if (engine_banned(e)) {
				smp_wmb();
				wake_up_all(&guc->ct.wq);
			}
			deregister_engine(guc, e);
		}
	}

	return 0;
}

int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	if (!engine_destroyed(e) || engine_pending_disable(e) ||
	    engine_pending_enable(e) || engine_enabled(e)) {
		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
			atomic_read(&e->guc->state));
		return -EPROTO;
	}

	trace_xe_engine_deregister_done(e);

	clear_engine_registered(e);
	if (engine_banned(e))
		xe_engine_put(e);
	else
		__guc_engine_fini(guc, e);

	return 0;
}

int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);

	/* FIXME: Do error capture, most likely async */

	trace_xe_engine_reset(e);

	/*
	 * A banned engine is a NOP at this point (came from
	 * guc_engine_timedout_job). Otherwise, kick the drm scheduler to
	 * cancel the jobs by immediately queuing the TDR, which kicks
	 * guc_engine_timedout_job.
	 */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					   u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_engine *e;
	u32 guc_id = msg[0];

	if (unlikely(len < 1)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	e = g2h_engine_lookup(guc, guc_id);
	if (unlikely(!e))
		return -EPROTO;

	drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
	trace_xe_engine_memory_cat_error(e);

	/* Treat the same as engine reset */
	set_engine_reset(e);
	if (!engine_banned(e))
		xe_sched_tdr_queue_imm(&e->guc->sched);

	return 0;
}

int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_device *xe = guc_to_xe(guc);
	u8 guc_class, instance;
	u32 reason;

	if (unlikely(len != 3)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	guc_class = msg[0];
	instance = msg[1];
	reason = msg[2];

	/* Unexpected failure of a hardware feature, log an actual error */
	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
		guc_class, instance, reason);

	xe_gt_reset_async(guc_to_gt(guc));

	return 0;
}

static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p)
{
	struct xe_guc *guc = engine_to_guc(e);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(e->lrc);
	int i;

	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
		   e->guc->wqi_head, parallel_read(xe, map, wq_desc.head));
	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
		   e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail));
	drm_printf(p, "\tWQ status: %u\n",
		   parallel_read(xe, map, wq_desc.wq_status));
	if (parallel_read(xe, map, wq_desc.head) !=
	    parallel_read(xe, map, wq_desc.tail)) {
		for (i = parallel_read(xe, map, wq_desc.head);
		     i != parallel_read(xe, map, wq_desc.tail);
		     i = (i + sizeof(u32)) % WQ_SIZE)
			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
				   parallel_read(xe, map, wq[i / sizeof(u32)]));
	}
}

static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
{
	struct xe_gpu_scheduler *sched = &e->guc->sched;
	struct xe_sched_job *job;
	int i;

	drm_printf(p, "\nGuC ID: %d\n", e->guc->id);
	drm_printf(p, "\tName: %s\n", e->name);
	drm_printf(p, "\tClass: %d\n", e->class);
	drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask);
	drm_printf(p, "\tWidth: %d\n", e->width);
	drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount));
	drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout);
	drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us);
	drm_printf(p, "\tPreempt timeout: %u (us)\n",
		   e->sched_props.preempt_timeout_us);
	for (i = 0; i < e->width; ++i) {
		struct xe_lrc *lrc = e->lrc + i;

		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
			   lower_32_bits(xe_lrc_ggtt_addr(lrc)));
		drm_printf(p, "\tLRC Head: (memory) %u\n",
			   xe_lrc_ring_head(lrc));
		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
			   lrc->ring.tail,
			   xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL));
		drm_printf(p, "\tStart seqno: (memory) %d\n",
			   xe_lrc_start_seqno(lrc));
		drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc));
	}
	drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state));
	drm_printf(p, "\tFlags: 0x%lx\n", e->flags);
	if (xe_engine_is_parallel(e))
		guc_engine_wq_print(e, p);

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
			   xe_sched_job_seqno(job),
			   dma_fence_is_signaled(job->fence) ? 1 : 0,
			   dma_fence_is_signaled(&job->drm.s_fence->finished) ?
			   1 : 0);
	spin_unlock(&sched->base.job_list_lock);
}

void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
	struct xe_engine *e;
	unsigned long index;

	if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
		return;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.engine_lookup, index, e)
		guc_engine_print(e, p);
	mutex_unlock(&guc->submission_state.lock);
}