// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_irq.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
 * safe to submit anything to this guc_id until the deregister completes, so a
 * fence is used to stall all requests associated with this guc_id until the
 * corresponding G2H returns indicating the guc_id has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

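/*
 * Illustrative sketch of the MMIO scratch-register handshake described in the
 * DOC comment above: write the action, ring the doorbell, then poll until the
 * firmware overwrites the action register with a status code. This is a
 * standalone simulation under stated assumptions (the scratch[]/send_interrupt
 * variables, fake_firmware() and GUC_STATUS_SUCCESS are placeholders, not i915
 * or GuC interfaces) and is excluded from the build.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define GUC_STATUS_SUCCESS 0x1		/* hypothetical completion code */

static uint32_t scratch[16];		/* stands in for the 16 registers at 0xC180 */
static uint32_t send_interrupt;		/* stands in for the doorbell at 0xC4C8 */

static void fake_firmware(void)
{
	/* Firmware overwrites the action register with a success/fail code. */
	if (send_interrupt)
		scratch[0] = GUC_STATUS_SUCCESS;
}

static int send_mmio_action(uint32_t action, uint32_t data)
{
	scratch[1] = data;		/* payload goes in the remaining scratch regs */
	scratch[0] = action;		/* the action register selects the request */
	send_interrupt = 1;		/* ring the doorbell so the GuC looks at it */

	fake_firmware();		/* in reality, runs asynchronously on the GuC */

	/* Poll until the action register no longer holds our request. */
	while (scratch[0] == action)
		fake_firmware();

	return scratch[0] == GUC_STATUS_SUCCESS ? 0 : -1;
}

int main(void)
{
	printf("action returned %d\n", send_mmio_action(0x10, 0xdead));
	return 0;
}
#endif
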
/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	SCHED_STATE_CLOSED | \
	SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

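/*
 * Illustrative sketch (standalone, excluded from the build): how a single
 * sched_state word can hold boolean flags in the low bits and a "blocked"
 * depth counter in the bits above SCHED_STATE_BLOCKED_SHIFT, which is why the
 * incr/decr helpers further below simply add or subtract SCHED_STATE_BLOCKED.
 * The FLAG_/BLOCKED_ names here are local stand-ins, not driver symbols.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_ENABLED	(1u << 4)
#define BLOCKED_SHIFT	9
#define BLOCKED_ONE	(1u << BLOCKED_SHIFT)
#define BLOCKED_MASK	(0xfffu << BLOCKED_SHIFT)

int main(void)
{
	uint32_t state = 0;

	state |= FLAG_ENABLED;		/* set a boolean flag */
	state += BLOCKED_ONE;		/* block once ... */
	state += BLOCKED_ONE;		/* ... and again (nested) */

	assert(((state & BLOCKED_MASK) >> BLOCKED_SHIFT) == 2);
	assert(state & FLAG_ENABLED);	/* flags are untouched by the counter */

	state -= BLOCKED_ONE;		/* one unblock leaves depth == 1 */
	printf("blocked depth %u\n", (state & BLOCKED_MASK) >> BLOCKED_SHIFT);
	return 0;
}
#endif
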
static inline void clr_context_banned(struct intel_context *ce) 268 { 269 lockdep_assert_held(&ce->guc_state.lock); 270 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 271 } 272 273 static inline bool context_enabled(struct intel_context *ce) 274 { 275 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 276 } 277 278 static inline void set_context_enabled(struct intel_context *ce) 279 { 280 lockdep_assert_held(&ce->guc_state.lock); 281 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 282 } 283 284 static inline void clr_context_enabled(struct intel_context *ce) 285 { 286 lockdep_assert_held(&ce->guc_state.lock); 287 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 288 } 289 290 static inline bool context_pending_enable(struct intel_context *ce) 291 { 292 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 293 } 294 295 static inline void set_context_pending_enable(struct intel_context *ce) 296 { 297 lockdep_assert_held(&ce->guc_state.lock); 298 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 299 } 300 301 static inline void clr_context_pending_enable(struct intel_context *ce) 302 { 303 lockdep_assert_held(&ce->guc_state.lock); 304 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 305 } 306 307 static inline bool context_registered(struct intel_context *ce) 308 { 309 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 310 } 311 312 static inline void set_context_registered(struct intel_context *ce) 313 { 314 lockdep_assert_held(&ce->guc_state.lock); 315 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 316 } 317 318 static inline void clr_context_registered(struct intel_context *ce) 319 { 320 lockdep_assert_held(&ce->guc_state.lock); 321 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 322 } 323 324 static inline bool context_policy_required(struct intel_context *ce) 325 { 326 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 327 } 328 329 static inline void set_context_policy_required(struct intel_context *ce) 330 { 331 lockdep_assert_held(&ce->guc_state.lock); 332 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 333 } 334 335 static inline void clr_context_policy_required(struct intel_context *ce) 336 { 337 lockdep_assert_held(&ce->guc_state.lock); 338 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 339 } 340 341 static inline bool context_close_done(struct intel_context *ce) 342 { 343 return ce->guc_state.sched_state & SCHED_STATE_CLOSED; 344 } 345 346 static inline void set_context_close_done(struct intel_context *ce) 347 { 348 lockdep_assert_held(&ce->guc_state.lock); 349 ce->guc_state.sched_state |= SCHED_STATE_CLOSED; 350 } 351 352 static inline u32 context_blocked(struct intel_context *ce) 353 { 354 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 355 SCHED_STATE_BLOCKED_SHIFT; 356 } 357 358 static inline void incr_context_blocked(struct intel_context *ce) 359 { 360 lockdep_assert_held(&ce->guc_state.lock); 361 362 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 363 364 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 365 } 366 367 static inline void decr_context_blocked(struct intel_context *ce) 368 { 369 lockdep_assert_held(&ce->guc_state.lock); 370 371 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 372 373 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 374 } 375 376 static struct intel_context * 377 request_to_scheduling_context(struct i915_request *rq) 378 { 379 return intel_context_to_parent(rq->context); 380 } 381 382 static inline bool 
context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return &ce->engine->gt->uc.guc;
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0					guc_process_desc
 * + sizeof(struct guc_process_desc)	child go
 * + CACHELINE_BYTES			child join[0]
 * ...
 * + CACHELINE_BYTES			child join[n - 1]
 * ...					unused
 * PARENT_SCRATCH_SIZE / 2		work queue start
 * ...					work queue
 * PARENT_SCRATCH_SIZE - 1		work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}

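/*
 * Illustrative sketch (standalone, excluded from the build) of the circular
 * work queue bookkeeping used by get_wq_pointer() below: free space is
 * computed from a cached head and the local tail, and the shared head is only
 * re-read when the cached value suggests there is not enough room. The struct
 * and helper names here are local stand-ins; CIRC_SPACE is open-coded with the
 * same arithmetic as <linux/circ_buf.h>.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define WQ_SIZE		0x800	/* power of two, half of a 4K scratch page */
#define CIRC_SPACE(tail, head, size)	(((head) - (tail) - 1) & ((size) - 1))

struct wq {
	uint32_t shared_head;	/* written by the consumer (the GuC) */
	uint32_t cached_head;	/* producer's possibly stale copy of shared_head */
	uint32_t tail;		/* producer's write pointer */
};

static int wq_reserve(struct wq *wq, uint32_t bytes)
{
	if (bytes > CIRC_SPACE(wq->tail, wq->cached_head, WQ_SIZE)) {
		wq->cached_head = wq->shared_head;	/* expensive re-read */
		if (bytes > CIRC_SPACE(wq->tail, wq->cached_head, WQ_SIZE))
			return -1;			/* really full: -EBUSY */
	}
	wq->tail = (wq->tail + bytes) & (WQ_SIZE - 1);
	return 0;
}

int main(void)
{
	struct wq wq = { .shared_head = 0x100, .cached_head = 0, .tail = 0 };

	printf("first reserve: %d\n", wq_reserve(&wq, 0x40));	/* fits */
	printf("huge reserve:  %d\n", wq_reserve(&wq, WQ_SIZE));	/* never fits */
	return 0;
}
#endif
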
static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of head pointer in
	 * intel_context structure in order to reduce the number of accesses
	 * to shared GPU memory which may be across a PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have an xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have an xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
620 */ 621 GEM_BUG_ON(g2h_len_dw && !loop); 622 623 if (g2h_len_dw) 624 atomic_inc(&guc->outstanding_submission_g2h); 625 626 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 627 } 628 629 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 630 atomic_t *wait_var, 631 bool interruptible, 632 long timeout) 633 { 634 const int state = interruptible ? 635 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 636 DEFINE_WAIT(wait); 637 638 might_sleep(); 639 GEM_BUG_ON(timeout < 0); 640 641 if (!atomic_read(wait_var)) 642 return 0; 643 644 if (!timeout) 645 return -ETIME; 646 647 for (;;) { 648 prepare_to_wait(&guc->ct.wq, &wait, state); 649 650 if (!atomic_read(wait_var)) 651 break; 652 653 if (signal_pending_state(state, current)) { 654 timeout = -EINTR; 655 break; 656 } 657 658 if (!timeout) { 659 timeout = -ETIME; 660 break; 661 } 662 663 timeout = io_schedule_timeout(timeout); 664 } 665 finish_wait(&guc->ct.wq, &wait); 666 667 return (timeout < 0) ? timeout : 0; 668 } 669 670 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 671 { 672 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 673 return 0; 674 675 return intel_guc_wait_for_pending_msg(guc, 676 &guc->outstanding_submission_g2h, 677 true, timeout); 678 } 679 680 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 681 static int try_context_registration(struct intel_context *ce, bool loop); 682 683 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 684 { 685 int err = 0; 686 struct intel_context *ce = request_to_scheduling_context(rq); 687 u32 action[3]; 688 int len = 0; 689 u32 g2h_len_dw = 0; 690 bool enabled; 691 692 lockdep_assert_held(&rq->engine->sched_engine->lock); 693 694 /* 695 * Corner case where requests were sitting in the priority list or a 696 * request resubmitted after the context was banned. 697 */ 698 if (unlikely(!intel_context_is_schedulable(ce))) { 699 i915_request_put(i915_request_mark_eio(rq)); 700 intel_engine_signal_breadcrumbs(ce->engine); 701 return 0; 702 } 703 704 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 705 GEM_BUG_ON(context_guc_id_invalid(ce)); 706 707 if (context_policy_required(ce)) { 708 err = guc_context_policy_init_v70(ce, false); 709 if (err) 710 return err; 711 } 712 713 spin_lock(&ce->guc_state.lock); 714 715 /* 716 * The request / context will be run on the hardware when scheduling 717 * gets enabled in the unblock. For multi-lrc we still submit the 718 * context to move the LRC tails. 719 */ 720 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 721 goto out; 722 723 enabled = context_enabled(ce) || context_blocked(ce); 724 725 if (!enabled) { 726 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 727 action[len++] = ce->guc_id.id; 728 action[len++] = GUC_CONTEXT_ENABLE; 729 set_context_pending_enable(ce); 730 intel_context_get(ce); 731 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 732 } else { 733 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 734 action[len++] = ce->guc_id.id; 735 } 736 737 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 738 if (!enabled && !err) { 739 trace_intel_context_sched_enable(ce); 740 atomic_inc(&guc->outstanding_submission_g2h); 741 set_context_enabled(ce); 742 743 /* 744 * Without multi-lrc KMD does the submission step (moving the 745 * lrc tail) so enabling scheduling is sufficient to submit the 746 * context. 
This isn't the case in multi-lrc submission as the 747 * GuC needs to move the tails, hence the need for another H2G 748 * to submit a multi-lrc context after enabling scheduling. 749 */ 750 if (intel_context_is_parent(ce)) { 751 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 752 err = intel_guc_send_nb(guc, action, len - 1, 0); 753 } 754 } else if (!enabled) { 755 clr_context_pending_enable(ce); 756 intel_context_put(ce); 757 } 758 if (likely(!err)) 759 trace_i915_request_guc_submit(rq); 760 761 out: 762 spin_unlock(&ce->guc_state.lock); 763 return err; 764 } 765 766 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 767 { 768 int ret = __guc_add_request(guc, rq); 769 770 if (unlikely(ret == -EBUSY)) { 771 guc->stalled_request = rq; 772 guc->submission_stall_reason = STALL_ADD_REQUEST; 773 } 774 775 return ret; 776 } 777 778 static inline void guc_set_lrc_tail(struct i915_request *rq) 779 { 780 rq->context->lrc_reg_state[CTX_RING_TAIL] = 781 intel_ring_set_tail(rq->ring, rq->tail); 782 } 783 784 static inline int rq_prio(const struct i915_request *rq) 785 { 786 return rq->sched.attr.priority; 787 } 788 789 static bool is_multi_lrc_rq(struct i915_request *rq) 790 { 791 return intel_context_is_parallel(rq->context); 792 } 793 794 static bool can_merge_rq(struct i915_request *rq, 795 struct i915_request *last) 796 { 797 return request_to_scheduling_context(rq) == 798 request_to_scheduling_context(last); 799 } 800 801 static u32 wq_space_until_wrap(struct intel_context *ce) 802 { 803 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 804 } 805 806 static void write_wqi(struct intel_context *ce, u32 wqi_size) 807 { 808 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 809 810 /* 811 * Ensure WQI are visible before updating tail 812 */ 813 intel_guc_write_barrier(ce_to_guc(ce)); 814 815 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 816 (WQ_SIZE - 1); 817 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 818 } 819 820 static int guc_wq_noop_append(struct intel_context *ce) 821 { 822 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 823 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 824 825 if (!wqi) 826 return -EBUSY; 827 828 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 829 830 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 831 FIELD_PREP(WQ_LEN_MASK, len_dw); 832 ce->parallel.guc.wqi_tail = 0; 833 834 return 0; 835 } 836 837 static int __guc_wq_item_append(struct i915_request *rq) 838 { 839 struct intel_context *ce = request_to_scheduling_context(rq); 840 struct intel_context *child; 841 unsigned int wqi_size = (ce->parallel.number_children + 4) * 842 sizeof(u32); 843 u32 *wqi; 844 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 845 int ret; 846 847 /* Ensure context is in correct state updating work queue */ 848 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 849 GEM_BUG_ON(context_guc_id_invalid(ce)); 850 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 851 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 852 853 /* Insert NOOP if this work queue item will wrap the tail pointer. 
*/ 854 if (wqi_size > wq_space_until_wrap(ce)) { 855 ret = guc_wq_noop_append(ce); 856 if (ret) 857 return ret; 858 } 859 860 wqi = get_wq_pointer(ce, wqi_size); 861 if (!wqi) 862 return -EBUSY; 863 864 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 865 866 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 867 FIELD_PREP(WQ_LEN_MASK, len_dw); 868 *wqi++ = ce->lrc.lrca; 869 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 870 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 871 *wqi++ = 0; /* fence_id */ 872 for_each_child(ce, child) 873 *wqi++ = child->ring->tail / sizeof(u64); 874 875 write_wqi(ce, wqi_size); 876 877 return 0; 878 } 879 880 static int guc_wq_item_append(struct intel_guc *guc, 881 struct i915_request *rq) 882 { 883 struct intel_context *ce = request_to_scheduling_context(rq); 884 int ret; 885 886 if (unlikely(!intel_context_is_schedulable(ce))) 887 return 0; 888 889 ret = __guc_wq_item_append(rq); 890 if (unlikely(ret == -EBUSY)) { 891 guc->stalled_request = rq; 892 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 893 } 894 895 return ret; 896 } 897 898 static bool multi_lrc_submit(struct i915_request *rq) 899 { 900 struct intel_context *ce = request_to_scheduling_context(rq); 901 902 intel_ring_set_tail(rq->ring, rq->tail); 903 904 /* 905 * We expect the front end (execbuf IOCTL) to set this flag on the last 906 * request generated from a multi-BB submission. This indicates to the 907 * backend (GuC interface) that we should submit this context thus 908 * submitting all the requests generated in parallel. 909 */ 910 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 911 !intel_context_is_schedulable(ce); 912 } 913 914 static int guc_dequeue_one_context(struct intel_guc *guc) 915 { 916 struct i915_sched_engine * const sched_engine = guc->sched_engine; 917 struct i915_request *last = NULL; 918 bool submit = false; 919 struct rb_node *rb; 920 int ret; 921 922 lockdep_assert_held(&sched_engine->lock); 923 924 if (guc->stalled_request) { 925 submit = true; 926 last = guc->stalled_request; 927 928 switch (guc->submission_stall_reason) { 929 case STALL_REGISTER_CONTEXT: 930 goto register_context; 931 case STALL_MOVE_LRC_TAIL: 932 goto move_lrc_tail; 933 case STALL_ADD_REQUEST: 934 goto add_request; 935 default: 936 MISSING_CASE(guc->submission_stall_reason); 937 } 938 } 939 940 while ((rb = rb_first_cached(&sched_engine->queue))) { 941 struct i915_priolist *p = to_priolist(rb); 942 struct i915_request *rq, *rn; 943 944 priolist_for_each_request_consume(rq, rn, p) { 945 if (last && !can_merge_rq(rq, last)) 946 goto register_context; 947 948 list_del_init(&rq->sched.link); 949 950 __i915_request_submit(rq); 951 952 trace_i915_request_in(rq, 0); 953 last = rq; 954 955 if (is_multi_lrc_rq(rq)) { 956 /* 957 * We need to coalesce all multi-lrc requests in 958 * a relationship into a single H2G. We are 959 * guaranteed that all of these requests will be 960 * submitted sequentially. 
961 */ 962 if (multi_lrc_submit(rq)) { 963 submit = true; 964 goto register_context; 965 } 966 } else { 967 submit = true; 968 } 969 } 970 971 rb_erase_cached(&p->node, &sched_engine->queue); 972 i915_priolist_free(p); 973 } 974 975 register_context: 976 if (submit) { 977 struct intel_context *ce = request_to_scheduling_context(last); 978 979 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 980 intel_context_is_schedulable(ce))) { 981 ret = try_context_registration(ce, false); 982 if (unlikely(ret == -EPIPE)) { 983 goto deadlk; 984 } else if (ret == -EBUSY) { 985 guc->stalled_request = last; 986 guc->submission_stall_reason = 987 STALL_REGISTER_CONTEXT; 988 goto schedule_tasklet; 989 } else if (ret != 0) { 990 GEM_WARN_ON(ret); /* Unexpected */ 991 goto deadlk; 992 } 993 } 994 995 move_lrc_tail: 996 if (is_multi_lrc_rq(last)) { 997 ret = guc_wq_item_append(guc, last); 998 if (ret == -EBUSY) { 999 goto schedule_tasklet; 1000 } else if (ret != 0) { 1001 GEM_WARN_ON(ret); /* Unexpected */ 1002 goto deadlk; 1003 } 1004 } else { 1005 guc_set_lrc_tail(last); 1006 } 1007 1008 add_request: 1009 ret = guc_add_request(guc, last); 1010 if (unlikely(ret == -EPIPE)) { 1011 goto deadlk; 1012 } else if (ret == -EBUSY) { 1013 goto schedule_tasklet; 1014 } else if (ret != 0) { 1015 GEM_WARN_ON(ret); /* Unexpected */ 1016 goto deadlk; 1017 } 1018 } 1019 1020 guc->stalled_request = NULL; 1021 guc->submission_stall_reason = STALL_NONE; 1022 return submit; 1023 1024 deadlk: 1025 sched_engine->tasklet.callback = NULL; 1026 tasklet_disable_nosync(&sched_engine->tasklet); 1027 return false; 1028 1029 schedule_tasklet: 1030 tasklet_schedule(&sched_engine->tasklet); 1031 return false; 1032 } 1033 1034 static void guc_submission_tasklet(struct tasklet_struct *t) 1035 { 1036 struct i915_sched_engine *sched_engine = 1037 from_tasklet(sched_engine, t, tasklet); 1038 unsigned long flags; 1039 bool loop; 1040 1041 spin_lock_irqsave(&sched_engine->lock, flags); 1042 1043 do { 1044 loop = guc_dequeue_one_context(sched_engine->private_data); 1045 } while (loop); 1046 1047 i915_sched_engine_reset_on_empty(sched_engine); 1048 1049 spin_unlock_irqrestore(&sched_engine->lock, flags); 1050 } 1051 1052 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1053 { 1054 if (iir & GT_RENDER_USER_INTERRUPT) 1055 intel_engine_signal_breadcrumbs(engine); 1056 } 1057 1058 static void __guc_context_destroy(struct intel_context *ce); 1059 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1060 static void guc_signal_context_fence(struct intel_context *ce); 1061 static void guc_cancel_context_requests(struct intel_context *ce); 1062 static void guc_blocked_fence_complete(struct intel_context *ce); 1063 1064 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1065 { 1066 struct intel_context *ce; 1067 unsigned long index, flags; 1068 bool pending_disable, pending_enable, deregister, destroyed, banned; 1069 1070 xa_lock_irqsave(&guc->context_lookup, flags); 1071 xa_for_each(&guc->context_lookup, index, ce) { 1072 /* 1073 * Corner case where the ref count on the object is zero but and 1074 * deregister G2H was lost. In this case we don't touch the ref 1075 * count and finish the destroy of the context. 
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
 */

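/*
 * Illustrative sketch (standalone, excluded from the build) of the 32-bit to
 * 64-bit extension described above: as long as consecutive samples are taken
 * less than one wrap apart, a rollover of the low 32 bits can be detected by
 * the new sample being smaller than the previous one, much like the helpers
 * below. extend_u32() is a local name, not a driver function.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static uint64_t extend_u32(uint64_t prev, uint32_t sample)
{
	uint32_t hi = (uint32_t)(prev >> 32);

	/* The counter only moves forward, so "smaller" means it wrapped. */
	if (sample < (uint32_t)prev)
		hi++;

	return ((uint64_t)hi << 32) | sample;
}

int main(void)
{
	uint64_t ts = 0xfffffff0ull;		/* close to a 32-bit wrap */

	ts = extend_u32(ts, 0x00000010);	/* wrapped sample */
	printf("extended: 0x%llx\n", (unsigned long long)ts);	/* 0x100000010 */
	return 0;
}
#endif
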
#define WRAP_TIME_CLKS U32_MAX
#define	POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

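/*
 * Illustrative sketch (standalone, excluded from the build) of the
 * read-until-consistent pattern described in the comment below: sample the
 * shared fields, sample them again, and only accept the values once the two
 * passes agree, with a bounded number of retries. The struct and function
 * names are local stand-ins for the shared engine usage record.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

struct shared_record {		/* updated asynchronously by another agent */
	volatile uint32_t start;
	volatile uint32_t id;
	volatile uint32_t total;
};

static int read_consistent(const struct shared_record *rec,
			   uint32_t *start, uint32_t *id, uint32_t *total)
{
	for (int i = 0; i < 6; i++) {	/* same retry bound as the driver */
		*start = rec->start;
		*id = rec->id;
		*total = rec->total;

		if (rec->start == *start && rec->id == *id &&
		    rec->total == *total)
			return 0;	/* two passes agreed */
	}
	return -1;	/* still torn after the retry budget */
}

int main(void)
{
	struct shared_record rec = { .start = 1, .id = 2, .total = 3 };
	uint32_t start, id, total;

	if (!read_consistent(&rec, &start, &id, &total))
		printf("start %u id %u total %u\n", start, id, total);
	return 0;
}
#endif
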
/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg, shift;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
		GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

	return 3 - shift;
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission, total and active times are in terms
 * of gt clocks. The *now parameter is retained to return the cpu time at which
 * the busyness was sampled.
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = &gt->uc.guc;
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag.
Note that reset flow updates the reset_count 1314 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1315 * usable by checking the flag afterwards. 1316 */ 1317 reset_count = i915_reset_count(gpu_error); 1318 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1319 1320 *now = ktime_get(); 1321 1322 /* 1323 * The active busyness depends on start_gt_clk and gt_stamp. 1324 * gt_stamp is updated by i915 only when gt is awake and the 1325 * start_gt_clk is derived from GuC state. To get a consistent 1326 * view of activity, we query the GuC state only if gt is awake. 1327 */ 1328 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1329 stats_saved = *stats; 1330 gt_stamp_saved = guc->timestamp.gt_stamp; 1331 /* 1332 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1333 * start_gt_clk' calculation below for active engines. 1334 */ 1335 guc_update_engine_gt_clks(engine); 1336 guc_update_pm_timestamp(guc, now); 1337 intel_gt_pm_put_async(gt); 1338 if (i915_reset_count(gpu_error) != reset_count) { 1339 *stats = stats_saved; 1340 guc->timestamp.gt_stamp = gt_stamp_saved; 1341 } 1342 } 1343 1344 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1345 if (stats->running) { 1346 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1347 1348 total += intel_gt_clock_interval_to_ns(gt, clk); 1349 } 1350 1351 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1352 1353 return ns_to_ktime(total); 1354 } 1355 1356 static void guc_enable_busyness_worker(struct intel_guc *guc) 1357 { 1358 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); 1359 } 1360 1361 static void guc_cancel_busyness_worker(struct intel_guc *guc) 1362 { 1363 cancel_delayed_work_sync(&guc->timestamp.work); 1364 } 1365 1366 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1367 { 1368 struct intel_gt *gt = guc_to_gt(guc); 1369 struct intel_engine_cs *engine; 1370 enum intel_engine_id id; 1371 unsigned long flags; 1372 ktime_t unused; 1373 1374 guc_cancel_busyness_worker(guc); 1375 1376 spin_lock_irqsave(&guc->timestamp.lock, flags); 1377 1378 guc_update_pm_timestamp(guc, &unused); 1379 for_each_engine(engine, gt, id) { 1380 guc_update_engine_gt_clks(engine); 1381 engine->stats.guc.prev_total = 0; 1382 } 1383 1384 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1385 } 1386 1387 static void __update_guc_busyness_stats(struct intel_guc *guc) 1388 { 1389 struct intel_gt *gt = guc_to_gt(guc); 1390 struct intel_engine_cs *engine; 1391 enum intel_engine_id id; 1392 unsigned long flags; 1393 ktime_t unused; 1394 1395 guc->timestamp.last_stat_jiffies = jiffies; 1396 1397 spin_lock_irqsave(&guc->timestamp.lock, flags); 1398 1399 guc_update_pm_timestamp(guc, &unused); 1400 for_each_engine(engine, gt, id) 1401 guc_update_engine_gt_clks(engine); 1402 1403 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1404 } 1405 1406 static void __guc_context_update_stats(struct intel_context *ce) 1407 { 1408 struct intel_guc *guc = ce_to_guc(ce); 1409 unsigned long flags; 1410 1411 spin_lock_irqsave(&guc->timestamp.lock, flags); 1412 lrc_update_runtime(ce); 1413 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1414 } 1415 1416 static void guc_context_update_stats(struct intel_context *ce) 1417 { 1418 if (!intel_context_pin_if_active(ce)) 1419 return; 1420 1421 __guc_context_update_stats(ce); 1422 intel_context_unpin(ce); 1423 } 1424 1425 static void guc_timestamp_ping(struct work_struct *wrk) 1426 { 1427 struct intel_guc *guc = container_of(wrk, 
typeof(*guc), 1428 timestamp.work.work); 1429 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1430 struct intel_gt *gt = guc_to_gt(guc); 1431 struct intel_context *ce; 1432 intel_wakeref_t wakeref; 1433 unsigned long index; 1434 int srcu, ret; 1435 1436 /* 1437 * Ideally the busyness worker should take a gt pm wakeref because the 1438 * worker only needs to be active while gt is awake. However, the 1439 * gt_park path cancels the worker synchronously and this complicates 1440 * the flow if the worker is also running at the same time. The cancel 1441 * waits for the worker and when the worker releases the wakeref, that 1442 * would call gt_park and would lead to a deadlock. 1443 * 1444 * The resolution is to take the global pm wakeref if runtime pm is 1445 * already active. If not, we don't need to update the busyness stats as 1446 * the stats would already be updated when the gt was parked. 1447 * 1448 * Note: 1449 * - We do not requeue the worker if we cannot take a reference to runtime 1450 * pm since intel_guc_busyness_unpark would requeue the worker in the 1451 * resume path. 1452 * 1453 * - If the gt was parked longer than time taken for GT timestamp to roll 1454 * over, we ignore those rollovers since we don't care about tracking 1455 * the exact GT time. We only care about roll overs when the gt is 1456 * active and running workloads. 1457 * 1458 * - There is a window of time between gt_park and runtime suspend, 1459 * where the worker may run. This is acceptable since the worker will 1460 * not find any new data to update busyness. 1461 */ 1462 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); 1463 if (!wakeref) 1464 return; 1465 1466 /* 1467 * Synchronize with gt reset to make sure the worker does not 1468 * corrupt the engine/guc stats. NB: can't actually block waiting 1469 * for a reset to complete as the reset requires flushing out 1470 * this worker thread if started. So waiting would deadlock. 
1471 */ 1472 ret = intel_gt_reset_trylock(gt, &srcu); 1473 if (ret) 1474 goto err_trylock; 1475 1476 __update_guc_busyness_stats(guc); 1477 1478 /* adjust context stats for overflow */ 1479 xa_for_each(&guc->context_lookup, index, ce) 1480 guc_context_update_stats(ce); 1481 1482 intel_gt_reset_unlock(gt, srcu); 1483 1484 guc_enable_busyness_worker(guc); 1485 1486 err_trylock: 1487 intel_runtime_pm_put(>->i915->runtime_pm, wakeref); 1488 } 1489 1490 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1491 { 1492 u32 offset = intel_guc_engine_usage_offset(guc); 1493 u32 action[] = { 1494 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1495 offset, 1496 0, 1497 }; 1498 1499 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1500 } 1501 1502 static int guc_init_engine_stats(struct intel_guc *guc) 1503 { 1504 struct intel_gt *gt = guc_to_gt(guc); 1505 intel_wakeref_t wakeref; 1506 int ret; 1507 1508 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1509 ret = guc_action_enable_usage_stats(guc); 1510 1511 if (ret) 1512 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); 1513 else 1514 guc_enable_busyness_worker(guc); 1515 1516 return ret; 1517 } 1518 1519 static void guc_fini_engine_stats(struct intel_guc *guc) 1520 { 1521 guc_cancel_busyness_worker(guc); 1522 } 1523 1524 void intel_guc_busyness_park(struct intel_gt *gt) 1525 { 1526 struct intel_guc *guc = >->uc.guc; 1527 1528 if (!guc_submission_initialized(guc)) 1529 return; 1530 1531 /* 1532 * There is a race with suspend flow where the worker runs after suspend 1533 * and causes an unclaimed register access warning. Cancel the worker 1534 * synchronously here. 1535 */ 1536 guc_cancel_busyness_worker(guc); 1537 1538 /* 1539 * Before parking, we should sample engine busyness stats if we need to. 1540 * We can skip it if we are less than half a ping from the last time we 1541 * sampled the busyness stats. 
1542 */ 1543 if (guc->timestamp.last_stat_jiffies && 1544 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1545 (guc->timestamp.ping_delay / 2))) 1546 return; 1547 1548 __update_guc_busyness_stats(guc); 1549 } 1550 1551 void intel_guc_busyness_unpark(struct intel_gt *gt) 1552 { 1553 struct intel_guc *guc = >->uc.guc; 1554 unsigned long flags; 1555 ktime_t unused; 1556 1557 if (!guc_submission_initialized(guc)) 1558 return; 1559 1560 spin_lock_irqsave(&guc->timestamp.lock, flags); 1561 guc_update_pm_timestamp(guc, &unused); 1562 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1563 guc_enable_busyness_worker(guc); 1564 } 1565 1566 static inline bool 1567 submission_disabled(struct intel_guc *guc) 1568 { 1569 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1570 1571 return unlikely(!sched_engine || 1572 !__tasklet_is_enabled(&sched_engine->tasklet) || 1573 intel_gt_is_wedged(guc_to_gt(guc))); 1574 } 1575 1576 static void disable_submission(struct intel_guc *guc) 1577 { 1578 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1579 1580 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1581 GEM_BUG_ON(!guc->ct.enabled); 1582 __tasklet_disable_sync_once(&sched_engine->tasklet); 1583 sched_engine->tasklet.callback = NULL; 1584 } 1585 } 1586 1587 static void enable_submission(struct intel_guc *guc) 1588 { 1589 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1590 unsigned long flags; 1591 1592 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1593 sched_engine->tasklet.callback = guc_submission_tasklet; 1594 wmb(); /* Make sure callback visible */ 1595 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1596 __tasklet_enable(&sched_engine->tasklet)) { 1597 GEM_BUG_ON(!guc->ct.enabled); 1598 1599 /* And kick in case we missed a new request submission. */ 1600 tasklet_hi_schedule(&sched_engine->tasklet); 1601 } 1602 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1603 } 1604 1605 static void guc_flush_submissions(struct intel_guc *guc) 1606 { 1607 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1608 unsigned long flags; 1609 1610 spin_lock_irqsave(&sched_engine->lock, flags); 1611 spin_unlock_irqrestore(&sched_engine->lock, flags); 1612 } 1613 1614 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1615 1616 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1617 { 1618 if (unlikely(!guc_submission_initialized(guc))) { 1619 /* Reset called during driver load? GuC not yet initialised! 
*/ 1620 return; 1621 } 1622 1623 intel_gt_park_heartbeats(guc_to_gt(guc)); 1624 disable_submission(guc); 1625 guc->interrupts.disable(guc); 1626 __reset_guc_busyness_stats(guc); 1627 1628 /* Flush IRQ handler */ 1629 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1630 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1631 1632 guc_flush_submissions(guc); 1633 guc_flush_destroyed_contexts(guc); 1634 flush_work(&guc->ct.requests.worker); 1635 1636 scrub_guc_desc_for_outstanding_g2h(guc); 1637 } 1638 1639 static struct intel_engine_cs * 1640 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1641 { 1642 struct intel_engine_cs *engine; 1643 intel_engine_mask_t tmp, mask = ve->mask; 1644 unsigned int num_siblings = 0; 1645 1646 for_each_engine_masked(engine, ve->gt, mask, tmp) 1647 if (num_siblings++ == sibling) 1648 return engine; 1649 1650 return NULL; 1651 } 1652 1653 static inline struct intel_engine_cs * 1654 __context_to_physical_engine(struct intel_context *ce) 1655 { 1656 struct intel_engine_cs *engine = ce->engine; 1657 1658 if (intel_engine_is_virtual(engine)) 1659 engine = guc_virtual_get_sibling(engine, 0); 1660 1661 return engine; 1662 } 1663 1664 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1665 { 1666 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1667 1668 if (!intel_context_is_schedulable(ce)) 1669 return; 1670 1671 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1672 1673 /* 1674 * We want a simple context + ring to execute the breadcrumb update. 1675 * We cannot rely on the context being intact across the GPU hang, 1676 * so clear it and rebuild just what we need for the breadcrumb. 1677 * All pending requests for this context will be zapped, and any 1678 * future request will be after userspace has had the opportunity 1679 * to recreate its own state. 1680 */ 1681 if (scrub) 1682 lrc_init_regs(ce, engine, true); 1683 1684 /* Rerun the request; its payload has been neutered (if guilty). */ 1685 lrc_update_regs(ce, engine, head); 1686 } 1687 1688 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1689 { 1690 /* 1691 * Wa_22011802037: In addition to stopping the cs, we need 1692 * to wait for any pending mi force wakeups 1693 */ 1694 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { 1695 intel_engine_stop_cs(engine); 1696 intel_engine_wait_for_pending_mi_fw(engine); 1697 } 1698 } 1699 1700 static void guc_reset_nop(struct intel_engine_cs *engine) 1701 { 1702 } 1703 1704 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1705 { 1706 } 1707 1708 static void 1709 __unwind_incomplete_requests(struct intel_context *ce) 1710 { 1711 struct i915_request *rq, *rn; 1712 struct list_head *pl; 1713 int prio = I915_PRIORITY_INVALID; 1714 struct i915_sched_engine * const sched_engine = 1715 ce->engine->sched_engine; 1716 unsigned long flags; 1717 1718 spin_lock_irqsave(&sched_engine->lock, flags); 1719 spin_lock(&ce->guc_state.lock); 1720 list_for_each_entry_safe_reverse(rq, rn, 1721 &ce->guc_state.requests, 1722 sched.link) { 1723 if (i915_request_completed(rq)) 1724 continue; 1725 1726 list_del_init(&rq->sched.link); 1727 __i915_request_unsubmit(rq); 1728 1729 /* Push the request back into the queue for later resubmission. 
*/ 1730 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1731 if (rq_prio(rq) != prio) { 1732 prio = rq_prio(rq); 1733 pl = i915_sched_lookup_priolist(sched_engine, prio); 1734 } 1735 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1736 1737 list_add(&rq->sched.link, pl); 1738 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1739 } 1740 spin_unlock(&ce->guc_state.lock); 1741 spin_unlock_irqrestore(&sched_engine->lock, flags); 1742 } 1743 1744 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1745 { 1746 bool guilty; 1747 struct i915_request *rq; 1748 unsigned long flags; 1749 u32 head; 1750 int i, number_children = ce->parallel.number_children; 1751 struct intel_context *parent = ce; 1752 1753 GEM_BUG_ON(intel_context_is_child(ce)); 1754 1755 intel_context_get(ce); 1756 1757 /* 1758 * GuC will implicitly mark the context as non-schedulable when it sends 1759 * the reset notification. Make sure our state reflects this change. The 1760 * context will be marked enabled on resubmission. 1761 */ 1762 spin_lock_irqsave(&ce->guc_state.lock, flags); 1763 clr_context_enabled(ce); 1764 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1765 1766 /* 1767 * For each context in the relationship find the hanging request 1768 * resetting each context / request as needed 1769 */ 1770 for (i = 0; i < number_children + 1; ++i) { 1771 if (!intel_context_is_pinned(ce)) 1772 goto next_context; 1773 1774 guilty = false; 1775 rq = intel_context_get_active_request(ce); 1776 if (!rq) { 1777 head = ce->ring->tail; 1778 goto out_replay; 1779 } 1780 1781 if (i915_request_started(rq)) 1782 guilty = stalled & ce->engine->mask; 1783 1784 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1785 head = intel_ring_wrap(ce->ring, rq->head); 1786 1787 __i915_request_reset(rq, guilty); 1788 i915_request_put(rq); 1789 out_replay: 1790 guc_reset_state(ce, head, guilty); 1791 next_context: 1792 if (i != number_children) 1793 ce = list_next_entry(ce, parallel.child_link); 1794 } 1795 1796 __unwind_incomplete_requests(parent); 1797 intel_context_put(parent); 1798 } 1799 1800 void wake_up_all_tlb_invalidate(struct intel_guc *guc) 1801 { 1802 struct intel_guc_tlb_wait *wait; 1803 unsigned long i; 1804 1805 if (!intel_guc_tlb_invalidation_is_available(guc)) 1806 return; 1807 1808 xa_lock_irq(&guc->tlb_lookup); 1809 xa_for_each(&guc->tlb_lookup, i, wait) 1810 wake_up(&wait->wq); 1811 xa_unlock_irq(&guc->tlb_lookup); 1812 } 1813 1814 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1815 { 1816 struct intel_context *ce; 1817 unsigned long index; 1818 unsigned long flags; 1819 1820 if (unlikely(!guc_submission_initialized(guc))) { 1821 /* Reset called during driver load? GuC not yet initialised! 
*/ 1822 return; 1823 } 1824 1825 xa_lock_irqsave(&guc->context_lookup, flags); 1826 xa_for_each(&guc->context_lookup, index, ce) { 1827 if (!kref_get_unless_zero(&ce->ref)) 1828 continue; 1829 1830 xa_unlock(&guc->context_lookup); 1831 1832 if (intel_context_is_pinned(ce) && 1833 !intel_context_is_child(ce)) 1834 __guc_reset_context(ce, stalled); 1835 1836 intel_context_put(ce); 1837 1838 xa_lock(&guc->context_lookup); 1839 } 1840 xa_unlock_irqrestore(&guc->context_lookup, flags); 1841 1842 /* GuC is blown away, drop all references to contexts */ 1843 xa_destroy(&guc->context_lookup); 1844 } 1845 1846 static void guc_cancel_context_requests(struct intel_context *ce) 1847 { 1848 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1849 struct i915_request *rq; 1850 unsigned long flags; 1851 1852 /* Mark all executing requests as skipped. */ 1853 spin_lock_irqsave(&sched_engine->lock, flags); 1854 spin_lock(&ce->guc_state.lock); 1855 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1856 i915_request_put(i915_request_mark_eio(rq)); 1857 spin_unlock(&ce->guc_state.lock); 1858 spin_unlock_irqrestore(&sched_engine->lock, flags); 1859 } 1860 1861 static void 1862 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1863 { 1864 struct i915_request *rq, *rn; 1865 struct rb_node *rb; 1866 unsigned long flags; 1867 1868 /* Can be called during boot if GuC fails to load */ 1869 if (!sched_engine) 1870 return; 1871 1872 /* 1873 * Before we call engine->cancel_requests(), we should have exclusive 1874 * access to the submission state. This is arranged for us by the 1875 * caller disabling the interrupt generation, the tasklet and other 1876 * threads that may then access the same state, giving us a free hand 1877 * to reset state. However, we still need to let lockdep be aware that 1878 * we know this state may be accessed in hardirq context, so we 1879 * disable the irq around this manipulation and we want to keep 1880 * the spinlock focused on its duties and not accidentally conflate 1881 * coverage to the submission's irq state. (Similarly, although we 1882 * shouldn't need to disable irq around the manipulation of the 1883 * submission's irq state, we also wish to remind ourselves that 1884 * it is irq state.) 1885 */ 1886 spin_lock_irqsave(&sched_engine->lock, flags); 1887 1888 /* Flush the queued requests to the timeline list (for retiring). 
*/ 1889 while ((rb = rb_first_cached(&sched_engine->queue))) { 1890 struct i915_priolist *p = to_priolist(rb); 1891 1892 priolist_for_each_request_consume(rq, rn, p) { 1893 list_del_init(&rq->sched.link); 1894 1895 __i915_request_submit(rq); 1896 1897 i915_request_put(i915_request_mark_eio(rq)); 1898 } 1899 1900 rb_erase_cached(&p->node, &sched_engine->queue); 1901 i915_priolist_free(p); 1902 } 1903 1904 /* Remaining _unready_ requests will be nop'ed when submitted */ 1905 1906 sched_engine->queue_priority_hint = INT_MIN; 1907 sched_engine->queue = RB_ROOT_CACHED; 1908 1909 spin_unlock_irqrestore(&sched_engine->lock, flags); 1910 } 1911 1912 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1913 { 1914 struct intel_context *ce; 1915 unsigned long index; 1916 unsigned long flags; 1917 1918 xa_lock_irqsave(&guc->context_lookup, flags); 1919 xa_for_each(&guc->context_lookup, index, ce) { 1920 if (!kref_get_unless_zero(&ce->ref)) 1921 continue; 1922 1923 xa_unlock(&guc->context_lookup); 1924 1925 if (intel_context_is_pinned(ce) && 1926 !intel_context_is_child(ce)) 1927 guc_cancel_context_requests(ce); 1928 1929 intel_context_put(ce); 1930 1931 xa_lock(&guc->context_lookup); 1932 } 1933 xa_unlock_irqrestore(&guc->context_lookup, flags); 1934 1935 guc_cancel_sched_engine_requests(guc->sched_engine); 1936 1937 /* GuC is blown away, drop all references to contexts */ 1938 xa_destroy(&guc->context_lookup); 1939 1940 /* 1941 * Wedged GT won't respond to any TLB invalidation request. Simply 1942 * release all the blocked waiters. 1943 */ 1944 wake_up_all_tlb_invalidate(guc); 1945 } 1946 1947 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1948 { 1949 /* Reset called during driver load or during wedge? */ 1950 if (unlikely(!guc_submission_initialized(guc) || 1951 intel_gt_is_wedged(guc_to_gt(guc)))) { 1952 return; 1953 } 1954 1955 /* 1956 * Technically possible for either of these values to be non-zero here, 1957 * but very unlikely + harmless. Regardless let's add a warn so we can 1958 * see in CI if this happens frequently / a precursor to taking down the 1959 * machine. 1960 */ 1961 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1962 atomic_set(&guc->outstanding_submission_g2h, 0); 1963 1964 intel_guc_global_policies_update(guc); 1965 enable_submission(guc); 1966 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1967 1968 /* 1969 * The full GT reset will have cleared the TLB caches and flushed the 1970 * G2H message queue; we can release all the blocked waiters. 1971 */ 1972 wake_up_all_tlb_invalidate(guc); 1973 } 1974 1975 static void destroyed_worker_func(struct work_struct *w); 1976 static void reset_fail_worker_func(struct work_struct *w); 1977 1978 bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc) 1979 { 1980 return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) && 1981 intel_guc_is_ready(guc); 1982 } 1983 1984 static int init_tlb_lookup(struct intel_guc *guc) 1985 { 1986 struct intel_guc_tlb_wait *wait; 1987 int err; 1988 1989 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 1990 return 0; 1991 1992 xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC); 1993 1994 wait = kzalloc(sizeof(*wait), GFP_KERNEL); 1995 if (!wait) 1996 return -ENOMEM; 1997 1998 init_waitqueue_head(&wait->wq); 1999 2000 /* Preallocate a shared id for use under memory pressure. 
*/ 2001 err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait, 2002 xa_limit_32b, &guc->next_seqno, GFP_KERNEL); 2003 if (err < 0) { 2004 kfree(wait); 2005 return err; 2006 } 2007 2008 return 0; 2009 } 2010 2011 static void fini_tlb_lookup(struct intel_guc *guc) 2012 { 2013 struct intel_guc_tlb_wait *wait; 2014 2015 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2016 return; 2017 2018 wait = xa_load(&guc->tlb_lookup, guc->serial_slot); 2019 if (wait && wait->busy) 2020 guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n"); 2021 kfree(wait); 2022 2023 xa_destroy(&guc->tlb_lookup); 2024 } 2025 2026 /* 2027 * Set up the memory resources to be shared with the GuC (via the GGTT) 2028 * at firmware loading time. 2029 */ 2030 int intel_guc_submission_init(struct intel_guc *guc) 2031 { 2032 struct intel_gt *gt = guc_to_gt(guc); 2033 int ret; 2034 2035 if (guc->submission_initialized) 2036 return 0; 2037 2038 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 2039 ret = guc_lrc_desc_pool_create_v69(guc); 2040 if (ret) 2041 return ret; 2042 } 2043 2044 ret = init_tlb_lookup(guc); 2045 if (ret) 2046 goto destroy_pool; 2047 2048 guc->submission_state.guc_ids_bitmap = 2049 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 2050 if (!guc->submission_state.guc_ids_bitmap) { 2051 ret = -ENOMEM; 2052 goto destroy_tlb; 2053 } 2054 2055 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 2056 guc->timestamp.shift = gpm_timestamp_shift(gt); 2057 guc->submission_initialized = true; 2058 2059 return 0; 2060 2061 destroy_tlb: 2062 fini_tlb_lookup(guc); 2063 destroy_pool: 2064 guc_lrc_desc_pool_destroy_v69(guc); 2065 return ret; 2066 } 2067 2068 void intel_guc_submission_fini(struct intel_guc *guc) 2069 { 2070 if (!guc->submission_initialized) 2071 return; 2072 2073 guc_flush_destroyed_contexts(guc); 2074 guc_lrc_desc_pool_destroy_v69(guc); 2075 i915_sched_engine_put(guc->sched_engine); 2076 bitmap_free(guc->submission_state.guc_ids_bitmap); 2077 fini_tlb_lookup(guc); 2078 guc->submission_initialized = false; 2079 } 2080 2081 static inline void queue_request(struct i915_sched_engine *sched_engine, 2082 struct i915_request *rq, 2083 int prio) 2084 { 2085 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2086 list_add_tail(&rq->sched.link, 2087 i915_sched_lookup_priolist(sched_engine, prio)); 2088 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2089 tasklet_hi_schedule(&sched_engine->tasklet); 2090 } 2091 2092 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2093 struct i915_request *rq) 2094 { 2095 int ret = 0; 2096 2097 __i915_request_submit(rq); 2098 2099 trace_i915_request_in(rq, 0); 2100 2101 if (is_multi_lrc_rq(rq)) { 2102 if (multi_lrc_submit(rq)) { 2103 ret = guc_wq_item_append(guc, rq); 2104 if (!ret) 2105 ret = guc_add_request(guc, rq); 2106 } 2107 } else { 2108 guc_set_lrc_tail(rq); 2109 ret = guc_add_request(guc, rq); 2110 } 2111 2112 if (unlikely(ret == -EPIPE)) 2113 disable_submission(guc); 2114 2115 return ret; 2116 } 2117 2118 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2119 { 2120 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2121 struct intel_context *ce = request_to_scheduling_context(rq); 2122 2123 return submission_disabled(guc) || guc->stalled_request || 2124 !i915_sched_engine_is_empty(sched_engine) || 2125 !ctx_id_mapped(guc, ce->guc_id.id); 2126 } 2127 2128 static void guc_submit_request(struct i915_request *rq) 2129 { 2130 struct i915_sched_engine *sched_engine = 
rq->engine->sched_engine; 2131 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2132 unsigned long flags; 2133 2134 /* Will be called from irq-context when using foreign fences. */ 2135 spin_lock_irqsave(&sched_engine->lock, flags); 2136 2137 if (need_tasklet(guc, rq)) 2138 queue_request(sched_engine, rq, rq_prio(rq)); 2139 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2140 tasklet_hi_schedule(&sched_engine->tasklet); 2141 2142 spin_unlock_irqrestore(&sched_engine->lock, flags); 2143 } 2144 2145 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2146 { 2147 int ret; 2148 2149 GEM_BUG_ON(intel_context_is_child(ce)); 2150 2151 if (intel_context_is_parent(ce)) 2152 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2153 NUMBER_MULTI_LRC_GUC_ID(guc), 2154 order_base_2(ce->parallel.number_children 2155 + 1)); 2156 else 2157 ret = ida_simple_get(&guc->submission_state.guc_ids, 2158 NUMBER_MULTI_LRC_GUC_ID(guc), 2159 guc->submission_state.num_guc_ids, 2160 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2161 __GFP_NOWARN); 2162 if (unlikely(ret < 0)) 2163 return ret; 2164 2165 if (!intel_context_is_parent(ce)) 2166 ++guc->submission_state.guc_ids_in_use; 2167 2168 ce->guc_id.id = ret; 2169 return 0; 2170 } 2171 2172 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2173 { 2174 GEM_BUG_ON(intel_context_is_child(ce)); 2175 2176 if (!context_guc_id_invalid(ce)) { 2177 if (intel_context_is_parent(ce)) { 2178 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2179 ce->guc_id.id, 2180 order_base_2(ce->parallel.number_children 2181 + 1)); 2182 } else { 2183 --guc->submission_state.guc_ids_in_use; 2184 ida_simple_remove(&guc->submission_state.guc_ids, 2185 ce->guc_id.id); 2186 } 2187 clr_ctx_id_mapping(guc, ce->guc_id.id); 2188 set_context_guc_id_invalid(ce); 2189 } 2190 if (!list_empty(&ce->guc_id.link)) 2191 list_del_init(&ce->guc_id.link); 2192 } 2193 2194 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2195 { 2196 unsigned long flags; 2197 2198 spin_lock_irqsave(&guc->submission_state.lock, flags); 2199 __release_guc_id(guc, ce); 2200 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2201 } 2202 2203 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2204 { 2205 struct intel_context *cn; 2206 2207 lockdep_assert_held(&guc->submission_state.lock); 2208 GEM_BUG_ON(intel_context_is_child(ce)); 2209 GEM_BUG_ON(intel_context_is_parent(ce)); 2210 2211 if (!list_empty(&guc->submission_state.guc_id_list)) { 2212 cn = list_first_entry(&guc->submission_state.guc_id_list, 2213 struct intel_context, 2214 guc_id.link); 2215 2216 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2217 GEM_BUG_ON(context_guc_id_invalid(cn)); 2218 GEM_BUG_ON(intel_context_is_child(cn)); 2219 GEM_BUG_ON(intel_context_is_parent(cn)); 2220 2221 list_del_init(&cn->guc_id.link); 2222 ce->guc_id.id = cn->guc_id.id; 2223 2224 spin_lock(&cn->guc_state.lock); 2225 clr_context_registered(cn); 2226 spin_unlock(&cn->guc_state.lock); 2227 2228 set_context_guc_id_invalid(cn); 2229 2230 #ifdef CONFIG_DRM_I915_SELFTEST 2231 guc->number_guc_id_stolen++; 2232 #endif 2233 2234 return 0; 2235 } else { 2236 return -EAGAIN; 2237 } 2238 } 2239 2240 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2241 { 2242 int ret; 2243 2244 lockdep_assert_held(&guc->submission_state.lock); 2245 GEM_BUG_ON(intel_context_is_child(ce)); 2246 2247 ret = new_guc_id(guc, ce); 2248 if (unlikely(ret < 0)) { 2249 if 
(intel_context_is_parent(ce)) 2250 return -ENOSPC; 2251 2252 ret = steal_guc_id(guc, ce); 2253 if (ret < 0) 2254 return ret; 2255 } 2256 2257 if (intel_context_is_parent(ce)) { 2258 struct intel_context *child; 2259 int i = 1; 2260 2261 for_each_child(ce, child) 2262 child->guc_id.id = ce->guc_id.id + i++; 2263 } 2264 2265 return 0; 2266 } 2267 2268 #define PIN_GUC_ID_TRIES 4 2269 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2270 { 2271 int ret = 0; 2272 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2273 2274 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2275 2276 try_again: 2277 spin_lock_irqsave(&guc->submission_state.lock, flags); 2278 2279 might_lock(&ce->guc_state.lock); 2280 2281 if (context_guc_id_invalid(ce)) { 2282 ret = assign_guc_id(guc, ce); 2283 if (ret) 2284 goto out_unlock; 2285 ret = 1; /* Indidcates newly assigned guc_id */ 2286 } 2287 if (!list_empty(&ce->guc_id.link)) 2288 list_del_init(&ce->guc_id.link); 2289 atomic_inc(&ce->guc_id.ref); 2290 2291 out_unlock: 2292 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2293 2294 /* 2295 * -EAGAIN indicates no guc_id are available, let's retire any 2296 * outstanding requests to see if that frees up a guc_id. If the first 2297 * retire didn't help, insert a sleep with the timeslice duration before 2298 * attempting to retire more requests. Double the sleep period each 2299 * subsequent pass before finally giving up. The sleep period has max of 2300 * 100ms and minimum of 1ms. 2301 */ 2302 if (ret == -EAGAIN && --tries) { 2303 if (PIN_GUC_ID_TRIES - tries > 1) { 2304 unsigned int timeslice_shifted = 2305 ce->engine->props.timeslice_duration_ms << 2306 (PIN_GUC_ID_TRIES - tries - 2); 2307 unsigned int max = min_t(unsigned int, 100, 2308 timeslice_shifted); 2309 2310 msleep(max_t(unsigned int, max, 1)); 2311 } 2312 intel_gt_retire_requests(guc_to_gt(guc)); 2313 goto try_again; 2314 } 2315 2316 return ret; 2317 } 2318 2319 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2320 { 2321 unsigned long flags; 2322 2323 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2324 GEM_BUG_ON(intel_context_is_child(ce)); 2325 2326 if (unlikely(context_guc_id_invalid(ce) || 2327 intel_context_is_parent(ce))) 2328 return; 2329 2330 spin_lock_irqsave(&guc->submission_state.lock, flags); 2331 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2332 !atomic_read(&ce->guc_id.ref)) 2333 list_add_tail(&ce->guc_id.link, 2334 &guc->submission_state.guc_id_list); 2335 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2336 } 2337 2338 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2339 struct intel_context *ce, 2340 u32 guc_id, 2341 u32 offset, 2342 bool loop) 2343 { 2344 struct intel_context *child; 2345 u32 action[4 + MAX_ENGINE_INSTANCE]; 2346 int len = 0; 2347 2348 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2349 2350 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2351 action[len++] = guc_id; 2352 action[len++] = ce->parallel.number_children + 1; 2353 action[len++] = offset; 2354 for_each_child(ce, child) { 2355 offset += sizeof(struct guc_lrc_desc_v69); 2356 action[len++] = offset; 2357 } 2358 2359 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2360 } 2361 2362 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2363 struct intel_context *ce, 2364 struct guc_ctxt_registration_info *info, 2365 bool loop) 2366 { 2367 struct intel_context *child; 2368 u32 action[13 + 
(MAX_ENGINE_INSTANCE * 2)]; 2369 int len = 0; 2370 u32 next_id; 2371 2372 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2373 2374 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2375 action[len++] = info->flags; 2376 action[len++] = info->context_idx; 2377 action[len++] = info->engine_class; 2378 action[len++] = info->engine_submit_mask; 2379 action[len++] = info->wq_desc_lo; 2380 action[len++] = info->wq_desc_hi; 2381 action[len++] = info->wq_base_lo; 2382 action[len++] = info->wq_base_hi; 2383 action[len++] = info->wq_size; 2384 action[len++] = ce->parallel.number_children + 1; 2385 action[len++] = info->hwlrca_lo; 2386 action[len++] = info->hwlrca_hi; 2387 2388 next_id = info->context_idx + 1; 2389 for_each_child(ce, child) { 2390 GEM_BUG_ON(next_id++ != child->guc_id.id); 2391 2392 /* 2393 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2394 * only supports 32 bit currently. 2395 */ 2396 action[len++] = lower_32_bits(child->lrc.lrca); 2397 action[len++] = upper_32_bits(child->lrc.lrca); 2398 } 2399 2400 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2401 2402 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2403 } 2404 2405 static int __guc_action_register_context_v69(struct intel_guc *guc, 2406 u32 guc_id, 2407 u32 offset, 2408 bool loop) 2409 { 2410 u32 action[] = { 2411 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2412 guc_id, 2413 offset, 2414 }; 2415 2416 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2417 0, loop); 2418 } 2419 2420 static int __guc_action_register_context_v70(struct intel_guc *guc, 2421 struct guc_ctxt_registration_info *info, 2422 bool loop) 2423 { 2424 u32 action[] = { 2425 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2426 info->flags, 2427 info->context_idx, 2428 info->engine_class, 2429 info->engine_submit_mask, 2430 info->wq_desc_lo, 2431 info->wq_desc_hi, 2432 info->wq_base_lo, 2433 info->wq_base_hi, 2434 info->wq_size, 2435 info->hwlrca_lo, 2436 info->hwlrca_hi, 2437 }; 2438 2439 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2440 0, loop); 2441 } 2442 2443 static void prepare_context_registration_info_v69(struct intel_context *ce); 2444 static void prepare_context_registration_info_v70(struct intel_context *ce, 2445 struct guc_ctxt_registration_info *info); 2446 2447 static int 2448 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2449 { 2450 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2451 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2452 2453 prepare_context_registration_info_v69(ce); 2454 2455 if (intel_context_is_parent(ce)) 2456 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2457 offset, loop); 2458 else 2459 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2460 offset, loop); 2461 } 2462 2463 static int 2464 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2465 { 2466 struct guc_ctxt_registration_info info; 2467 2468 prepare_context_registration_info_v70(ce, &info); 2469 2470 if (intel_context_is_parent(ce)) 2471 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2472 else 2473 return __guc_action_register_context_v70(guc, &info, loop); 2474 } 2475 2476 static int register_context(struct intel_context *ce, bool loop) 2477 { 2478 struct intel_guc *guc = ce_to_guc(ce); 2479 int ret; 2480 2481 GEM_BUG_ON(intel_context_is_child(ce)); 2482 trace_intel_context_register(ce); 2483 2484 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2485 ret = 
register_context_v70(guc, ce, loop); 2486 else 2487 ret = register_context_v69(guc, ce, loop); 2488 2489 if (likely(!ret)) { 2490 unsigned long flags; 2491 2492 spin_lock_irqsave(&ce->guc_state.lock, flags); 2493 set_context_registered(ce); 2494 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2495 2496 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2497 guc_context_policy_init_v70(ce, loop); 2498 } 2499 2500 return ret; 2501 } 2502 2503 static int __guc_action_deregister_context(struct intel_guc *guc, 2504 u32 guc_id) 2505 { 2506 u32 action[] = { 2507 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2508 guc_id, 2509 }; 2510 2511 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2512 G2H_LEN_DW_DEREGISTER_CONTEXT, 2513 true); 2514 } 2515 2516 static int deregister_context(struct intel_context *ce, u32 guc_id) 2517 { 2518 struct intel_guc *guc = ce_to_guc(ce); 2519 2520 GEM_BUG_ON(intel_context_is_child(ce)); 2521 trace_intel_context_deregister(ce); 2522 2523 return __guc_action_deregister_context(guc, guc_id); 2524 } 2525 2526 static inline void clear_children_join_go_memory(struct intel_context *ce) 2527 { 2528 struct parent_scratch *ps = __get_parent_scratch(ce); 2529 int i; 2530 2531 ps->go.semaphore = 0; 2532 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2533 ps->join[i].semaphore = 0; 2534 } 2535 2536 static inline u32 get_children_go_value(struct intel_context *ce) 2537 { 2538 return __get_parent_scratch(ce)->go.semaphore; 2539 } 2540 2541 static inline u32 get_children_join_value(struct intel_context *ce, 2542 u8 child_index) 2543 { 2544 return __get_parent_scratch(ce)->join[child_index].semaphore; 2545 } 2546 2547 struct context_policy { 2548 u32 count; 2549 struct guc_update_context_policy h2g; 2550 }; 2551 2552 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2553 { 2554 size_t bytes = sizeof(policy->h2g.header) + 2555 (sizeof(policy->h2g.klv[0]) * policy->count); 2556 2557 return bytes / sizeof(u32); 2558 } 2559 2560 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2561 { 2562 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2563 policy->h2g.header.ctx_id = guc_id; 2564 policy->count = 0; 2565 } 2566 2567 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2568 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2569 { \ 2570 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2571 policy->h2g.klv[policy->count].kl = \ 2572 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2573 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2574 policy->h2g.klv[policy->count].value = data; \ 2575 policy->count++; \ 2576 } 2577 2578 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2579 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2580 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2581 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2582 2583 #undef MAKE_CONTEXT_POLICY_ADD 2584 2585 static int __guc_context_set_context_policies(struct intel_guc *guc, 2586 struct context_policy *policy, 2587 bool loop) 2588 { 2589 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2590 __guc_context_policy_action_size(policy), 2591 0, loop); 2592 } 2593 2594 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2595 { 2596 struct intel_engine_cs *engine = ce->engine; 2597 struct intel_guc *guc = &engine->gt->uc.guc; 2598 struct context_policy policy; 2599 u32 
execution_quantum; 2600 u32 preemption_timeout; 2601 unsigned long flags; 2602 int ret; 2603 2604 /* NB: For both of these, zero means disabled. */ 2605 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2606 execution_quantum)); 2607 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2608 preemption_timeout)); 2609 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2610 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2611 2612 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2613 2614 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2615 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2616 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2617 2618 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2619 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2620 2621 ret = __guc_context_set_context_policies(guc, &policy, loop); 2622 2623 spin_lock_irqsave(&ce->guc_state.lock, flags); 2624 if (ret != 0) 2625 set_context_policy_required(ce); 2626 else 2627 clr_context_policy_required(ce); 2628 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2629 2630 return ret; 2631 } 2632 2633 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2634 struct guc_lrc_desc_v69 *desc) 2635 { 2636 desc->policy_flags = 0; 2637 2638 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2639 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2640 2641 /* NB: For both of these, zero means disabled. */ 2642 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2643 desc->execution_quantum)); 2644 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2645 desc->preemption_timeout)); 2646 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2647 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2648 } 2649 2650 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2651 { 2652 /* 2653 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2654 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2655 */ 2656 switch (prio) { 2657 default: 2658 MISSING_CASE(prio); 2659 fallthrough; 2660 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2661 return GEN12_CTX_PRIORITY_NORMAL; 2662 case GUC_CLIENT_PRIORITY_NORMAL: 2663 return GEN12_CTX_PRIORITY_LOW; 2664 case GUC_CLIENT_PRIORITY_HIGH: 2665 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2666 return GEN12_CTX_PRIORITY_HIGH; 2667 } 2668 } 2669 2670 static void prepare_context_registration_info_v69(struct intel_context *ce) 2671 { 2672 struct intel_engine_cs *engine = ce->engine; 2673 struct intel_guc *guc = &engine->gt->uc.guc; 2674 u32 ctx_id = ce->guc_id.id; 2675 struct guc_lrc_desc_v69 *desc; 2676 struct intel_context *child; 2677 2678 GEM_BUG_ON(!engine->mask); 2679 2680 /* 2681 * Ensure LRC + CT vmas are is same region as write barrier is done 2682 * based on CT vma region. 
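 * (The barrier used when publishing updates is chosen based on the
 * memory type backing the CT buffer, so it is only guaranteed to order
 * writes to that same region; an LRC placed in a different region could
 * be left stale from the GuC's point of view.)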
2683 */ 2684 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2685 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2686 2687 desc = __get_lrc_desc_v69(guc, ctx_id); 2688 GEM_BUG_ON(!desc); 2689 desc->engine_class = engine_class_to_guc_class(engine->class); 2690 desc->engine_submit_mask = engine->logical_mask; 2691 desc->hw_context_desc = ce->lrc.lrca; 2692 desc->priority = ce->guc_state.prio; 2693 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2694 guc_context_policy_init_v69(engine, desc); 2695 2696 /* 2697 * If context is a parent, we need to register a process descriptor 2698 * describing a work queue and register all child contexts. 2699 */ 2700 if (intel_context_is_parent(ce)) { 2701 struct guc_process_desc_v69 *pdesc; 2702 2703 ce->parallel.guc.wqi_tail = 0; 2704 ce->parallel.guc.wqi_head = 0; 2705 2706 desc->process_desc = i915_ggtt_offset(ce->state) + 2707 __get_parent_scratch_offset(ce); 2708 desc->wq_addr = i915_ggtt_offset(ce->state) + 2709 __get_wq_offset(ce); 2710 desc->wq_size = WQ_SIZE; 2711 2712 pdesc = __get_process_desc_v69(ce); 2713 memset(pdesc, 0, sizeof(*(pdesc))); 2714 pdesc->stage_id = ce->guc_id.id; 2715 pdesc->wq_base_addr = desc->wq_addr; 2716 pdesc->wq_size_bytes = desc->wq_size; 2717 pdesc->wq_status = WQ_STATUS_ACTIVE; 2718 2719 ce->parallel.guc.wq_head = &pdesc->head; 2720 ce->parallel.guc.wq_tail = &pdesc->tail; 2721 ce->parallel.guc.wq_status = &pdesc->wq_status; 2722 2723 for_each_child(ce, child) { 2724 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2725 2726 desc->engine_class = 2727 engine_class_to_guc_class(engine->class); 2728 desc->hw_context_desc = child->lrc.lrca; 2729 desc->priority = ce->guc_state.prio; 2730 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2731 guc_context_policy_init_v69(engine, desc); 2732 } 2733 2734 clear_children_join_go_memory(ce); 2735 } 2736 } 2737 2738 static void prepare_context_registration_info_v70(struct intel_context *ce, 2739 struct guc_ctxt_registration_info *info) 2740 { 2741 struct intel_engine_cs *engine = ce->engine; 2742 struct intel_guc *guc = &engine->gt->uc.guc; 2743 u32 ctx_id = ce->guc_id.id; 2744 2745 GEM_BUG_ON(!engine->mask); 2746 2747 /* 2748 * Ensure LRC + CT vmas are is same region as write barrier is done 2749 * based on CT vma region. 2750 */ 2751 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2752 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2753 2754 memset(info, 0, sizeof(*info)); 2755 info->context_idx = ctx_id; 2756 info->engine_class = engine_class_to_guc_class(engine->class); 2757 info->engine_submit_mask = engine->logical_mask; 2758 /* 2759 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2760 * only supports 32 bit currently. 2761 */ 2762 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2763 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2764 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2765 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2766 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2767 2768 /* 2769 * If context is a parent, we need to register a process descriptor 2770 * describing a work queue and register all child contexts. 
2771 */ 2772 if (intel_context_is_parent(ce)) { 2773 struct guc_sched_wq_desc *wq_desc; 2774 u64 wq_desc_offset, wq_base_offset; 2775 2776 ce->parallel.guc.wqi_tail = 0; 2777 ce->parallel.guc.wqi_head = 0; 2778 2779 wq_desc_offset = i915_ggtt_offset(ce->state) + 2780 __get_parent_scratch_offset(ce); 2781 wq_base_offset = i915_ggtt_offset(ce->state) + 2782 __get_wq_offset(ce); 2783 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2784 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2785 info->wq_base_lo = lower_32_bits(wq_base_offset); 2786 info->wq_base_hi = upper_32_bits(wq_base_offset); 2787 info->wq_size = WQ_SIZE; 2788 2789 wq_desc = __get_wq_desc_v70(ce); 2790 memset(wq_desc, 0, sizeof(*wq_desc)); 2791 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2792 2793 ce->parallel.guc.wq_head = &wq_desc->head; 2794 ce->parallel.guc.wq_tail = &wq_desc->tail; 2795 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2796 2797 clear_children_join_go_memory(ce); 2798 } 2799 } 2800 2801 static int try_context_registration(struct intel_context *ce, bool loop) 2802 { 2803 struct intel_engine_cs *engine = ce->engine; 2804 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2805 struct intel_guc *guc = &engine->gt->uc.guc; 2806 intel_wakeref_t wakeref; 2807 u32 ctx_id = ce->guc_id.id; 2808 bool context_registered; 2809 int ret = 0; 2810 2811 GEM_BUG_ON(!sched_state_is_init(ce)); 2812 2813 context_registered = ctx_id_mapped(guc, ctx_id); 2814 2815 clr_ctx_id_mapping(guc, ctx_id); 2816 set_ctx_id_mapping(guc, ctx_id, ce); 2817 2818 /* 2819 * The context_lookup xarray is used to determine if the hardware 2820 * context is currently registered. There are two cases in which it 2821 * could be registered either the guc_id has been stolen from another 2822 * context or the lrc descriptor address of this context has changed. In 2823 * either case the context needs to be deregistered with the GuC before 2824 * registering this context. 2825 */ 2826 if (context_registered) { 2827 bool disabled; 2828 unsigned long flags; 2829 2830 trace_intel_context_steal_guc_id(ce); 2831 GEM_BUG_ON(!loop); 2832 2833 /* Seal race with Reset */ 2834 spin_lock_irqsave(&ce->guc_state.lock, flags); 2835 disabled = submission_disabled(guc); 2836 if (likely(!disabled)) { 2837 set_context_wait_for_deregister_to_register(ce); 2838 intel_context_get(ce); 2839 } 2840 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2841 if (unlikely(disabled)) { 2842 clr_ctx_id_mapping(guc, ctx_id); 2843 return 0; /* Will get registered later */ 2844 } 2845 2846 /* 2847 * If stealing the guc_id, this ce has the same guc_id as the 2848 * context whose guc_id was stolen. 
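 * Deregistering tears down the old guc_id mapping first; the new
 * registration is issued later, from the deregister-complete G2H
 * handler, which is what the wait_for_deregister_to_register state set
 * above is for.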
2849 */ 2850 with_intel_runtime_pm(runtime_pm, wakeref) 2851 ret = deregister_context(ce, ce->guc_id.id); 2852 if (unlikely(ret == -ENODEV)) 2853 ret = 0; /* Will get registered later */ 2854 } else { 2855 with_intel_runtime_pm(runtime_pm, wakeref) 2856 ret = register_context(ce, loop); 2857 if (unlikely(ret == -EBUSY)) { 2858 clr_ctx_id_mapping(guc, ctx_id); 2859 } else if (unlikely(ret == -ENODEV)) { 2860 clr_ctx_id_mapping(guc, ctx_id); 2861 ret = 0; /* Will get registered later */ 2862 } 2863 } 2864 2865 return ret; 2866 } 2867 2868 static int __guc_context_pre_pin(struct intel_context *ce, 2869 struct intel_engine_cs *engine, 2870 struct i915_gem_ww_ctx *ww, 2871 void **vaddr) 2872 { 2873 return lrc_pre_pin(ce, engine, ww, vaddr); 2874 } 2875 2876 static int __guc_context_pin(struct intel_context *ce, 2877 struct intel_engine_cs *engine, 2878 void *vaddr) 2879 { 2880 if (i915_ggtt_offset(ce->state) != 2881 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 2882 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 2883 2884 /* 2885 * GuC context gets pinned in guc_request_alloc. See that function for 2886 * explaination of why. 2887 */ 2888 2889 return lrc_pin(ce, engine, vaddr); 2890 } 2891 2892 static int guc_context_pre_pin(struct intel_context *ce, 2893 struct i915_gem_ww_ctx *ww, 2894 void **vaddr) 2895 { 2896 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 2897 } 2898 2899 static int guc_context_pin(struct intel_context *ce, void *vaddr) 2900 { 2901 int ret = __guc_context_pin(ce, ce->engine, vaddr); 2902 2903 if (likely(!ret && !intel_context_is_barrier(ce))) 2904 intel_engine_pm_get(ce->engine); 2905 2906 return ret; 2907 } 2908 2909 static void guc_context_unpin(struct intel_context *ce) 2910 { 2911 struct intel_guc *guc = ce_to_guc(ce); 2912 2913 __guc_context_update_stats(ce); 2914 unpin_guc_id(guc, ce); 2915 lrc_unpin(ce); 2916 2917 if (likely(!intel_context_is_barrier(ce))) 2918 intel_engine_pm_put_async(ce->engine); 2919 } 2920 2921 static void guc_context_post_unpin(struct intel_context *ce) 2922 { 2923 lrc_post_unpin(ce); 2924 } 2925 2926 static void __guc_context_sched_enable(struct intel_guc *guc, 2927 struct intel_context *ce) 2928 { 2929 u32 action[] = { 2930 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2931 ce->guc_id.id, 2932 GUC_CONTEXT_ENABLE 2933 }; 2934 2935 trace_intel_context_sched_enable(ce); 2936 2937 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2938 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2939 } 2940 2941 static void __guc_context_sched_disable(struct intel_guc *guc, 2942 struct intel_context *ce, 2943 u16 guc_id) 2944 { 2945 u32 action[] = { 2946 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 2947 guc_id, /* ce->guc_id.id not stable */ 2948 GUC_CONTEXT_DISABLE 2949 }; 2950 2951 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 2952 2953 GEM_BUG_ON(intel_context_is_child(ce)); 2954 trace_intel_context_sched_disable(ce); 2955 2956 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2957 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 2958 } 2959 2960 static void guc_blocked_fence_complete(struct intel_context *ce) 2961 { 2962 lockdep_assert_held(&ce->guc_state.lock); 2963 2964 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 2965 i915_sw_fence_complete(&ce->guc_state.blocked); 2966 } 2967 2968 static void guc_blocked_fence_reinit(struct intel_context *ce) 2969 { 2970 lockdep_assert_held(&ce->guc_state.lock); 2971 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 2972 2973 /* 2974 * This fence is always complete unless a pending schedule disable is 2975 
* outstanding. We arm the fence here and complete it when we receive 2976 * the pending schedule disable complete message. 2977 */ 2978 i915_sw_fence_fini(&ce->guc_state.blocked); 2979 i915_sw_fence_reinit(&ce->guc_state.blocked); 2980 i915_sw_fence_await(&ce->guc_state.blocked); 2981 i915_sw_fence_commit(&ce->guc_state.blocked); 2982 } 2983 2984 static u16 prep_context_pending_disable(struct intel_context *ce) 2985 { 2986 lockdep_assert_held(&ce->guc_state.lock); 2987 2988 set_context_pending_disable(ce); 2989 clr_context_enabled(ce); 2990 guc_blocked_fence_reinit(ce); 2991 intel_context_get(ce); 2992 2993 return ce->guc_id.id; 2994 } 2995 2996 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2997 { 2998 struct intel_guc *guc = ce_to_guc(ce); 2999 unsigned long flags; 3000 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3001 intel_wakeref_t wakeref; 3002 u16 guc_id; 3003 bool enabled; 3004 3005 GEM_BUG_ON(intel_context_is_child(ce)); 3006 3007 spin_lock_irqsave(&ce->guc_state.lock, flags); 3008 3009 incr_context_blocked(ce); 3010 3011 enabled = context_enabled(ce); 3012 if (unlikely(!enabled || submission_disabled(guc))) { 3013 if (enabled) 3014 clr_context_enabled(ce); 3015 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3016 return &ce->guc_state.blocked; 3017 } 3018 3019 /* 3020 * We add +2 here as the schedule disable complete CTB handler calls 3021 * intel_context_sched_disable_unpin (-2 to pin_count). 3022 */ 3023 atomic_add(2, &ce->pin_count); 3024 3025 guc_id = prep_context_pending_disable(ce); 3026 3027 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3028 3029 with_intel_runtime_pm(runtime_pm, wakeref) 3030 __guc_context_sched_disable(guc, ce, guc_id); 3031 3032 return &ce->guc_state.blocked; 3033 } 3034 3035 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 3036 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 3037 #define SCHED_STATE_NO_UNBLOCK \ 3038 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 3039 SCHED_STATE_PENDING_DISABLE | \ 3040 SCHED_STATE_BANNED) 3041 3042 static bool context_cant_unblock(struct intel_context *ce) 3043 { 3044 lockdep_assert_held(&ce->guc_state.lock); 3045 3046 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 3047 context_guc_id_invalid(ce) || 3048 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 3049 !intel_context_is_pinned(ce); 3050 } 3051 3052 static void guc_context_unblock(struct intel_context *ce) 3053 { 3054 struct intel_guc *guc = ce_to_guc(ce); 3055 unsigned long flags; 3056 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3057 intel_wakeref_t wakeref; 3058 bool enable; 3059 3060 GEM_BUG_ON(context_enabled(ce)); 3061 GEM_BUG_ON(intel_context_is_child(ce)); 3062 3063 spin_lock_irqsave(&ce->guc_state.lock, flags); 3064 3065 if (unlikely(submission_disabled(guc) || 3066 context_cant_unblock(ce))) { 3067 enable = false; 3068 } else { 3069 enable = true; 3070 set_context_pending_enable(ce); 3071 set_context_enabled(ce); 3072 intel_context_get(ce); 3073 } 3074 3075 decr_context_blocked(ce); 3076 3077 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3078 3079 if (enable) { 3080 with_intel_runtime_pm(runtime_pm, wakeref) 3081 __guc_context_sched_enable(guc, ce); 3082 } 3083 } 3084 3085 static void guc_context_cancel_request(struct intel_context *ce, 3086 struct i915_request *rq) 3087 { 3088 struct intel_context *block_context = 3089 request_to_scheduling_context(rq); 3090 3091 if (i915_sw_fence_signaled(&rq->submit)) { 3092 struct i915_sw_fence *fence; 3093 3094 intel_context_get(ce); 3095 
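	/*
	 * Block the context first (schedule disable plus a wait on the
	 * blocked fence for the G2H), then, if the request still has not
	 * completed, skip its payload and rewind the ring to the request's
	 * head before unblocking the context again.
	 */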
fence = guc_context_block(block_context); 3096 i915_sw_fence_wait(fence); 3097 if (!i915_request_completed(rq)) { 3098 __i915_request_skip(rq); 3099 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3100 true); 3101 } 3102 3103 guc_context_unblock(block_context); 3104 intel_context_put(ce); 3105 } 3106 } 3107 3108 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3109 u16 guc_id, 3110 u32 preemption_timeout) 3111 { 3112 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3113 struct context_policy policy; 3114 3115 __guc_context_policy_start_klv(&policy, guc_id); 3116 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3117 __guc_context_set_context_policies(guc, &policy, true); 3118 } else { 3119 u32 action[] = { 3120 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3121 guc_id, 3122 preemption_timeout 3123 }; 3124 3125 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3126 } 3127 } 3128 3129 static void 3130 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3131 unsigned int preempt_timeout_ms) 3132 { 3133 struct intel_guc *guc = ce_to_guc(ce); 3134 struct intel_runtime_pm *runtime_pm = 3135 &ce->engine->gt->i915->runtime_pm; 3136 intel_wakeref_t wakeref; 3137 unsigned long flags; 3138 3139 GEM_BUG_ON(intel_context_is_child(ce)); 3140 3141 guc_flush_submissions(guc); 3142 3143 spin_lock_irqsave(&ce->guc_state.lock, flags); 3144 set_context_banned(ce); 3145 3146 if (submission_disabled(guc) || 3147 (!context_enabled(ce) && !context_pending_disable(ce))) { 3148 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3149 3150 guc_cancel_context_requests(ce); 3151 intel_engine_signal_breadcrumbs(ce->engine); 3152 } else if (!context_pending_disable(ce)) { 3153 u16 guc_id; 3154 3155 /* 3156 * We add +2 here as the schedule disable complete CTB handler 3157 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3158 */ 3159 atomic_add(2, &ce->pin_count); 3160 3161 guc_id = prep_context_pending_disable(ce); 3162 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3163 3164 /* 3165 * In addition to disabling scheduling, set the preemption 3166 * timeout to the minimum value (1 us) so the banned context 3167 * gets kicked off the HW ASAP. 
3168 */ 3169 with_intel_runtime_pm(runtime_pm, wakeref) { 3170 __guc_context_set_preemption_timeout(guc, guc_id, 3171 preempt_timeout_ms); 3172 __guc_context_sched_disable(guc, ce, guc_id); 3173 } 3174 } else { 3175 if (!context_guc_id_invalid(ce)) 3176 with_intel_runtime_pm(runtime_pm, wakeref) 3177 __guc_context_set_preemption_timeout(guc, 3178 ce->guc_id.id, 3179 preempt_timeout_ms); 3180 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3181 } 3182 } 3183 3184 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3185 unsigned long flags) 3186 __releases(ce->guc_state.lock) 3187 { 3188 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3189 intel_wakeref_t wakeref; 3190 u16 guc_id; 3191 3192 lockdep_assert_held(&ce->guc_state.lock); 3193 guc_id = prep_context_pending_disable(ce); 3194 3195 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3196 3197 with_intel_runtime_pm(runtime_pm, wakeref) 3198 __guc_context_sched_disable(guc, ce, guc_id); 3199 } 3200 3201 static bool bypass_sched_disable(struct intel_guc *guc, 3202 struct intel_context *ce) 3203 { 3204 lockdep_assert_held(&ce->guc_state.lock); 3205 GEM_BUG_ON(intel_context_is_child(ce)); 3206 3207 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3208 !ctx_id_mapped(guc, ce->guc_id.id)) { 3209 clr_context_enabled(ce); 3210 return true; 3211 } 3212 3213 return !context_enabled(ce); 3214 } 3215 3216 static void __delay_sched_disable(struct work_struct *wrk) 3217 { 3218 struct intel_context *ce = 3219 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3220 struct intel_guc *guc = ce_to_guc(ce); 3221 unsigned long flags; 3222 3223 spin_lock_irqsave(&ce->guc_state.lock, flags); 3224 3225 if (bypass_sched_disable(guc, ce)) { 3226 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3227 intel_context_sched_disable_unpin(ce); 3228 } else { 3229 do_sched_disable(guc, ce, flags); 3230 } 3231 } 3232 3233 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3234 { 3235 /* 3236 * parent contexts are perma-pinned, if we are unpinning do schedule 3237 * disable immediately. 3238 */ 3239 if (intel_context_is_parent(ce)) 3240 return true; 3241 3242 /* 3243 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
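 * Keeping the guc_id pinned for the delay period would only deepen the
 * shortage; disabling right away lets the id be recycled sooner.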
3244 */ 3245 return guc->submission_state.guc_ids_in_use > 3246 guc->submission_state.sched_disable_gucid_threshold; 3247 } 3248 3249 static void guc_context_sched_disable(struct intel_context *ce) 3250 { 3251 struct intel_guc *guc = ce_to_guc(ce); 3252 u64 delay = guc->submission_state.sched_disable_delay_ms; 3253 unsigned long flags; 3254 3255 spin_lock_irqsave(&ce->guc_state.lock, flags); 3256 3257 if (bypass_sched_disable(guc, ce)) { 3258 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3259 intel_context_sched_disable_unpin(ce); 3260 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3261 delay) { 3262 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3263 mod_delayed_work(system_unbound_wq, 3264 &ce->guc_state.sched_disable_delay_work, 3265 msecs_to_jiffies(delay)); 3266 } else { 3267 do_sched_disable(guc, ce, flags); 3268 } 3269 } 3270 3271 static void guc_context_close(struct intel_context *ce) 3272 { 3273 unsigned long flags; 3274 3275 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3276 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3277 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3278 3279 spin_lock_irqsave(&ce->guc_state.lock, flags); 3280 set_context_close_done(ce); 3281 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3282 } 3283 3284 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3285 { 3286 struct intel_guc *guc = ce_to_guc(ce); 3287 struct intel_gt *gt = guc_to_gt(guc); 3288 unsigned long flags; 3289 bool disabled; 3290 3291 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3292 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3293 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3294 GEM_BUG_ON(context_enabled(ce)); 3295 3296 /* Seal race with Reset */ 3297 spin_lock_irqsave(&ce->guc_state.lock, flags); 3298 disabled = submission_disabled(guc); 3299 if (likely(!disabled)) { 3300 __intel_gt_pm_get(gt); 3301 set_context_destroyed(ce); 3302 clr_context_registered(ce); 3303 } 3304 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3305 if (unlikely(disabled)) { 3306 release_guc_id(guc, ce); 3307 __guc_context_destroy(ce); 3308 return; 3309 } 3310 3311 deregister_context(ce, ce->guc_id.id); 3312 } 3313 3314 static void __guc_context_destroy(struct intel_context *ce) 3315 { 3316 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3317 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3318 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3319 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3320 3321 lrc_fini(ce); 3322 intel_context_fini(ce); 3323 3324 if (intel_engine_is_virtual(ce->engine)) { 3325 struct guc_virtual_engine *ve = 3326 container_of(ce, typeof(*ve), context); 3327 3328 if (ve->base.breadcrumbs) 3329 intel_breadcrumbs_put(ve->base.breadcrumbs); 3330 3331 kfree(ve); 3332 } else { 3333 intel_context_free(ce); 3334 } 3335 } 3336 3337 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3338 { 3339 struct intel_context *ce; 3340 unsigned long flags; 3341 3342 GEM_BUG_ON(!submission_disabled(guc) && 3343 guc_submission_initialized(guc)); 3344 3345 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3346 spin_lock_irqsave(&guc->submission_state.lock, flags); 3347 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3348 struct intel_context, 3349 destroyed_link); 3350 if (ce) 3351 list_del_init(&ce->destroyed_link); 3352 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3353 3354 if (!ce) 3355 break; 
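		/*
		 * This path only runs while submission is disabled or not yet
		 * initialised, so no deregistration H2G is sent; simply recycle
		 * the guc_id and free the context state.
		 */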
3356 3357 release_guc_id(guc, ce); 3358 __guc_context_destroy(ce); 3359 } 3360 } 3361 3362 static void deregister_destroyed_contexts(struct intel_guc *guc) 3363 { 3364 struct intel_context *ce; 3365 unsigned long flags; 3366 3367 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3368 spin_lock_irqsave(&guc->submission_state.lock, flags); 3369 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3370 struct intel_context, 3371 destroyed_link); 3372 if (ce) 3373 list_del_init(&ce->destroyed_link); 3374 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3375 3376 if (!ce) 3377 break; 3378 3379 guc_lrc_desc_unpin(ce); 3380 } 3381 } 3382 3383 static void destroyed_worker_func(struct work_struct *w) 3384 { 3385 struct intel_guc *guc = container_of(w, struct intel_guc, 3386 submission_state.destroyed_worker); 3387 struct intel_gt *gt = guc_to_gt(guc); 3388 int tmp; 3389 3390 with_intel_gt_pm(gt, tmp) 3391 deregister_destroyed_contexts(guc); 3392 } 3393 3394 static void guc_context_destroy(struct kref *kref) 3395 { 3396 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3397 struct intel_guc *guc = ce_to_guc(ce); 3398 unsigned long flags; 3399 bool destroy; 3400 3401 /* 3402 * If the guc_id is invalid this context has been stolen and we can free 3403 * it immediately. Also can be freed immediately if the context is not 3404 * registered with the GuC or the GuC is in the middle of a reset. 3405 */ 3406 spin_lock_irqsave(&guc->submission_state.lock, flags); 3407 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3408 !ctx_id_mapped(guc, ce->guc_id.id); 3409 if (likely(!destroy)) { 3410 if (!list_empty(&ce->guc_id.link)) 3411 list_del_init(&ce->guc_id.link); 3412 list_add_tail(&ce->destroyed_link, 3413 &guc->submission_state.destroyed_contexts); 3414 } else { 3415 __release_guc_id(guc, ce); 3416 } 3417 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3418 if (unlikely(destroy)) { 3419 __guc_context_destroy(ce); 3420 return; 3421 } 3422 3423 /* 3424 * We use a worker to issue the H2G to deregister the context as we can 3425 * take the GT PM for the first time which isn't allowed from an atomic 3426 * context. 
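 * (Taking the GT wakeref may need to resume the device and therefore
 * sleep, while the final kref_put() of a context can happen under a
 * spinlock.)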
3427 */ 3428 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3429 } 3430 3431 static int guc_context_alloc(struct intel_context *ce) 3432 { 3433 return lrc_alloc(ce, ce->engine); 3434 } 3435 3436 static void __guc_context_set_prio(struct intel_guc *guc, 3437 struct intel_context *ce) 3438 { 3439 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3440 struct context_policy policy; 3441 3442 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3443 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3444 __guc_context_set_context_policies(guc, &policy, true); 3445 } else { 3446 u32 action[] = { 3447 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3448 ce->guc_id.id, 3449 ce->guc_state.prio, 3450 }; 3451 3452 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3453 } 3454 } 3455 3456 static void guc_context_set_prio(struct intel_guc *guc, 3457 struct intel_context *ce, 3458 u8 prio) 3459 { 3460 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3461 prio > GUC_CLIENT_PRIORITY_NORMAL); 3462 lockdep_assert_held(&ce->guc_state.lock); 3463 3464 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3465 !context_registered(ce)) { 3466 ce->guc_state.prio = prio; 3467 return; 3468 } 3469 3470 ce->guc_state.prio = prio; 3471 __guc_context_set_prio(guc, ce); 3472 3473 trace_intel_context_set_prio(ce); 3474 } 3475 3476 static inline u8 map_i915_prio_to_guc_prio(int prio) 3477 { 3478 if (prio == I915_PRIORITY_NORMAL) 3479 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3480 else if (prio < I915_PRIORITY_NORMAL) 3481 return GUC_CLIENT_PRIORITY_NORMAL; 3482 else if (prio < I915_PRIORITY_DISPLAY) 3483 return GUC_CLIENT_PRIORITY_HIGH; 3484 else 3485 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3486 } 3487 3488 static inline void add_context_inflight_prio(struct intel_context *ce, 3489 u8 guc_prio) 3490 { 3491 lockdep_assert_held(&ce->guc_state.lock); 3492 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3493 3494 ++ce->guc_state.prio_count[guc_prio]; 3495 3496 /* Overflow protection */ 3497 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3498 } 3499 3500 static inline void sub_context_inflight_prio(struct intel_context *ce, 3501 u8 guc_prio) 3502 { 3503 lockdep_assert_held(&ce->guc_state.lock); 3504 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3505 3506 /* Underflow protection */ 3507 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3508 3509 --ce->guc_state.prio_count[guc_prio]; 3510 } 3511 3512 static inline void update_context_prio(struct intel_context *ce) 3513 { 3514 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3515 int i; 3516 3517 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3518 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3519 3520 lockdep_assert_held(&ce->guc_state.lock); 3521 3522 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3523 if (ce->guc_state.prio_count[i]) { 3524 guc_context_set_prio(guc, ce, i); 3525 break; 3526 } 3527 } 3528 } 3529 3530 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3531 { 3532 /* Lower value is higher priority */ 3533 return new_guc_prio < old_guc_prio; 3534 } 3535 3536 static void add_to_context(struct i915_request *rq) 3537 { 3538 struct intel_context *ce = request_to_scheduling_context(rq); 3539 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3540 3541 GEM_BUG_ON(intel_context_is_child(ce)); 3542 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3543 3544 spin_lock(&ce->guc_state.lock); 3545 
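	/*
	 * Inflight-priority bookkeeping: each request remembers the highest
	 * GuC priority it has asked for (rq->guc_prio), prio_count[] counts
	 * how many in-flight requests sit at each level, and
	 * update_context_prio() pushes the highest occupied level to the GuC.
	 */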
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3546 3547 if (rq->guc_prio == GUC_PRIO_INIT) { 3548 rq->guc_prio = new_guc_prio; 3549 add_context_inflight_prio(ce, rq->guc_prio); 3550 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3551 sub_context_inflight_prio(ce, rq->guc_prio); 3552 rq->guc_prio = new_guc_prio; 3553 add_context_inflight_prio(ce, rq->guc_prio); 3554 } 3555 update_context_prio(ce); 3556 3557 spin_unlock(&ce->guc_state.lock); 3558 } 3559 3560 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3561 { 3562 lockdep_assert_held(&ce->guc_state.lock); 3563 3564 if (rq->guc_prio != GUC_PRIO_INIT && 3565 rq->guc_prio != GUC_PRIO_FINI) { 3566 sub_context_inflight_prio(ce, rq->guc_prio); 3567 update_context_prio(ce); 3568 } 3569 rq->guc_prio = GUC_PRIO_FINI; 3570 } 3571 3572 static void remove_from_context(struct i915_request *rq) 3573 { 3574 struct intel_context *ce = request_to_scheduling_context(rq); 3575 3576 GEM_BUG_ON(intel_context_is_child(ce)); 3577 3578 spin_lock_irq(&ce->guc_state.lock); 3579 3580 list_del_init(&rq->sched.link); 3581 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3582 3583 /* Prevent further __await_execution() registering a cb, then flush */ 3584 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3585 3586 guc_prio_fini(rq, ce); 3587 3588 spin_unlock_irq(&ce->guc_state.lock); 3589 3590 atomic_dec(&ce->guc_id.ref); 3591 i915_request_notify_execute_cb_imm(rq); 3592 } 3593 3594 static const struct intel_context_ops guc_context_ops = { 3595 .flags = COPS_RUNTIME_CYCLES, 3596 .alloc = guc_context_alloc, 3597 3598 .close = guc_context_close, 3599 3600 .pre_pin = guc_context_pre_pin, 3601 .pin = guc_context_pin, 3602 .unpin = guc_context_unpin, 3603 .post_unpin = guc_context_post_unpin, 3604 3605 .revoke = guc_context_revoke, 3606 3607 .cancel_request = guc_context_cancel_request, 3608 3609 .enter = intel_context_enter_engine, 3610 .exit = intel_context_exit_engine, 3611 3612 .sched_disable = guc_context_sched_disable, 3613 3614 .update_stats = guc_context_update_stats, 3615 3616 .reset = lrc_reset, 3617 .destroy = guc_context_destroy, 3618 3619 .create_virtual = guc_create_virtual, 3620 .create_parallel = guc_create_parallel, 3621 }; 3622 3623 static void submit_work_cb(struct irq_work *wrk) 3624 { 3625 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3626 3627 might_lock(&rq->engine->sched_engine->lock); 3628 i915_sw_fence_complete(&rq->submit); 3629 } 3630 3631 static void __guc_signal_context_fence(struct intel_context *ce) 3632 { 3633 struct i915_request *rq, *rn; 3634 3635 lockdep_assert_held(&ce->guc_state.lock); 3636 3637 if (!list_empty(&ce->guc_state.fences)) 3638 trace_intel_context_fence_release(ce); 3639 3640 /* 3641 * Use an IRQ to ensure locking order of sched_engine->lock -> 3642 * ce->guc_state.lock is preserved. 
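 * Completing rq->submit inline would call back into request submission
 * and take sched_engine->lock while ce->guc_state.lock is already held,
 * inverting the documented order; bouncing through irq_work avoids that.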
3643 */ 3644 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3645 guc_fence_link) { 3646 list_del(&rq->guc_fence_link); 3647 irq_work_queue(&rq->submit_work); 3648 } 3649 3650 INIT_LIST_HEAD(&ce->guc_state.fences); 3651 } 3652 3653 static void guc_signal_context_fence(struct intel_context *ce) 3654 { 3655 unsigned long flags; 3656 3657 GEM_BUG_ON(intel_context_is_child(ce)); 3658 3659 spin_lock_irqsave(&ce->guc_state.lock, flags); 3660 clr_context_wait_for_deregister_to_register(ce); 3661 __guc_signal_context_fence(ce); 3662 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3663 } 3664 3665 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3666 { 3667 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3668 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3669 !submission_disabled(ce_to_guc(ce)); 3670 } 3671 3672 static void guc_context_init(struct intel_context *ce) 3673 { 3674 const struct i915_gem_context *ctx; 3675 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3676 3677 rcu_read_lock(); 3678 ctx = rcu_dereference(ce->gem_context); 3679 if (ctx) 3680 prio = ctx->sched.priority; 3681 rcu_read_unlock(); 3682 3683 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3684 3685 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3686 __delay_sched_disable); 3687 3688 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3689 } 3690 3691 static int guc_request_alloc(struct i915_request *rq) 3692 { 3693 struct intel_context *ce = request_to_scheduling_context(rq); 3694 struct intel_guc *guc = ce_to_guc(ce); 3695 unsigned long flags; 3696 int ret; 3697 3698 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3699 3700 /* 3701 * Flush enough space to reduce the likelihood of waiting after 3702 * we start building the request - in which case we will just 3703 * have to repeat work. 3704 */ 3705 rq->reserved_space += GUC_REQUEST_SIZE; 3706 3707 /* 3708 * Note that after this point, we have committed to using 3709 * this request as it is being used to both track the 3710 * state of engine initialisation and liveness of the 3711 * golden renderstate above. Think twice before you try 3712 * to cancel/unwind this request now. 3713 */ 3714 3715 /* Unconditionally invalidate GPU caches and TLBs. */ 3716 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3717 if (ret) 3718 return ret; 3719 3720 rq->reserved_space -= GUC_REQUEST_SIZE; 3721 3722 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3723 guc_context_init(ce); 3724 3725 /* 3726 * If the context gets closed while the execbuf is ongoing, the context 3727 * close code will race with the below code to cancel the delayed work. 3728 * If the context close wins the race and cancels the work, it will 3729 * immediately call the sched disable (see guc_context_close), so there 3730 * is a chance we can get past this check while the sched_disable code 3731 * is being executed. To make sure that code completes before we check 3732 * the status further down, we wait for the close process to complete. 3733 * Else, this code path could send a request down thinking that the 3734 * context is still in a schedule-enable mode while the GuC ends up 3735 * dropping the request completely because the disable did go from the 3736 * context_close path right to GuC just prior. In the event the CT is 3737 * full, we could potentially need to wait up to 1.5 seconds. 
3738 */ 3739 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3740 intel_context_sched_disable_unpin(ce); 3741 else if (intel_context_is_closed(ce)) 3742 if (wait_for(context_close_done(ce), 1500)) 3743 guc_warn(guc, "timed out waiting on context sched close before realloc\n"); 3744 /* 3745 * Call pin_guc_id here rather than in the pinning step as with 3746 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the 3747 * guc_id and creating horrible race conditions. This is especially bad 3748 * when guc_ids are being stolen due to oversubscription. By the time 3749 * this function is reached, it is guaranteed that the guc_id will be 3750 * persistent until the generated request is retired, thus sealing these 3751 * race conditions. It is still safe to fail here if guc_ids are 3752 * exhausted and return -EAGAIN to the user indicating that they can try 3753 * again in the future. 3754 * 3755 * There is no need for a lock here as the timeline mutex ensures at 3756 * most one context can be executing this code path at once. The 3757 * guc_id_ref is incremented once for every request in flight and 3758 * decremented on each retire. When it is zero, a lock around the 3759 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3760 */ 3761 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3762 goto out; 3763 3764 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3765 if (unlikely(ret < 0)) 3766 return ret; 3767 if (context_needs_register(ce, !!ret)) { 3768 ret = try_context_registration(ce, true); 3769 if (unlikely(ret)) { /* unwind */ 3770 if (ret == -EPIPE) { 3771 disable_submission(guc); 3772 goto out; /* GPU will be reset */ 3773 } 3774 atomic_dec(&ce->guc_id.ref); 3775 unpin_guc_id(guc, ce); 3776 return ret; 3777 } 3778 } 3779 3780 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3781 3782 out: 3783 /* 3784 * We block all requests on this context if a G2H is pending for a 3785 * schedule disable or context deregistration as the GuC will fail a 3786 * schedule enable or context registration, respectively, if either 3787 * G2H is pending. Once a G2H returns, the fence is released that is 3788 * blocking these requests (see guc_signal_context_fence).
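*
* In outline (matching the code below and guc_signal_context_fence()):
*
*   here:         i915_sw_fence_await(&rq->submit);
*                 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
*   G2H handler:  __guc_signal_context_fence() queues rq->submit_work,
*                 whose callback (submit_work_cb) completes &rq->submit,
*                 allowing the request to be submitted.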
3789 */ 3790 spin_lock_irqsave(&ce->guc_state.lock, flags); 3791 if (context_wait_for_deregister_to_register(ce) || 3792 context_pending_disable(ce)) { 3793 init_irq_work(&rq->submit_work, submit_work_cb); 3794 i915_sw_fence_await(&rq->submit); 3795 3796 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3797 } 3798 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3799 3800 return 0; 3801 } 3802 3803 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3804 struct i915_gem_ww_ctx *ww, 3805 void **vaddr) 3806 { 3807 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3808 3809 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3810 } 3811 3812 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3813 { 3814 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3815 int ret = __guc_context_pin(ce, engine, vaddr); 3816 intel_engine_mask_t tmp, mask = ce->engine->mask; 3817 3818 if (likely(!ret)) 3819 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3820 intel_engine_pm_get(engine); 3821 3822 return ret; 3823 } 3824 3825 static void guc_virtual_context_unpin(struct intel_context *ce) 3826 { 3827 intel_engine_mask_t tmp, mask = ce->engine->mask; 3828 struct intel_engine_cs *engine; 3829 struct intel_guc *guc = ce_to_guc(ce); 3830 3831 GEM_BUG_ON(context_enabled(ce)); 3832 GEM_BUG_ON(intel_context_is_barrier(ce)); 3833 3834 unpin_guc_id(guc, ce); 3835 lrc_unpin(ce); 3836 3837 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3838 intel_engine_pm_put_async(engine); 3839 } 3840 3841 static void guc_virtual_context_enter(struct intel_context *ce) 3842 { 3843 intel_engine_mask_t tmp, mask = ce->engine->mask; 3844 struct intel_engine_cs *engine; 3845 3846 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3847 intel_engine_pm_get(engine); 3848 3849 intel_timeline_enter(ce->timeline); 3850 } 3851 3852 static void guc_virtual_context_exit(struct intel_context *ce) 3853 { 3854 intel_engine_mask_t tmp, mask = ce->engine->mask; 3855 struct intel_engine_cs *engine; 3856 3857 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3858 intel_engine_pm_put(engine); 3859 3860 intel_timeline_exit(ce->timeline); 3861 } 3862 3863 static int guc_virtual_context_alloc(struct intel_context *ce) 3864 { 3865 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3866 3867 return lrc_alloc(ce, engine); 3868 } 3869 3870 static const struct intel_context_ops virtual_guc_context_ops = { 3871 .flags = COPS_RUNTIME_CYCLES, 3872 .alloc = guc_virtual_context_alloc, 3873 3874 .close = guc_context_close, 3875 3876 .pre_pin = guc_virtual_context_pre_pin, 3877 .pin = guc_virtual_context_pin, 3878 .unpin = guc_virtual_context_unpin, 3879 .post_unpin = guc_context_post_unpin, 3880 3881 .revoke = guc_context_revoke, 3882 3883 .cancel_request = guc_context_cancel_request, 3884 3885 .enter = guc_virtual_context_enter, 3886 .exit = guc_virtual_context_exit, 3887 3888 .sched_disable = guc_context_sched_disable, 3889 .update_stats = guc_context_update_stats, 3890 3891 .destroy = guc_context_destroy, 3892 3893 .get_sibling = guc_virtual_get_sibling, 3894 }; 3895 3896 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3897 { 3898 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3899 struct intel_guc *guc = ce_to_guc(ce); 3900 int ret; 3901 3902 GEM_BUG_ON(!intel_context_is_parent(ce)); 3903 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3904 3905 ret = 
pin_guc_id(guc, ce); 3906 if (unlikely(ret < 0)) 3907 return ret; 3908 3909 return __guc_context_pin(ce, engine, vaddr); 3910 } 3911 3912 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3913 { 3914 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3915 3916 GEM_BUG_ON(!intel_context_is_child(ce)); 3917 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3918 3919 __intel_context_pin(ce->parallel.parent); 3920 return __guc_context_pin(ce, engine, vaddr); 3921 } 3922 3923 static void guc_parent_context_unpin(struct intel_context *ce) 3924 { 3925 struct intel_guc *guc = ce_to_guc(ce); 3926 3927 GEM_BUG_ON(context_enabled(ce)); 3928 GEM_BUG_ON(intel_context_is_barrier(ce)); 3929 GEM_BUG_ON(!intel_context_is_parent(ce)); 3930 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3931 3932 unpin_guc_id(guc, ce); 3933 lrc_unpin(ce); 3934 } 3935 3936 static void guc_child_context_unpin(struct intel_context *ce) 3937 { 3938 GEM_BUG_ON(context_enabled(ce)); 3939 GEM_BUG_ON(intel_context_is_barrier(ce)); 3940 GEM_BUG_ON(!intel_context_is_child(ce)); 3941 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3942 3943 lrc_unpin(ce); 3944 } 3945 3946 static void guc_child_context_post_unpin(struct intel_context *ce) 3947 { 3948 GEM_BUG_ON(!intel_context_is_child(ce)); 3949 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3950 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3951 3952 lrc_post_unpin(ce); 3953 intel_context_unpin(ce->parallel.parent); 3954 } 3955 3956 static void guc_child_context_destroy(struct kref *kref) 3957 { 3958 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3959 3960 __guc_context_destroy(ce); 3961 } 3962 3963 static const struct intel_context_ops virtual_parent_context_ops = { 3964 .alloc = guc_virtual_context_alloc, 3965 3966 .close = guc_context_close, 3967 3968 .pre_pin = guc_context_pre_pin, 3969 .pin = guc_parent_context_pin, 3970 .unpin = guc_parent_context_unpin, 3971 .post_unpin = guc_context_post_unpin, 3972 3973 .revoke = guc_context_revoke, 3974 3975 .cancel_request = guc_context_cancel_request, 3976 3977 .enter = guc_virtual_context_enter, 3978 .exit = guc_virtual_context_exit, 3979 3980 .sched_disable = guc_context_sched_disable, 3981 3982 .destroy = guc_context_destroy, 3983 3984 .get_sibling = guc_virtual_get_sibling, 3985 }; 3986 3987 static const struct intel_context_ops virtual_child_context_ops = { 3988 .alloc = guc_virtual_context_alloc, 3989 3990 .pre_pin = guc_context_pre_pin, 3991 .pin = guc_child_context_pin, 3992 .unpin = guc_child_context_unpin, 3993 .post_unpin = guc_child_context_post_unpin, 3994 3995 .cancel_request = guc_context_cancel_request, 3996 3997 .enter = guc_virtual_context_enter, 3998 .exit = guc_virtual_context_exit, 3999 4000 .destroy = guc_child_context_destroy, 4001 4002 .get_sibling = guc_virtual_get_sibling, 4003 }; 4004 4005 /* 4006 * The below override of the breadcrumbs is enabled when the user configures a 4007 * context for parallel submission (multi-lrc, parent-child). 4008 * 4009 * The overridden breadcrumbs implements an algorithm which allows the GuC to 4010 * safely preempt all the hw contexts configured for parallel submission 4011 * between each BB. The contract between the i915 and GuC is if the parent 4012 * context can be preempted, all the children can be preempted, and the GuC will 4013 * always try to preempt the parent before the children. 
A handshake between the 4014 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 4015 * creating a window to preempt between each set of BBs. 4016 */ 4017 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4018 u64 offset, u32 len, 4019 const unsigned int flags); 4020 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4021 u64 offset, u32 len, 4022 const unsigned int flags); 4023 static u32 * 4024 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4025 u32 *cs); 4026 static u32 * 4027 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4028 u32 *cs); 4029 4030 static struct intel_context * 4031 guc_create_parallel(struct intel_engine_cs **engines, 4032 unsigned int num_siblings, 4033 unsigned int width) 4034 { 4035 struct intel_engine_cs **siblings = NULL; 4036 struct intel_context *parent = NULL, *ce, *err; 4037 int i, j; 4038 4039 siblings = kmalloc_array(num_siblings, 4040 sizeof(*siblings), 4041 GFP_KERNEL); 4042 if (!siblings) 4043 return ERR_PTR(-ENOMEM); 4044 4045 for (i = 0; i < width; ++i) { 4046 for (j = 0; j < num_siblings; ++j) 4047 siblings[j] = engines[i * num_siblings + j]; 4048 4049 ce = intel_engine_create_virtual(siblings, num_siblings, 4050 FORCE_VIRTUAL); 4051 if (IS_ERR(ce)) { 4052 err = ERR_CAST(ce); 4053 goto unwind; 4054 } 4055 4056 if (i == 0) { 4057 parent = ce; 4058 parent->ops = &virtual_parent_context_ops; 4059 } else { 4060 ce->ops = &virtual_child_context_ops; 4061 intel_context_bind_parent_child(parent, ce); 4062 } 4063 } 4064 4065 parent->parallel.fence_context = dma_fence_context_alloc(1); 4066 4067 parent->engine->emit_bb_start = 4068 emit_bb_start_parent_no_preempt_mid_batch; 4069 parent->engine->emit_fini_breadcrumb = 4070 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4071 parent->engine->emit_fini_breadcrumb_dw = 4072 12 + 4 * parent->parallel.number_children; 4073 for_each_child(parent, ce) { 4074 ce->engine->emit_bb_start = 4075 emit_bb_start_child_no_preempt_mid_batch; 4076 ce->engine->emit_fini_breadcrumb = 4077 emit_fini_breadcrumb_child_no_preempt_mid_batch; 4078 ce->engine->emit_fini_breadcrumb_dw = 16; 4079 } 4080 4081 kfree(siblings); 4082 return parent; 4083 4084 unwind: 4085 if (parent) 4086 intel_context_put(parent); 4087 kfree(siblings); 4088 return err; 4089 } 4090 4091 static bool 4092 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4093 { 4094 struct intel_engine_cs *sibling; 4095 intel_engine_mask_t tmp, mask = b->engine_mask; 4096 bool result = false; 4097 4098 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4099 result |= intel_engine_irq_enable(sibling); 4100 4101 return result; 4102 } 4103 4104 static void 4105 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4106 { 4107 struct intel_engine_cs *sibling; 4108 intel_engine_mask_t tmp, mask = b->engine_mask; 4109 4110 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4111 intel_engine_irq_disable(sibling); 4112 } 4113 4114 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4115 { 4116 int i; 4117 4118 /* 4119 * In GuC submission mode we do not know which physical engine a request 4120 * will be scheduled on, this creates a problem because the breadcrumb 4121 * interrupt is per physical engine. To work around this we attach 4122 * requests and direct all breadcrumb interrupts to the first instance 4123 * of an engine per class. 
In addition all breadcrumb interrupts are 4124 * enabled / disabled across an engine class in unison. 4125 */ 4126 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4127 struct intel_engine_cs *sibling = 4128 engine->gt->engine_class[engine->class][i]; 4129 4130 if (sibling) { 4131 if (engine->breadcrumbs != sibling->breadcrumbs) { 4132 intel_breadcrumbs_put(engine->breadcrumbs); 4133 engine->breadcrumbs = 4134 intel_breadcrumbs_get(sibling->breadcrumbs); 4135 } 4136 break; 4137 } 4138 } 4139 4140 if (engine->breadcrumbs) { 4141 engine->breadcrumbs->engine_mask |= engine->mask; 4142 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4143 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4144 } 4145 } 4146 4147 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4148 int prio) 4149 { 4150 struct intel_context *ce = request_to_scheduling_context(rq); 4151 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4152 4153 /* Short circuit function */ 4154 if (prio < I915_PRIORITY_NORMAL || 4155 rq->guc_prio == GUC_PRIO_FINI || 4156 (rq->guc_prio != GUC_PRIO_INIT && 4157 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4158 return; 4159 4160 spin_lock(&ce->guc_state.lock); 4161 if (rq->guc_prio != GUC_PRIO_FINI) { 4162 if (rq->guc_prio != GUC_PRIO_INIT) 4163 sub_context_inflight_prio(ce, rq->guc_prio); 4164 rq->guc_prio = new_guc_prio; 4165 add_context_inflight_prio(ce, rq->guc_prio); 4166 update_context_prio(ce); 4167 } 4168 spin_unlock(&ce->guc_state.lock); 4169 } 4170 4171 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4172 { 4173 struct intel_context *ce = request_to_scheduling_context(rq); 4174 4175 spin_lock(&ce->guc_state.lock); 4176 guc_prio_fini(rq, ce); 4177 spin_unlock(&ce->guc_state.lock); 4178 } 4179 4180 static void sanitize_hwsp(struct intel_engine_cs *engine) 4181 { 4182 struct intel_timeline *tl; 4183 4184 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4185 intel_timeline_reset_seqno(tl); 4186 } 4187 4188 static void guc_sanitize(struct intel_engine_cs *engine) 4189 { 4190 /* 4191 * Poison residual state on resume, in case the suspend didn't! 4192 * 4193 * We have to assume that across suspend/resume (or other loss 4194 * of control) that the contents of our pinned buffers has been 4195 * lost, replaced by garbage. Since this doesn't always happen, 4196 * let's poison such state so that we more quickly spot when 4197 * we falsely assume it has been preserved. 4198 */ 4199 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4200 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4201 4202 /* 4203 * The kernel_context HWSP is stored in the status_page. As above, 4204 * that may be lost on resume/initialisation, and so we need to 4205 * reset the value in the HWSP. 
4206 */ 4207 sanitize_hwsp(engine); 4208 4209 /* And scrub the dirty cachelines for the HWSP */ 4210 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4211 4212 intel_engine_reset_pinned_contexts(engine); 4213 } 4214 4215 static void setup_hwsp(struct intel_engine_cs *engine) 4216 { 4217 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4218 4219 ENGINE_WRITE_FW(engine, 4220 RING_HWS_PGA, 4221 i915_ggtt_offset(engine->status_page.vma)); 4222 } 4223 4224 static void start_engine(struct intel_engine_cs *engine) 4225 { 4226 ENGINE_WRITE_FW(engine, 4227 RING_MODE_GEN7, 4228 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4229 4230 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4231 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4232 } 4233 4234 static int guc_resume(struct intel_engine_cs *engine) 4235 { 4236 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4237 4238 intel_mocs_init_engine(engine); 4239 4240 intel_breadcrumbs_reset(engine->breadcrumbs); 4241 4242 setup_hwsp(engine); 4243 start_engine(engine); 4244 4245 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4246 xehp_enable_ccs_engines(engine); 4247 4248 return 0; 4249 } 4250 4251 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4252 { 4253 return !sched_engine->tasklet.callback; 4254 } 4255 4256 static void guc_set_default_submission(struct intel_engine_cs *engine) 4257 { 4258 engine->submit_request = guc_submit_request; 4259 } 4260 4261 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4262 struct intel_context *ce) 4263 { 4264 int ret; 4265 4266 /* 4267 * Note: we purposefully do not check the returns below because 4268 * the registration can only fail if a reset is just starting. 4269 * This is called at the end of reset so presumably another reset 4270 * isn't happening and even if it did this code would be run again. 4271 */ 4272 4273 if (context_guc_id_invalid(ce)) { 4274 ret = pin_guc_id(guc, ce); 4275 4276 if (ret < 0) 4277 return ret; 4278 } 4279 4280 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4281 guc_context_init(ce); 4282 4283 ret = try_context_registration(ce, true); 4284 if (ret) 4285 unpin_guc_id(guc, ce); 4286 4287 return ret; 4288 } 4289 4290 static inline int guc_init_submission(struct intel_guc *guc) 4291 { 4292 struct intel_gt *gt = guc_to_gt(guc); 4293 struct intel_engine_cs *engine; 4294 enum intel_engine_id id; 4295 4296 /* make sure all descriptors are clean... */ 4297 xa_destroy(&guc->context_lookup); 4298 4299 /* 4300 * A reset might have occurred while we had a pending stalled request, 4301 * so make sure we clean that up. 4302 */ 4303 guc->stalled_request = NULL; 4304 guc->submission_stall_reason = STALL_NONE; 4305 4306 /* 4307 * Some contexts might have been pinned before we enabled GuC 4308 * submission, so we need to add them to the GuC bookkeeping. 4309 * Also, after a reset of the GuC we want to make sure that the 4310 * information shared with GuC is properly reset. The kernel LRCs are 4311 * not attached to the gem_context, so they need to be added separately.
4312 */ 4313 for_each_engine(engine, gt, id) { 4314 struct intel_context *ce; 4315 4316 list_for_each_entry(ce, &engine->pinned_contexts_list, 4317 pinned_contexts_link) { 4318 int ret = guc_kernel_context_pin(guc, ce); 4319 4320 if (ret) { 4321 /* No point in trying to clean up as i915 will wedge on failure */ 4322 return ret; 4323 } 4324 } 4325 } 4326 4327 return 0; 4328 } 4329 4330 static void guc_release(struct intel_engine_cs *engine) 4331 { 4332 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4333 4334 intel_engine_cleanup_common(engine); 4335 lrc_fini_wa_ctx(engine); 4336 } 4337 4338 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4339 { 4340 struct intel_engine_cs *e; 4341 intel_engine_mask_t tmp, mask = engine->mask; 4342 4343 for_each_engine_masked(e, engine->gt, mask, tmp) 4344 e->serial++; 4345 } 4346 4347 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4348 { 4349 /* Default vfuncs which can be overridden by each engine. */ 4350 4351 engine->resume = guc_resume; 4352 4353 engine->cops = &guc_context_ops; 4354 engine->request_alloc = guc_request_alloc; 4355 engine->add_active_request = add_to_context; 4356 engine->remove_active_request = remove_from_context; 4357 4358 engine->sched_engine->schedule = i915_schedule; 4359 4360 engine->reset.prepare = guc_engine_reset_prepare; 4361 engine->reset.rewind = guc_rewind_nop; 4362 engine->reset.cancel = guc_reset_nop; 4363 engine->reset.finish = guc_reset_nop; 4364 4365 engine->emit_flush = gen8_emit_flush_xcs; 4366 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4367 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4368 if (GRAPHICS_VER(engine->i915) >= 12) { 4369 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4370 engine->emit_flush = gen12_emit_flush_xcs; 4371 } 4372 engine->set_default_submission = guc_set_default_submission; 4373 engine->busyness = guc_engine_busyness; 4374 4375 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4376 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4377 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4378 4379 /* Wa_14014475959:dg2 */ 4380 if (engine->class == COMPUTE_CLASS) 4381 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 4382 IS_DG2(engine->i915)) 4383 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4384 4385 /* 4386 * TODO: GuC supports timeslicing and semaphores as well, but they're 4387 * handled by the firmware so some minor tweaks are required before 4388 * enabling. 
4389 * 4390 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4391 */ 4392 4393 engine->emit_bb_start = gen8_emit_bb_start; 4394 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4395 engine->emit_bb_start = xehp_emit_bb_start; 4396 } 4397 4398 static void rcs_submission_override(struct intel_engine_cs *engine) 4399 { 4400 switch (GRAPHICS_VER(engine->i915)) { 4401 case 12: 4402 engine->emit_flush = gen12_emit_flush_rcs; 4403 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4404 break; 4405 case 11: 4406 engine->emit_flush = gen11_emit_flush_rcs; 4407 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4408 break; 4409 default: 4410 engine->emit_flush = gen8_emit_flush_rcs; 4411 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4412 break; 4413 } 4414 } 4415 4416 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4417 { 4418 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4419 intel_engine_set_irq_handler(engine, cs_irq_handler); 4420 } 4421 4422 static void guc_sched_engine_destroy(struct kref *kref) 4423 { 4424 struct i915_sched_engine *sched_engine = 4425 container_of(kref, typeof(*sched_engine), ref); 4426 struct intel_guc *guc = sched_engine->private_data; 4427 4428 guc->sched_engine = NULL; 4429 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4430 kfree(sched_engine); 4431 } 4432 4433 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4434 { 4435 struct drm_i915_private *i915 = engine->i915; 4436 struct intel_guc *guc = &engine->gt->uc.guc; 4437 4438 /* 4439 * The setup relies on several assumptions (e.g. irqs always enabled) 4440 * that are only valid on gen11+ 4441 */ 4442 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4443 4444 if (!guc->sched_engine) { 4445 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4446 if (!guc->sched_engine) 4447 return -ENOMEM; 4448 4449 guc->sched_engine->schedule = i915_schedule; 4450 guc->sched_engine->disabled = guc_sched_engine_disabled; 4451 guc->sched_engine->private_data = guc; 4452 guc->sched_engine->destroy = guc_sched_engine_destroy; 4453 guc->sched_engine->bump_inflight_request_prio = 4454 guc_bump_inflight_request_prio; 4455 guc->sched_engine->retire_inflight_request_prio = 4456 guc_retire_inflight_request_prio; 4457 tasklet_setup(&guc->sched_engine->tasklet, 4458 guc_submission_tasklet); 4459 } 4460 i915_sched_engine_put(engine->sched_engine); 4461 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4462 4463 guc_default_vfuncs(engine); 4464 guc_default_irqs(engine); 4465 guc_init_breadcrumbs(engine); 4466 4467 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4468 rcs_submission_override(engine); 4469 4470 lrc_init_wa_ctx(engine); 4471 4472 /* Finally, take ownership and responsibility for cleanup! 
*/ 4473 engine->sanitize = guc_sanitize; 4474 engine->release = guc_release; 4475 4476 return 0; 4477 } 4478 4479 struct scheduling_policy { 4480 /* internal data */ 4481 u32 max_words, num_words; 4482 u32 count; 4483 /* API data */ 4484 struct guc_update_scheduling_policy h2g; 4485 }; 4486 4487 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4488 { 4489 u32 *start = (void *)&policy->h2g; 4490 u32 *end = policy->h2g.data + policy->num_words; 4491 size_t delta = end - start; 4492 4493 return delta; 4494 } 4495 4496 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4497 { 4498 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4499 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4500 policy->num_words = 0; 4501 policy->count = 0; 4502 4503 return policy; 4504 } 4505 4506 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4507 u32 action, u32 *data, u32 len) 4508 { 4509 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4510 4511 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4512 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4513 FIELD_PREP(GUC_KLV_0_LEN, len); 4514 memcpy(klv_ptr, data, sizeof(u32) * len); 4515 policy->num_words += 1 + len; 4516 policy->count++; 4517 } 4518 4519 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4520 struct scheduling_policy *policy) 4521 { 4522 int ret; 4523 4524 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4525 __guc_scheduling_policy_action_size(policy)); 4526 if (ret < 0) { 4527 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4528 ERR_PTR(ret)); 4529 return ret; 4530 } 4531 4532 if (ret != policy->count) { 4533 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4534 ret, policy->count); 4535 if (ret > policy->count) 4536 return -EPROTO; 4537 } 4538 4539 return 0; 4540 } 4541 4542 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4543 { 4544 struct scheduling_policy policy; 4545 struct intel_gt *gt = guc_to_gt(guc); 4546 intel_wakeref_t wakeref; 4547 int ret; 4548 4549 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4550 return 0; 4551 4552 __guc_scheduling_policy_start_klv(&policy); 4553 4554 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4555 u32 yield[] = { 4556 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4557 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4558 }; 4559 4560 __guc_scheduling_policy_add_klv(&policy, 4561 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4562 yield, ARRAY_SIZE(yield)); 4563 4564 ret = __guc_action_set_scheduling_policies(guc, &policy); 4565 } 4566 4567 return ret; 4568 } 4569 4570 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4571 { 4572 struct intel_gt *gt = guc_to_gt(guc); 4573 u32 val; 4574 4575 if (GRAPHICS_VER(gt->i915) < 12) 4576 return; 4577 4578 if (to_guc) 4579 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4580 else 4581 val = 0; 4582 4583 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4584 } 4585 4586 int intel_guc_submission_enable(struct intel_guc *guc) 4587 { 4588 int ret; 4589 4590 /* Semaphore interrupt enable and route to GuC */ 4591 guc_route_semaphores(guc, true); 4592 4593 ret = guc_init_submission(guc); 4594 if (ret) 4595 goto fail_sem; 4596 4597 ret = guc_init_engine_stats(guc); 4598 if (ret) 4599 goto fail_sem; 4600 4601 ret = guc_init_global_schedule_policy(guc); 4602 if (ret) 4603 goto fail_stats; 4604 
4605 return 0; 4606 4607 fail_stats: 4608 guc_fini_engine_stats(guc); 4609 fail_sem: 4610 guc_route_semaphores(guc, false); 4611 return ret; 4612 } 4613 4614 /* Note: By the time we're here, GuC may have already been reset */ 4615 void intel_guc_submission_disable(struct intel_guc *guc) 4616 { 4617 guc_cancel_busyness_worker(guc); 4618 4619 /* Semaphore interrupt disable and route to host */ 4620 guc_route_semaphores(guc, false); 4621 } 4622 4623 static bool __guc_submission_supported(struct intel_guc *guc) 4624 { 4625 /* GuC submission is unavailable for pre-Gen11 */ 4626 return intel_guc_is_supported(guc) && 4627 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4628 } 4629 4630 static bool __guc_submission_selected(struct intel_guc *guc) 4631 { 4632 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4633 4634 if (!intel_guc_submission_is_supported(guc)) 4635 return false; 4636 4637 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4638 } 4639 4640 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4641 { 4642 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4643 } 4644 4645 /* 4646 * This default value of 33 milliseconds (+1 millisecond round up) ensures 30fps or higher 4647 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4648 * operation. This matches the 30fps game-render + encode (real world) workload this 4649 * knob was tested against. 4650 */ 4651 #define SCHED_DISABLE_DELAY_MS 34 4652 4653 /* 4654 * A threshold of 75% is a reasonable starting point considering that real world apps 4655 * generally don't get anywhere near this. 4656 */ 4657 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4658 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4) 4659 4660 void intel_guc_submission_init_early(struct intel_guc *guc) 4661 { 4662 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4663 4664 spin_lock_init(&guc->submission_state.lock); 4665 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4666 ida_init(&guc->submission_state.guc_ids); 4667 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4668 INIT_WORK(&guc->submission_state.destroyed_worker, 4669 destroyed_worker_func); 4670 INIT_WORK(&guc->submission_state.reset_fail_worker, 4671 reset_fail_worker_func); 4672 4673 spin_lock_init(&guc->timestamp.lock); 4674 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4675 4676 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4677 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4678 guc->submission_state.sched_disable_gucid_threshold = 4679 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4680 guc->submission_supported = __guc_submission_supported(guc); 4681 guc->submission_selected = __guc_submission_selected(guc); 4682 } 4683 4684 static inline struct intel_context * 4685 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4686 { 4687 struct intel_context *ce; 4688 4689 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4690 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4691 return NULL; 4692 } 4693 4694 ce = __get_context(guc, ctx_id); 4695 if (unlikely(!ce)) { 4696 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4697 return NULL; 4698 } 4699 4700 if (unlikely(intel_context_is_child(ce))) { 4701 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4702 return NULL; 4703 } 4704 4705 return ce; 4706 } 4707 4708 static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno) 4709 { 4710 struct intel_guc_tlb_wait *wait; 4711
unsigned long flags; 4712 4713 xa_lock_irqsave(&guc->tlb_lookup, flags); 4714 wait = xa_load(&guc->tlb_lookup, seqno); 4715 4716 if (wait) 4717 wake_up(&wait->wq); 4718 else 4719 guc_dbg(guc, 4720 "Stale TLB invalidation response with seqno %d\n", seqno); 4721 4722 xa_unlock_irqrestore(&guc->tlb_lookup, flags); 4723 } 4724 4725 int intel_guc_tlb_invalidation_done(struct intel_guc *guc, 4726 const u32 *payload, u32 len) 4727 { 4728 if (len < 1) 4729 return -EPROTO; 4730 4731 wait_wake_outstanding_tlb_g2h(guc, payload[0]); 4732 return 0; 4733 } 4734 4735 static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout) 4736 { 4737 /* 4738 * This is equivalent to wait_woken() with the exception that 4739 * we do not wake up early if the kthread task has been completed. 4740 * As we are called from page reclaim in any task context, 4741 * we may be invoked from stopped kthreads, but we *must* 4742 * complete the wait from the HW. 4743 */ 4744 do { 4745 set_current_state(TASK_UNINTERRUPTIBLE); 4746 if (wq_entry->flags & WQ_FLAG_WOKEN) 4747 break; 4748 4749 timeout = schedule_timeout(timeout); 4750 } while (timeout); 4751 4752 /* See wait_woken() and woken_wake_function() */ 4753 __set_current_state(TASK_RUNNING); 4754 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); 4755 4756 return timeout; 4757 } 4758 4759 static bool intel_gt_is_enabled(const struct intel_gt *gt) 4760 { 4761 /* Check if GT is wedged or suspended */ 4762 if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915)) 4763 return false; 4764 return true; 4765 } 4766 4767 static int guc_send_invalidate_tlb(struct intel_guc *guc, 4768 enum intel_guc_tlb_invalidation_type type) 4769 { 4770 struct intel_guc_tlb_wait _wq, *wq = &_wq; 4771 struct intel_gt *gt = guc_to_gt(guc); 4772 DEFINE_WAIT_FUNC(wait, woken_wake_function); 4773 int err; 4774 u32 seqno; 4775 u32 action[] = { 4776 INTEL_GUC_ACTION_TLB_INVALIDATION, 4777 0, 4778 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) | 4779 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK, 4780 INTEL_GUC_TLB_INVAL_MODE_HEAVY) | 4781 INTEL_GUC_TLB_INVAL_FLUSH_CACHE, 4782 }; 4783 u32 size = ARRAY_SIZE(action); 4784 4785 /* 4786 * Early guard against GT enablement. TLB invalidation should not be 4787 * attempted if the GT is disabled due to suspend/wedge. 4788 */ 4789 if (!intel_gt_is_enabled(gt)) 4790 return -EINVAL; 4791 4792 init_waitqueue_head(&_wq.wq); 4793 4794 if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq, 4795 xa_limit_32b, &guc->next_seqno, 4796 GFP_ATOMIC | __GFP_NOWARN) < 0) { 4797 /* Under severe memory pressure? Serialise TLB allocations */ 4798 xa_lock_irq(&guc->tlb_lookup); 4799 wq = xa_load(&guc->tlb_lookup, guc->serial_slot); 4800 wait_event_lock_irq(wq->wq, 4801 !READ_ONCE(wq->busy), 4802 guc->tlb_lookup.xa_lock); 4803 /* 4804 * Update wq->busy under lock to ensure only one waiter can 4805 * issue the TLB invalidation command using the serial slot at a 4806 * time. The condition is set to true before releasing the lock 4807 * so that other caller continue to wait until woken up again. 4808 */ 4809 wq->busy = true; 4810 xa_unlock_irq(&guc->tlb_lookup); 4811 4812 seqno = guc->serial_slot; 4813 } 4814 4815 action[1] = seqno; 4816 4817 add_wait_queue(&wq->wq, &wait); 4818 4819 /* This is a critical reclaim path and thus we must loop here. */ 4820 err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true); 4821 if (err) 4822 goto out; 4823 4824 /* 4825 * Late guard against GT enablement. 
It is not an error for the TLB 4826 * invalidation to time out if the GT is disabled during the process 4827 * due to suspend/wedge. In fact, the TLB invalidation is cancelled 4828 * in this case. 4829 */ 4830 if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) && 4831 intel_gt_is_enabled(gt)) { 4832 guc_err(guc, 4833 "TLB invalidation response timed out for seqno %u\n", seqno); 4834 err = -ETIME; 4835 } 4836 out: 4837 remove_wait_queue(&wq->wq, &wait); 4838 if (seqno != guc->serial_slot) 4839 xa_erase_irq(&guc->tlb_lookup, seqno); 4840 4841 return err; 4842 } 4843 4844 /* Send a H2G command to invalidate the TLBs at engine level and beyond. */ 4845 int intel_guc_invalidate_tlb_engines(struct intel_guc *guc) 4846 { 4847 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES); 4848 } 4849 4850 /* Send a H2G command to invalidate the GuC's internal TLB. */ 4851 int intel_guc_invalidate_tlb_guc(struct intel_guc *guc) 4852 { 4853 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC); 4854 } 4855 4856 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4857 const u32 *msg, 4858 u32 len) 4859 { 4860 struct intel_context *ce; 4861 u32 ctx_id; 4862 4863 if (unlikely(len < 1)) { 4864 guc_err(guc, "Invalid length %u\n", len); 4865 return -EPROTO; 4866 } 4867 ctx_id = msg[0]; 4868 4869 ce = g2h_context_lookup(guc, ctx_id); 4870 if (unlikely(!ce)) 4871 return -EPROTO; 4872 4873 trace_intel_context_deregister_done(ce); 4874 4875 #ifdef CONFIG_DRM_I915_SELFTEST 4876 if (unlikely(ce->drop_deregister)) { 4877 ce->drop_deregister = false; 4878 return 0; 4879 } 4880 #endif 4881 4882 if (context_wait_for_deregister_to_register(ce)) { 4883 struct intel_runtime_pm *runtime_pm = 4884 &ce->engine->gt->i915->runtime_pm; 4885 intel_wakeref_t wakeref; 4886 4887 /* 4888 * Previous owner of this guc_id has been deregistered, now safe to 4889 * register this context.
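*
* register_context() below registers this context against the guc_id
* whose previous owner has just been deregistered, and
* guc_signal_context_fence() then releases any requests that were
* parked on ce->guc_state.fences while the deregistration was in
* flight (see guc_request_alloc()).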
4890 */ 4891 with_intel_runtime_pm(runtime_pm, wakeref) 4892 register_context(ce, true); 4893 guc_signal_context_fence(ce); 4894 intel_context_put(ce); 4895 } else if (context_destroyed(ce)) { 4896 /* Context has been destroyed */ 4897 intel_gt_pm_put_async(guc_to_gt(guc)); 4898 release_guc_id(guc, ce); 4899 __guc_context_destroy(ce); 4900 } 4901 4902 decr_outstanding_submission_g2h(guc); 4903 4904 return 0; 4905 } 4906 4907 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4908 const u32 *msg, 4909 u32 len) 4910 { 4911 struct intel_context *ce; 4912 unsigned long flags; 4913 u32 ctx_id; 4914 4915 if (unlikely(len < 2)) { 4916 guc_err(guc, "Invalid length %u\n", len); 4917 return -EPROTO; 4918 } 4919 ctx_id = msg[0]; 4920 4921 ce = g2h_context_lookup(guc, ctx_id); 4922 if (unlikely(!ce)) 4923 return -EPROTO; 4924 4925 if (unlikely(context_destroyed(ce) || 4926 (!context_pending_enable(ce) && 4927 !context_pending_disable(ce)))) { 4928 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 4929 ce->guc_state.sched_state, ctx_id); 4930 return -EPROTO; 4931 } 4932 4933 trace_intel_context_sched_done(ce); 4934 4935 if (context_pending_enable(ce)) { 4936 #ifdef CONFIG_DRM_I915_SELFTEST 4937 if (unlikely(ce->drop_schedule_enable)) { 4938 ce->drop_schedule_enable = false; 4939 return 0; 4940 } 4941 #endif 4942 4943 spin_lock_irqsave(&ce->guc_state.lock, flags); 4944 clr_context_pending_enable(ce); 4945 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4946 } else if (context_pending_disable(ce)) { 4947 bool banned; 4948 4949 #ifdef CONFIG_DRM_I915_SELFTEST 4950 if (unlikely(ce->drop_schedule_disable)) { 4951 ce->drop_schedule_disable = false; 4952 return 0; 4953 } 4954 #endif 4955 4956 /* 4957 * Unpin must be done before __guc_signal_context_fence, 4958 * otherwise a race exists between the requests getting 4959 * submitted + retired before this unpin completes resulting in 4960 * the pin_count going to zero and the context still being 4961 * enabled. 
4962 */ 4963 intel_context_sched_disable_unpin(ce); 4964 4965 spin_lock_irqsave(&ce->guc_state.lock, flags); 4966 banned = context_banned(ce); 4967 clr_context_banned(ce); 4968 clr_context_pending_disable(ce); 4969 __guc_signal_context_fence(ce); 4970 guc_blocked_fence_complete(ce); 4971 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4972 4973 if (banned) { 4974 guc_cancel_context_requests(ce); 4975 intel_engine_signal_breadcrumbs(ce->engine); 4976 } 4977 } 4978 4979 decr_outstanding_submission_g2h(guc); 4980 intel_context_put(ce); 4981 4982 return 0; 4983 } 4984 4985 static void capture_error_state(struct intel_guc *guc, 4986 struct intel_context *ce) 4987 { 4988 struct intel_gt *gt = guc_to_gt(guc); 4989 struct drm_i915_private *i915 = gt->i915; 4990 intel_wakeref_t wakeref; 4991 intel_engine_mask_t engine_mask; 4992 4993 if (intel_engine_is_virtual(ce->engine)) { 4994 struct intel_engine_cs *e; 4995 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 4996 4997 engine_mask = 0; 4998 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 4999 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 5000 5001 if (match) { 5002 intel_engine_set_hung_context(e, ce); 5003 engine_mask |= e->mask; 5004 atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); 5005 } 5006 } 5007 5008 if (!engine_mask) { 5009 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 5010 ce->guc_id.id, ce->engine->name); 5011 engine_mask = ~0U; 5012 } 5013 } else { 5014 intel_engine_set_hung_context(ce->engine, ce); 5015 engine_mask = ce->engine->mask; 5016 atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); 5017 } 5018 5019 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 5020 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 5021 } 5022 5023 static void guc_context_replay(struct intel_context *ce) 5024 { 5025 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 5026 5027 __guc_reset_context(ce, ce->engine->mask); 5028 tasklet_hi_schedule(&sched_engine->tasklet); 5029 } 5030 5031 static void guc_handle_context_reset(struct intel_guc *guc, 5032 struct intel_context *ce) 5033 { 5034 bool capture = intel_context_is_schedulable(ce); 5035 5036 trace_intel_context_reset(ce); 5037 5038 guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 5039 capture ? "Got" : "Ignoring", 5040 ce->guc_id.id, ce->engine->name, 5041 str_yes_no(intel_context_is_exiting(ce)), 5042 str_yes_no(intel_context_is_banned(ce))); 5043 5044 if (capture) { 5045 capture_error_state(guc, ce); 5046 guc_context_replay(ce); 5047 } 5048 } 5049 5050 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 5051 const u32 *msg, u32 len) 5052 { 5053 struct intel_context *ce; 5054 unsigned long flags; 5055 int ctx_id; 5056 5057 if (unlikely(len != 1)) { 5058 guc_err(guc, "Invalid length %u", len); 5059 return -EPROTO; 5060 } 5061 5062 ctx_id = msg[0]; 5063 5064 /* 5065 * The context lookup uses the xarray but lookups only require an RCU lock 5066 * not the full spinlock. So take the lock explicitly and keep it until the 5067 * context has been reference count locked to ensure it can't be destroyed 5068 * asynchronously until the reset is done. 
5069 */ 5070 xa_lock_irqsave(&guc->context_lookup, flags); 5071 ce = g2h_context_lookup(guc, ctx_id); 5072 if (ce) 5073 intel_context_get(ce); 5074 xa_unlock_irqrestore(&guc->context_lookup, flags); 5075 5076 if (unlikely(!ce)) 5077 return -EPROTO; 5078 5079 guc_handle_context_reset(guc, ce); 5080 intel_context_put(ce); 5081 5082 return 0; 5083 } 5084 5085 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 5086 const u32 *msg, u32 len) 5087 { 5088 u32 status; 5089 5090 if (unlikely(len != 1)) { 5091 guc_dbg(guc, "Invalid length %u", len); 5092 return -EPROTO; 5093 } 5094 5095 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 5096 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 5097 guc_warn(guc, "No space for error capture"); 5098 5099 intel_guc_capture_process(guc); 5100 5101 return 0; 5102 } 5103 5104 struct intel_engine_cs * 5105 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 5106 { 5107 struct intel_gt *gt = guc_to_gt(guc); 5108 u8 engine_class = guc_class_to_engine_class(guc_class); 5109 5110 /* Class index is checked in class converter */ 5111 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 5112 5113 return gt->engine_class[engine_class][instance]; 5114 } 5115 5116 static void reset_fail_worker_func(struct work_struct *w) 5117 { 5118 struct intel_guc *guc = container_of(w, struct intel_guc, 5119 submission_state.reset_fail_worker); 5120 struct intel_gt *gt = guc_to_gt(guc); 5121 intel_engine_mask_t reset_fail_mask; 5122 unsigned long flags; 5123 5124 spin_lock_irqsave(&guc->submission_state.lock, flags); 5125 reset_fail_mask = guc->submission_state.reset_fail_mask; 5126 guc->submission_state.reset_fail_mask = 0; 5127 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5128 5129 if (likely(reset_fail_mask)) { 5130 struct intel_engine_cs *engine; 5131 enum intel_engine_id id; 5132 5133 /* 5134 * GuC is toast at this point - it dead loops after sending the failed 5135 * reset notification. So need to manually determine the guilty context. 5136 * Note that it should be reliable to do this here because the GuC is 5137 * toast and will not be scheduling behind the KMD's back. 5138 */ 5139 for_each_engine_masked(engine, gt, reset_fail_mask, id) 5140 intel_guc_find_hung_context(engine); 5141 5142 intel_gt_handle_error(gt, reset_fail_mask, 5143 I915_ERROR_CAPTURE, 5144 "GuC failed to reset engine mask=0x%x", 5145 reset_fail_mask); 5146 } 5147 } 5148 5149 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 5150 const u32 *msg, u32 len) 5151 { 5152 struct intel_engine_cs *engine; 5153 u8 guc_class, instance; 5154 u32 reason; 5155 unsigned long flags; 5156 5157 if (unlikely(len != 3)) { 5158 guc_err(guc, "Invalid length %u", len); 5159 return -EPROTO; 5160 } 5161 5162 guc_class = msg[0]; 5163 instance = msg[1]; 5164 reason = msg[2]; 5165 5166 engine = intel_guc_lookup_engine(guc, guc_class, instance); 5167 if (unlikely(!engine)) { 5168 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 5169 return -EPROTO; 5170 } 5171 5172 /* 5173 * This is an unexpected failure of a hardware feature. So, log a real 5174 * error message not just the informational that comes with the reset. 
5175 */ 5176 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 5177 guc_class, instance, engine->name, reason); 5178 5179 spin_lock_irqsave(&guc->submission_state.lock, flags); 5180 guc->submission_state.reset_fail_mask |= engine->mask; 5181 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5182 5183 /* 5184 * A GT reset flushes this worker queue (G2H handler) so we must use 5185 * another worker to trigger a GT reset. 5186 */ 5187 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 5188 5189 return 0; 5190 } 5191 5192 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 5193 { 5194 struct intel_guc *guc = &engine->gt->uc.guc; 5195 struct intel_context *ce; 5196 struct i915_request *rq; 5197 unsigned long index; 5198 unsigned long flags; 5199 5200 /* Reset called during driver load? GuC not yet initialised! */ 5201 if (unlikely(!guc_submission_initialized(guc))) 5202 return; 5203 5204 xa_lock_irqsave(&guc->context_lookup, flags); 5205 xa_for_each(&guc->context_lookup, index, ce) { 5206 bool found; 5207 5208 if (!kref_get_unless_zero(&ce->ref)) 5209 continue; 5210 5211 xa_unlock(&guc->context_lookup); 5212 5213 if (!intel_context_is_pinned(ce)) 5214 goto next; 5215 5216 if (intel_engine_is_virtual(ce->engine)) { 5217 if (!(ce->engine->mask & engine->mask)) 5218 goto next; 5219 } else { 5220 if (ce->engine != engine) 5221 goto next; 5222 } 5223 5224 found = false; 5225 spin_lock(&ce->guc_state.lock); 5226 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5227 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5228 continue; 5229 5230 found = true; 5231 break; 5232 } 5233 spin_unlock(&ce->guc_state.lock); 5234 5235 if (found) { 5236 intel_engine_set_hung_context(engine, ce); 5237 5238 /* Can only cope with one hang at a time... */ 5239 intel_context_put(ce); 5240 xa_lock(&guc->context_lookup); 5241 goto done; 5242 } 5243 5244 next: 5245 intel_context_put(ce); 5246 xa_lock(&guc->context_lookup); 5247 } 5248 done: 5249 xa_unlock_irqrestore(&guc->context_lookup, flags); 5250 } 5251 5252 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5253 struct i915_request *hung_rq, 5254 struct drm_printer *m) 5255 { 5256 struct intel_guc *guc = &engine->gt->uc.guc; 5257 struct intel_context *ce; 5258 unsigned long index; 5259 unsigned long flags; 5260 5261 /* Reset called during driver load? GuC not yet initialised! 
*/ 5262 if (unlikely(!guc_submission_initialized(guc))) 5263 return; 5264 5265 xa_lock_irqsave(&guc->context_lookup, flags); 5266 xa_for_each(&guc->context_lookup, index, ce) { 5267 if (!kref_get_unless_zero(&ce->ref)) 5268 continue; 5269 5270 xa_unlock(&guc->context_lookup); 5271 5272 if (!intel_context_is_pinned(ce)) 5273 goto next; 5274 5275 if (intel_engine_is_virtual(ce->engine)) { 5276 if (!(ce->engine->mask & engine->mask)) 5277 goto next; 5278 } else { 5279 if (ce->engine != engine) 5280 goto next; 5281 } 5282 5283 spin_lock(&ce->guc_state.lock); 5284 intel_engine_dump_active_requests(&ce->guc_state.requests, 5285 hung_rq, m); 5286 spin_unlock(&ce->guc_state.lock); 5287 5288 next: 5289 intel_context_put(ce); 5290 xa_lock(&guc->context_lookup); 5291 } 5292 xa_unlock_irqrestore(&guc->context_lookup, flags); 5293 } 5294 5295 void intel_guc_submission_print_info(struct intel_guc *guc, 5296 struct drm_printer *p) 5297 { 5298 struct i915_sched_engine *sched_engine = guc->sched_engine; 5299 struct rb_node *rb; 5300 unsigned long flags; 5301 5302 if (!sched_engine) 5303 return; 5304 5305 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 5306 guc->submission_version.major, guc->submission_version.minor, 5307 guc->submission_version.patch); 5308 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 5309 atomic_read(&guc->outstanding_submission_g2h)); 5310 drm_printf(p, "GuC tasklet count: %u\n", 5311 atomic_read(&sched_engine->tasklet.count)); 5312 5313 spin_lock_irqsave(&sched_engine->lock, flags); 5314 drm_printf(p, "Requests in GuC submit tasklet:\n"); 5315 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 5316 struct i915_priolist *pl = to_priolist(rb); 5317 struct i915_request *rq; 5318 5319 priolist_for_each_request(rq, pl) 5320 drm_printf(p, "guc_id=%u, seqno=%llu\n", 5321 rq->context->guc_id.id, 5322 rq->fence.seqno); 5323 } 5324 spin_unlock_irqrestore(&sched_engine->lock, flags); 5325 drm_printf(p, "\n"); 5326 } 5327 5328 static inline void guc_log_context_priority(struct drm_printer *p, 5329 struct intel_context *ce) 5330 { 5331 int i; 5332 5333 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 5334 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 5335 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 5336 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 5337 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 5338 i, ce->guc_state.prio_count[i]); 5339 } 5340 drm_printf(p, "\n"); 5341 } 5342 5343 static inline void guc_log_context(struct drm_printer *p, 5344 struct intel_context *ce) 5345 { 5346 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 5347 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 5348 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 5349 ce->ring->head, 5350 ce->lrc_reg_state[CTX_RING_HEAD]); 5351 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 5352 ce->ring->tail, 5353 ce->lrc_reg_state[CTX_RING_TAIL]); 5354 drm_printf(p, "\t\tContext Pin Count: %u\n", 5355 atomic_read(&ce->pin_count)); 5356 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 5357 atomic_read(&ce->guc_id.ref)); 5358 drm_printf(p, "\t\tSchedule State: 0x%x\n", 5359 ce->guc_state.sched_state); 5360 } 5361 5362 void intel_guc_submission_print_context_info(struct intel_guc *guc, 5363 struct drm_printer *p) 5364 { 5365 struct intel_context *ce; 5366 unsigned long index; 5367 unsigned long flags; 5368 5369 xa_lock_irqsave(&guc->context_lookup, flags); 5370 xa_for_each(&guc->context_lookup, index, ce) { 5371 
GEM_BUG_ON(intel_context_is_child(ce)); 5372 5373 guc_log_context(p, ce); 5374 guc_log_context_priority(p, ce); 5375 5376 if (intel_context_is_parent(ce)) { 5377 struct intel_context *child; 5378 5379 drm_printf(p, "\t\tNumber children: %u\n", 5380 ce->parallel.number_children); 5381 5382 if (ce->parallel.guc.wq_status) { 5383 drm_printf(p, "\t\tWQI Head: %u\n", 5384 READ_ONCE(*ce->parallel.guc.wq_head)); 5385 drm_printf(p, "\t\tWQI Tail: %u\n", 5386 READ_ONCE(*ce->parallel.guc.wq_tail)); 5387 drm_printf(p, "\t\tWQI Status: %u\n", 5388 READ_ONCE(*ce->parallel.guc.wq_status)); 5389 } 5390 5391 if (ce->engine->emit_bb_start == 5392 emit_bb_start_parent_no_preempt_mid_batch) { 5393 u8 i; 5394 5395 drm_printf(p, "\t\tChildren Go: %u\n", 5396 get_children_go_value(ce)); 5397 for (i = 0; i < ce->parallel.number_children; ++i) 5398 drm_printf(p, "\t\tChildren Join: %u\n", 5399 get_children_join_value(ce, i)); 5400 } 5401 5402 for_each_child(ce, child) 5403 guc_log_context(p, child); 5404 } 5405 } 5406 xa_unlock_irqrestore(&guc->context_lookup, flags); 5407 } 5408 5409 static inline u32 get_children_go_addr(struct intel_context *ce) 5410 { 5411 GEM_BUG_ON(!intel_context_is_parent(ce)); 5412 5413 return i915_ggtt_offset(ce->state) + 5414 __get_parent_scratch_offset(ce) + 5415 offsetof(struct parent_scratch, go.semaphore); 5416 } 5417 5418 static inline u32 get_children_join_addr(struct intel_context *ce, 5419 u8 child_index) 5420 { 5421 GEM_BUG_ON(!intel_context_is_parent(ce)); 5422 5423 return i915_ggtt_offset(ce->state) + 5424 __get_parent_scratch_offset(ce) + 5425 offsetof(struct parent_scratch, join[child_index].semaphore); 5426 } 5427 5428 #define PARENT_GO_BB 1 5429 #define PARENT_GO_FINI_BREADCRUMB 0 5430 #define CHILD_GO_BB 1 5431 #define CHILD_GO_FINI_BREADCRUMB 0 5432 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5433 u64 offset, u32 len, 5434 const unsigned int flags) 5435 { 5436 struct intel_context *ce = rq->context; 5437 u32 *cs; 5438 u8 i; 5439 5440 GEM_BUG_ON(!intel_context_is_parent(ce)); 5441 5442 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5443 if (IS_ERR(cs)) 5444 return PTR_ERR(cs); 5445 5446 /* Wait on children */ 5447 for (i = 0; i < ce->parallel.number_children; ++i) { 5448 *cs++ = (MI_SEMAPHORE_WAIT | 5449 MI_SEMAPHORE_GLOBAL_GTT | 5450 MI_SEMAPHORE_POLL | 5451 MI_SEMAPHORE_SAD_EQ_SDD); 5452 *cs++ = PARENT_GO_BB; 5453 *cs++ = get_children_join_addr(ce, i); 5454 *cs++ = 0; 5455 } 5456 5457 /* Turn off preemption */ 5458 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5459 *cs++ = MI_NOOP; 5460 5461 /* Tell children go */ 5462 cs = gen8_emit_ggtt_write(cs, 5463 CHILD_GO_BB, 5464 get_children_go_addr(ce), 5465 0); 5466 5467 /* Jump to batch */ 5468 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5469 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 5470 *cs++ = lower_32_bits(offset); 5471 *cs++ = upper_32_bits(offset); 5472 *cs++ = MI_NOOP; 5473 5474 intel_ring_advance(rq, cs); 5475 5476 return 0; 5477 } 5478 5479 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 5480 u64 offset, u32 len, 5481 const unsigned int flags) 5482 { 5483 struct intel_context *ce = rq->context; 5484 struct intel_context *parent = intel_context_to_parent(ce); 5485 u32 *cs; 5486 5487 GEM_BUG_ON(!intel_context_is_child(ce)); 5488 5489 cs = intel_ring_begin(rq, 12); 5490 if (IS_ERR(cs)) 5491 return PTR_ERR(cs); 5492 5493 /* Signal parent */ 5494 cs = gen8_emit_ggtt_write(cs, 5495 PARENT_GO_BB, 5496 get_children_join_addr(parent, 5497 ce->parallel.child_index), 5498 0); 5499 5500 /* Wait on parent for go */ 5501 *cs++ = (MI_SEMAPHORE_WAIT | 5502 MI_SEMAPHORE_GLOBAL_GTT | 5503 MI_SEMAPHORE_POLL | 5504 MI_SEMAPHORE_SAD_EQ_SDD); 5505 *cs++ = CHILD_GO_BB; 5506 *cs++ = get_children_go_addr(parent); 5507 *cs++ = 0; 5508 5509 /* Turn off preemption */ 5510 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5511 5512 /* Jump to batch */ 5513 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5514 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 5515 *cs++ = lower_32_bits(offset); 5516 *cs++ = upper_32_bits(offset); 5517 5518 intel_ring_advance(rq, cs); 5519 5520 return 0; 5521 } 5522 5523 static u32 * 5524 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5525 u32 *cs) 5526 { 5527 struct intel_context *ce = rq->context; 5528 u8 i; 5529 5530 GEM_BUG_ON(!intel_context_is_parent(ce)); 5531 5532 /* Wait on children */ 5533 for (i = 0; i < ce->parallel.number_children; ++i) { 5534 *cs++ = (MI_SEMAPHORE_WAIT | 5535 MI_SEMAPHORE_GLOBAL_GTT | 5536 MI_SEMAPHORE_POLL | 5537 MI_SEMAPHORE_SAD_EQ_SDD); 5538 *cs++ = PARENT_GO_FINI_BREADCRUMB; 5539 *cs++ = get_children_join_addr(ce, i); 5540 *cs++ = 0; 5541 } 5542 5543 /* Turn on preemption */ 5544 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5545 *cs++ = MI_NOOP; 5546 5547 /* Tell children go */ 5548 cs = gen8_emit_ggtt_write(cs, 5549 CHILD_GO_FINI_BREADCRUMB, 5550 get_children_go_addr(ce), 5551 0); 5552 5553 return cs; 5554 } 5555 5556 /* 5557 * If this is true, a submission of multi-lrc requests had an error and the 5558 * requests need to be skipped. The front end (execbuf IOCTL) should've called 5559 * i915_request_skip which squashes the BB but we still need to emit the fini 5560 * breadcrumb seqno write. At this point we don't know how many of the 5561 * requests in the multi-lrc submission were generated so we can't do the 5562 * handshake between the parent and children (e.g. if 4 requests should be 5563 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend). 5564 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 5565 * has occurred on any of the requests in the submission / relationship.
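*
* Concretely, in the two emit_fini_breadcrumb functions below: on skip,
* the first (emit_fini_breadcrumb_dw - NON_SKIP_LEN) dwords of the
* reserved space are zeroed (a zero dword is MI_NOOP) and only the
* final NON_SKIP_LEN dwords - the seqno write, MI_USER_INTERRUPT and a
* trailing MI_NOOP - are emitted, as asserted by the GEM_BUG_ONs at the
* end of each function.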
5566 */ 5567 static inline bool skip_handshake(struct i915_request *rq) 5568 { 5569 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5570 } 5571 5572 #define NON_SKIP_LEN 6 5573 static u32 * 5574 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5575 u32 *cs) 5576 { 5577 struct intel_context *ce = rq->context; 5578 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5579 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5580 5581 GEM_BUG_ON(!intel_context_is_parent(ce)); 5582 5583 if (unlikely(skip_handshake(rq))) { 5584 /* 5585 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5586 * the NON_SKIP_LEN comes from the length of the emits below. 5587 */ 5588 memset(cs, 0, sizeof(u32) * 5589 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5590 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5591 } else { 5592 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5593 } 5594 5595 /* Emit fini breadcrumb */ 5596 before_fini_breadcrumb_user_interrupt_cs = cs; 5597 cs = gen8_emit_ggtt_write(cs, 5598 rq->fence.seqno, 5599 i915_request_active_timeline(rq)->hwsp_offset, 5600 0); 5601 5602 /* User interrupt */ 5603 *cs++ = MI_USER_INTERRUPT; 5604 *cs++ = MI_NOOP; 5605 5606 /* Ensure our math for skip + emit is correct */ 5607 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5608 cs); 5609 GEM_BUG_ON(start_fini_breadcrumb_cs + 5610 ce->engine->emit_fini_breadcrumb_dw != cs); 5611 5612 rq->tail = intel_ring_offset(rq, cs); 5613 5614 return cs; 5615 } 5616 5617 static u32 * 5618 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5619 u32 *cs) 5620 { 5621 struct intel_context *ce = rq->context; 5622 struct intel_context *parent = intel_context_to_parent(ce); 5623 5624 GEM_BUG_ON(!intel_context_is_child(ce)); 5625 5626 /* Turn on preemption */ 5627 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5628 *cs++ = MI_NOOP; 5629 5630 /* Signal parent */ 5631 cs = gen8_emit_ggtt_write(cs, 5632 PARENT_GO_FINI_BREADCRUMB, 5633 get_children_join_addr(parent, 5634 ce->parallel.child_index), 5635 0); 5636 5637 /* Wait parent on for go */ 5638 *cs++ = (MI_SEMAPHORE_WAIT | 5639 MI_SEMAPHORE_GLOBAL_GTT | 5640 MI_SEMAPHORE_POLL | 5641 MI_SEMAPHORE_SAD_EQ_SDD); 5642 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5643 *cs++ = get_children_go_addr(parent); 5644 *cs++ = 0; 5645 5646 return cs; 5647 } 5648 5649 static u32 * 5650 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5651 u32 *cs) 5652 { 5653 struct intel_context *ce = rq->context; 5654 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5655 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5656 5657 GEM_BUG_ON(!intel_context_is_child(ce)); 5658 5659 if (unlikely(skip_handshake(rq))) { 5660 /* 5661 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5662 * the NON_SKIP_LEN comes from the length of the emits below. 
		 */
		memset(cs, 0, sizeof(u32) *
		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
	} else {
		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
	}

	/* Emit fini breadcrumb */
	before_fini_breadcrumb_user_interrupt_cs = cs;
	cs = gen8_emit_ggtt_write(cs,
				  rq->fence.seqno,
				  i915_request_active_timeline(rq)->hwsp_offset,
				  0);

	/* User interrupt */
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	/* Ensure our math for skip + emit is correct */
	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
		   cs);
	GEM_BUG_ON(start_fini_breadcrumb_cs +
		   ce->engine->emit_fini_breadcrumb_dw != cs);

	rq->tail = intel_ring_offset(rq, cs);

	return cs;
}

#undef NON_SKIP_LEN

/*
 * Create a GuC virtual engine that load balances submissions across the
 * @count physical @siblings, which must all be of the same engine class.
 */
static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags)
{
	struct guc_virtual_engine *ve;
	struct intel_guc *guc;
	unsigned int n;
	int err;

	ve = kzalloc(sizeof(*ve), GFP_KERNEL);
	if (!ve)
		return ERR_PTR(-ENOMEM);

	guc = &siblings[0]->gt->uc.guc;

	ve->base.i915 = siblings[0]->i915;
	ve->base.gt = siblings[0]->gt;
	ve->base.uncore = siblings[0]->uncore;
	ve->base.id = -1;

	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.saturated = ALL_ENGINES;

	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

	ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

	ve->base.cops = &virtual_guc_context_ops;
	ve->base.request_alloc = guc_request_alloc;
	ve->base.bump_serial = virtual_guc_bump_serial;

	ve->base.submit_request = guc_submit_request;

	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

	BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
	ve->base.mask = VIRTUAL_ENGINES;

	intel_context_init(&ve->context, &ve->base);

	for (n = 0; n < count; n++) {
		struct intel_engine_cs *sibling = siblings[n];

		GEM_BUG_ON(!is_power_of_2(sibling->mask));
		if (sibling->mask & ve->base.mask) {
			guc_dbg(guc, "duplicate %s entry in load balancer\n",
				sibling->name);
			err = -EINVAL;
			goto err_put;
		}

		ve->base.mask |= sibling->mask;
		ve->base.logical_mask |= sibling->logical_mask;

		if (n != 0 && ve->base.class != sibling->class) {
			guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
				sibling->class, ve->base.class);
			err = -EINVAL;
			goto err_put;
		} else if (n == 0) {
			ve->base.class = sibling->class;
			ve->base.uabi_class = sibling->uabi_class;
			snprintf(ve->base.name, sizeof(ve->base.name),
				 "v%dx%d", ve->base.class, count);
			ve->base.context_size = sibling->context_size;

			ve->base.add_active_request =
				sibling->add_active_request;
			ve->base.remove_active_request =
				sibling->remove_active_request;
			ve->base.emit_bb_start = sibling->emit_bb_start;
			ve->base.emit_flush = sibling->emit_flush;
			ve->base.emit_init_breadcrumb =
				sibling->emit_init_breadcrumb;
			ve->base.emit_fini_breadcrumb =
				sibling->emit_fini_breadcrumb;
			ve->base.emit_fini_breadcrumb_dw =
				sibling->emit_fini_breadcrumb_dw;
			ve->base.breadcrumbs =
				intel_breadcrumbs_get(sibling->breadcrumbs);

			ve->base.flags |= sibling->flags;

			ve->base.props.timeslice_duration_ms =
				sibling->props.timeslice_duration_ms;
			ve->base.props.preempt_timeout_ms =
				sibling->props.preempt_timeout_ms;
		}
	}

	return &ve->context;

err_put:
	intel_context_put(&ve->context);
	return ERR_PTR(err);
}

bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp, mask = ve->mask;

	for_each_engine_masked(engine, ve->gt, mask, tmp)
		if (READ_ONCE(engine->props.heartbeat_interval_ms))
			return true;

	return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif