// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_irq.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G.
 * If stealing the guc_id it isn't safe to submit anything to this guc_id until
 * the deregister completes so a fence is used to stall all requests associated
 * with this guc_id until the corresponding G2H returns indicating the guc_id
 * has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 schedule
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects list of inflight requests
 * on the context and the priority management state. Lock is individual to each
 * context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and cleanup appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */
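
/*
 * Illustrative sketch only, not driver code: the lock ordering rules from the
 * DOC comment above expressed as nesting (irq-save/restore flavours omitted).
 * The helper name is hypothetical; the fields are the real ones used in this
 * file.
 *
 *	static void example_lock_nesting(struct i915_sched_engine *se,
 *					 struct intel_context *ce)
 *	{
 *		spin_lock(&se->lock);			// outer: submission lock
 *		spin_lock(&ce->guc_state.lock);		// inner: per-context state
 *		// issue H2G / update ce->guc_state.sched_state here
 *		spin_unlock(&ce->guc_state.lock);
 *		spin_unlock(&se->lock);
 *	}
 *
 * guc->submission_state.lock may likewise be taken outside ce->guc_state.lock,
 * but never the other way around.
 */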

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space. We believe the number of multi-lrc contexts in
 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
 * multi-lrc.
 */
#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
	((guc)->submission_state.num_guc_ids / 16)

/*
 * Below is a set of functions which control the GuC scheduling state which
 * require a lock.
 */
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
#define SCHED_STATE_DESTROYED				BIT(1)
#define SCHED_STATE_PENDING_DISABLE			BIT(2)
#define SCHED_STATE_BANNED				BIT(3)
#define SCHED_STATE_ENABLED				BIT(4)
#define SCHED_STATE_PENDING_ENABLE			BIT(5)
#define SCHED_STATE_REGISTERED				BIT(6)
#define SCHED_STATE_POLICY_REQUIRED			BIT(7)
#define SCHED_STATE_CLOSED				BIT(8)
#define SCHED_STATE_BLOCKED_SHIFT			9
#define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)

static inline void init_sched_state(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}

/*
 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend.
 * A context close can race with the submission path, so SCHED_STATE_CLOSED
 * can be set immediately before we try to register.
 */
#define SCHED_STATE_VALID_INIT \
	(SCHED_STATE_BLOCKED_MASK | \
	 SCHED_STATE_CLOSED | \
	 SCHED_STATE_REGISTERED)

__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
	return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}

static inline bool
context_wait_for_deregister_to_register(struct intel_context *ce)
{
	return ce->guc_state.sched_state &
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |=
		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline void
clr_context_wait_for_deregister_to_register(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &=
		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}

static inline bool
context_destroyed(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
}

static inline void
set_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}

static inline void
clr_context_destroyed(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
}

static inline bool context_pending_disable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
}

static inline void set_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE;
}

static inline void clr_context_pending_disable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE;
}

static inline bool context_banned(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_BANNED;
}

static inline void set_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_BANNED;
}

static inline void clr_context_banned(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}

static inline bool context_enabled(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
}

static inline void set_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
}

static inline void clr_context_enabled(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
}

static inline bool context_pending_enable(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
}

static inline void set_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
}

static inline void clr_context_pending_enable(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
}

static inline bool context_registered(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
}

static inline void set_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
}

static inline void clr_context_registered(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
}

static inline bool context_policy_required(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED;
}

static inline void set_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED;
}

static inline void clr_context_policy_required(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}

static inline bool context_close_done(struct intel_context *ce)
{
	return ce->guc_state.sched_state & SCHED_STATE_CLOSED;
}

static inline void set_context_close_done(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
}

static inline u32 context_blocked(struct intel_context *ce)
{
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		SCHED_STATE_BLOCKED_SHIFT;
}

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;

	GEM_BUG_ON(!context_blocked(ce));	/* Overflow check */
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */

	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
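
/*
 * Illustrative note, not driver code: SCHED_STATE_BLOCKED_MASK packs a 12-bit
 * block counter into sched_state starting at bit SCHED_STATE_BLOCKED_SHIFT (9).
 * incr/decr_context_blocked() simply add or subtract BIT(9), so a context that
 * has been blocked twice reads back context_blocked(ce) == 2 and only becomes
 * unblocked again after two matching decrements, all under ce->guc_state.lock.
 */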

static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static inline bool context_guc_id_invalid(struct intel_context *ce)
{
	return ce->guc_id.id == GUC_INVALID_CONTEXT_ID;
}

static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
}

static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
{
	return gt_to_guc(ce->engine->gt);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

/*
 * When using multi-lrc submission a scratch memory area is reserved in the
 * parent's context state for the process descriptor, work queue, and handshake
 * between the parent + children contexts to insert safe preemption points
 * between each of the BBs. Currently the scratch area is sized to a page.
 *
 * The layout of this scratch area is below:
 * 0						guc_process_desc
 * + sizeof(struct guc_process_desc)		child go
 * + CACHELINE_BYTES				child join[0]
 * ...
 * + CACHELINE_BYTES				child join[n - 1]
 * ...						unused
 * PARENT_SCRATCH_SIZE / 2			work queue start
 * ...						work queue
 * PARENT_SCRATCH_SIZE - 1			work queue end
 */
#define WQ_SIZE			(PARENT_SCRATCH_SIZE / 2)
#define WQ_OFFSET		(PARENT_SCRATCH_SIZE - WQ_SIZE)

struct sync_semaphore {
	u32 semaphore;
	u8 unused[CACHELINE_BYTES - sizeof(u32)];
};

struct parent_scratch {
	union guc_descs {
		struct guc_sched_wq_desc wq_desc;
		struct guc_process_desc_v69 pdesc;
	} descs;

	struct sync_semaphore go;
	struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];

	u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
		sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];

	u32 wq[WQ_SIZE / sizeof(u32)];
};

static u32 __get_parent_scratch_offset(struct intel_context *ce)
{
	GEM_BUG_ON(!ce->parallel.guc.parent_page);

	return ce->parallel.guc.parent_page * PAGE_SIZE;
}

static u32 __get_wq_offset(struct intel_context *ce)
{
	BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);

	return __get_parent_scratch_offset(ce) + WQ_OFFSET;
}

static struct parent_scratch *
__get_parent_scratch(struct intel_context *ce)
{
	BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
	BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);

	/*
	 * Need to subtract LRC_STATE_OFFSET here as the
	 * parallel.guc.parent_page is the offset into ce->state while
	 * ce->lrc_reg_state is ce->state + LRC_STATE_OFFSET.
	 */
	return (struct parent_scratch *)
		(ce->lrc_reg_state +
		 ((__get_parent_scratch_offset(ce) -
		   LRC_STATE_OFFSET) / sizeof(u32)));
}

static struct guc_process_desc_v69 *
__get_process_desc_v69(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.pdesc;
}

static struct guc_sched_wq_desc *
__get_wq_desc_v70(struct intel_context *ce)
{
	struct parent_scratch *ps = __get_parent_scratch(ce);

	return &ps->descs.wq_desc;
}
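
/*
 * Worked example, illustrative only: assuming PARENT_SCRATCH_SIZE is one 4K
 * page (the layout comment above notes the scratch area is currently a page),
 * WQ_SIZE and WQ_OFFSET both come out to 2K, i.e. the descriptor, the go/join
 * semaphores and padding occupy the first half of the page while the work
 * queue ring occupies the second half, wrapping at WQ_SIZE.
 */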

static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
	/*
	 * Check for space in work queue. Caching a value of the head pointer
	 * in the intel_context structure in order to reduce the number of
	 * accesses to shared GPU memory which may be across a PCIe bus.
	 */
#define AVAILABLE_SPACE	\
	CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
		ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);

		if (wqi_size > AVAILABLE_SPACE)
			return NULL;
	}
#undef AVAILABLE_SPACE

	return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
}

static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
{
	struct intel_context *ce = xa_load(&guc->context_lookup, id);

	GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID);

	return ce;
}

static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
{
	struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;

	if (!base)
		return NULL;

	GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);

	return &base[index];
}

static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
{
	u32 size;
	int ret;

	size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
			  GUC_MAX_CONTEXT_ID);
	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
					     (void **)&guc->lrc_desc_pool_vaddr_v69);
	if (ret)
		return ret;

	return 0;
}

static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
{
	if (!guc->lrc_desc_pool_vaddr_v69)
		return;

	guc->lrc_desc_pool_vaddr_v69 = NULL;
	i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
}

static inline bool guc_submission_initialized(struct intel_guc *guc)
{
	return guc->submission_initialized;
}

static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
{
	struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);

	if (desc)
		memset(desc, 0, sizeof(*desc));
}

static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
	return __get_context(guc, id);
}

static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
				      struct intel_context *ce)
{
	unsigned long flags;

	/*
	 * xarray API doesn't have an xa_store_irqsave wrapper, so calling the
	 * lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
{
	unsigned long flags;

	if (unlikely(!guc_submission_initialized(guc)))
		return;

	_reset_lrc_desc_v69(guc, id);

	/*
	 * xarray API doesn't have an xa_erase_irqsave wrapper, so calling
	 * the lower level functions directly.
	 */
	xa_lock_irqsave(&guc->context_lookup, flags);
	__xa_erase(&guc->context_lookup, id);
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static void decr_outstanding_submission_g2h(struct intel_guc *guc)
{
	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
		wake_up_all(&guc->ct.wq);
}

static int guc_submission_send_busy_loop(struct intel_guc *guc,
					 const u32 *action,
					 u32 len,
					 u32 g2h_len_dw,
					 bool loop)
{
	int ret;
	/*
	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
	 * so we don't handle the case where we don't get a reply because we
	 * aborted the send due to the channel being busy.
	 */
	GEM_BUG_ON(g2h_len_dw && !loop);

	if (g2h_len_dw)
		atomic_inc(&guc->outstanding_submission_g2h);

	ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
	if (ret && g2h_len_dw)
		atomic_dec(&guc->outstanding_submission_g2h);

	return ret;
}

int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
				   atomic_t *wait_var,
				   bool interruptible,
				   long timeout)
{
	const int state = interruptible ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (!atomic_read(wait_var))
		return 0;

	if (!timeout)
		return -ETIME;

	for (;;) {
		prepare_to_wait(&guc->ct.wq, &wait, state);

		if (!atomic_read(wait_var))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -EINTR;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	finish_wait(&guc->ct.wq, &wait);

	return (timeout < 0) ? timeout : 0;
}

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
{
	if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
		return 0;

	return intel_guc_wait_for_pending_msg(guc,
					      &guc->outstanding_submission_g2h,
					      true, timeout);
}

static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);

static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int err = 0;
	struct intel_context *ce = request_to_scheduling_context(rq);
	u32 action[3];
	int len = 0;
	u32 g2h_len_dw = 0;
	bool enabled;

	lockdep_assert_held(&rq->engine->sched_engine->lock);

	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request resubmitted after the context was banned.
	 */
	if (unlikely(!intel_context_is_schedulable(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
		return 0;
	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));

	if (context_policy_required(ce)) {
		err = guc_context_policy_init_v70(ce, false);
		if (err)
			return err;
	}

	spin_lock(&ce->guc_state.lock);

	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock. For multi-lrc we still submit the
	 * context to move the LRC tails.
	 */
	if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
		goto out;

	enabled = context_enabled(ce) || context_blocked(ce);

	if (!enabled) {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id.id;
		action[len++] = GUC_CONTEXT_ENABLE;
		set_context_pending_enable(ce);
		intel_context_get(ce);
		g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
	} else {
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = ce->guc_id.id;
	}

	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
	if (!enabled && !err) {
		trace_intel_context_sched_enable(ce);
		atomic_inc(&guc->outstanding_submission_g2h);
		set_context_enabled(ce);

		/*
		 * Without multi-lrc KMD does the submission step (moving the
		 * lrc tail) so enabling scheduling is sufficient to submit the
		 * context. This isn't the case in multi-lrc submission as the
		 * GuC needs to move the tails, hence the need for another H2G
		 * to submit a multi-lrc context after enabling scheduling.
		 */
		if (intel_context_is_parent(ce)) {
			action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
			err = intel_guc_send_nb(guc, action, len - 1, 0);
		}
	} else if (!enabled) {
		clr_context_pending_enable(ce);
		intel_context_put(ce);
	}
	if (likely(!err))
		trace_i915_request_guc_submit(rq);

out:
	spin_unlock(&ce->guc_state.lock);
	return err;
}

static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
	int ret = __guc_add_request(guc, rq);

	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_ADD_REQUEST;
	}

	return ret;
}

static inline void guc_set_lrc_tail(struct i915_request *rq)
{
	rq->context->lrc_reg_state[CTX_RING_TAIL] =
		intel_ring_set_tail(rq->ring, rq->tail);
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static bool is_multi_lrc_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static bool can_merge_rq(struct i915_request *rq,
			 struct i915_request *last)
{
	return request_to_scheduling_context(rq) ==
		request_to_scheduling_context(last);
}

static u32 wq_space_until_wrap(struct intel_context *ce)
{
	return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}

static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
	BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));

	/*
	 * Ensure WQI are visible before updating tail
	 */
	intel_guc_write_barrier(ce_to_guc(ce));

	ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
		(WQ_SIZE - 1);
	WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}

static int guc_wq_noop_append(struct intel_context *ce)
{
	u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
	u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;

	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
	       FIELD_PREP(WQ_LEN_MASK, len_dw);
	ce->parallel.guc.wqi_tail = 0;

	return 0;
}
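
/*
 * Reading aid, not driver code: a multi-lrc work queue item emitted by
 * __guc_wq_item_append() below is (number_children + 4) dwords:
 *
 *	dw0:  header - WQ_TYPE_MULTI_LRC | (len_dw = total dwords - 1)
 *	dw1:  parent LRCA
 *	dw2:  parent guc_id | parent ring tail (in u64 units)
 *	dw3:  fence_id (currently always 0)
 *	dw4+: one ring tail per child context (also in u64 units)
 *
 * The exact field packing is defined by the WQ_*_MASK macros in the GuC ABI
 * headers.
 */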

static int __guc_wq_item_append(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	struct intel_context *child;
	unsigned int wqi_size = (ce->parallel.number_children + 4) *
		sizeof(u32);
	u32 *wqi;
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int ret;

	/* Ensure context is in correct state before updating work queue */
	GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
	GEM_BUG_ON(context_guc_id_invalid(ce));
	GEM_BUG_ON(context_wait_for_deregister_to_register(ce));
	GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id));

	/* Insert NOOP if this work queue item will wrap the tail pointer. */
	if (wqi_size > wq_space_until_wrap(ce)) {
		ret = guc_wq_noop_append(ce);
		if (ret)
			return ret;
	}

	wqi = get_wq_pointer(ce, wqi_size);
	if (!wqi)
		return -EBUSY;

	GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));

	*wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		 FIELD_PREP(WQ_LEN_MASK, len_dw);
	*wqi++ = ce->lrc.lrca;
	*wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
		 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
	*wqi++ = 0; /* fence_id */
	for_each_child(ce, child)
		*wqi++ = child->ring->tail / sizeof(u64);

	write_wqi(ce, wqi_size);

	return 0;
}

static int guc_wq_item_append(struct intel_guc *guc,
			      struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);
	int ret;

	if (unlikely(!intel_context_is_schedulable(ce)))
		return 0;

	ret = __guc_wq_item_append(rq);
	if (unlikely(ret == -EBUSY)) {
		guc->stalled_request = rq;
		guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
	}

	return ret;
}

static bool multi_lrc_submit(struct i915_request *rq)
{
	struct intel_context *ce = request_to_scheduling_context(rq);

	intel_ring_set_tail(rq->ring, rq->tail);

	/*
	 * We expect the front end (execbuf IOCTL) to set this flag on the last
	 * request generated from a multi-BB submission. This indicates to the
	 * backend (GuC interface) that we should submit this context thus
	 * submitting all the requests generated in parallel.
	 */
	return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
	       !intel_context_is_schedulable(ce);
}

static int guc_dequeue_one_context(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	struct i915_request *last = NULL;
	bool submit = false;
	struct rb_node *rb;
	int ret;

	lockdep_assert_held(&sched_engine->lock);

	if (guc->stalled_request) {
		submit = true;
		last = guc->stalled_request;

		switch (guc->submission_stall_reason) {
		case STALL_REGISTER_CONTEXT:
			goto register_context;
		case STALL_MOVE_LRC_TAIL:
			goto move_lrc_tail;
		case STALL_ADD_REQUEST:
			goto add_request;
		default:
			MISSING_CASE(guc->submission_stall_reason);
		}
	}

	while ((rb = rb_first_cached(&sched_engine->queue))) {
		struct i915_priolist *p = to_priolist(rb);
		struct i915_request *rq, *rn;

		priolist_for_each_request_consume(rq, rn, p) {
			if (last && !can_merge_rq(rq, last))
				goto register_context;

			list_del_init(&rq->sched.link);

			__i915_request_submit(rq);

			trace_i915_request_in(rq, 0);
			last = rq;

			if (is_multi_lrc_rq(rq)) {
				/*
				 * We need to coalesce all multi-lrc requests in
				 * a relationship into a single H2G. We are
				 * guaranteed that all of these requests will be
				 * submitted sequentially.
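				 *
				 * Illustrative aside: only the request the
				 * execbuf front end flagged with
				 * I915_FENCE_FLAG_SUBMIT_PARALLEL (checked in
				 * multi_lrc_submit() above) triggers the actual
				 * submission; the earlier requests in the
				 * relationship only have their ring tails
				 * updated.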
				 */
				if (multi_lrc_submit(rq)) {
					submit = true;
					goto register_context;
				}
			} else {
				submit = true;
			}
		}

		rb_erase_cached(&p->node, &sched_engine->queue);
		i915_priolist_free(p);
	}

register_context:
	if (submit) {
		struct intel_context *ce = request_to_scheduling_context(last);

		if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
			     intel_context_is_schedulable(ce))) {
			ret = try_context_registration(ce, false);
			if (unlikely(ret == -EPIPE)) {
				goto deadlk;
			} else if (ret == -EBUSY) {
				guc->stalled_request = last;
				guc->submission_stall_reason =
					STALL_REGISTER_CONTEXT;
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		}

move_lrc_tail:
		if (is_multi_lrc_rq(last)) {
			ret = guc_wq_item_append(guc, last);
			if (ret == -EBUSY) {
				goto schedule_tasklet;
			} else if (ret != 0) {
				GEM_WARN_ON(ret);	/* Unexpected */
				goto deadlk;
			}
		} else {
			guc_set_lrc_tail(last);
		}

add_request:
		ret = guc_add_request(guc, last);
		if (unlikely(ret == -EPIPE)) {
			goto deadlk;
		} else if (ret == -EBUSY) {
			goto schedule_tasklet;
		} else if (ret != 0) {
			GEM_WARN_ON(ret);	/* Unexpected */
			goto deadlk;
		}
	}

	guc->stalled_request = NULL;
	guc->submission_stall_reason = STALL_NONE;
	return submit;

deadlk:
	sched_engine->tasklet.callback = NULL;
	tasklet_disable_nosync(&sched_engine->tasklet);
	return false;

schedule_tasklet:
	tasklet_schedule(&sched_engine->tasklet);
	return false;
}

static void guc_submission_tasklet(struct tasklet_struct *t)
{
	struct i915_sched_engine *sched_engine =
		from_tasklet(sched_engine, t, tasklet);
	unsigned long flags;
	bool loop;

	spin_lock_irqsave(&sched_engine->lock, flags);

	do {
		loop = guc_dequeue_one_context(sched_engine->private_data);
	} while (loop);

	i915_sched_engine_reset_on_empty(sched_engine);

	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	if (iir & GT_RENDER_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(engine);
}

static void __guc_context_destroy(struct intel_context *ce);
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
static void guc_signal_context_fence(struct intel_context *ce);
static void guc_cancel_context_requests(struct intel_context *ce);
static void guc_blocked_fence_complete(struct intel_context *ce);
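
/*
 * After a full GT reset the G2H replies to in-flight H2Gs may never arrive, so
 * walk every context known to the GuC and manually complete whatever those
 * G2Hs would have done: drop pending enable/disable state, signal context
 * fences, release guc_ids and finish any pending destruction. See the "Reset
 * races" section of the DOC comment at the top of this file.
 */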
static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
{
	struct intel_context *ce;
	unsigned long index, flags;
	bool pending_disable, pending_enable, deregister, destroyed, banned;

	xa_lock_irqsave(&guc->context_lookup, flags);
	xa_for_each(&guc->context_lookup, index, ce) {
		/*
		 * Corner case where the ref count on the object is zero but
		 * the deregister G2H was lost. In this case we don't touch the
		 * ref count and finish the destroy of the context.
		 */
		bool do_put = kref_get_unless_zero(&ce->ref);

		xa_unlock(&guc->context_lookup);

		if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
		    (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
			/* successful cancel so jump straight to close it */
			intel_context_sched_disable_unpin(ce);
		}

		spin_lock(&ce->guc_state.lock);

		/*
		 * Once we are at this point submission_disabled() is guaranteed
		 * to be visible to all callers who set the below flags (see above
		 * flush and flushes in reset_prepare). If submission_disabled()
		 * is set, the caller shouldn't set these flags.
		 */

		destroyed = context_destroyed(ce);
		pending_enable = context_pending_enable(ce);
		pending_disable = context_pending_disable(ce);
		deregister = context_wait_for_deregister_to_register(ce);
		banned = context_banned(ce);
		init_sched_state(ce);

		spin_unlock(&ce->guc_state.lock);

		if (pending_enable || destroyed || deregister) {
			decr_outstanding_submission_g2h(guc);
			if (deregister)
				guc_signal_context_fence(ce);
			if (destroyed) {
				intel_gt_pm_put_async_untracked(guc_to_gt(guc));
				release_guc_id(guc, ce);
				__guc_context_destroy(ce);
			}
			if (pending_enable || deregister)
				intel_context_put(ce);
		}

		/* Not mutually exclusive with above if statement. */
		if (pending_disable) {
			guc_signal_context_fence(ce);
			if (banned) {
				guc_cancel_context_requests(ce);
				intel_engine_signal_breadcrumbs(ce->engine);
			}
			intel_context_sched_disable_unpin(ce);
			decr_outstanding_submission_g2h(guc);

			spin_lock(&ce->guc_state.lock);
			guc_blocked_fence_complete(ce);
			spin_unlock(&ce->guc_state.lock);

			intel_context_put(ce);
		}

		if (do_put)
			intel_context_put(ce);
		xa_lock(&guc->context_lookup);
	}
	xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
 * GuC stores busyness stats for each engine at context in/out boundaries. A
 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
 * GuC.
 *
 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
 * is valid (!= ~0) and start is non-zero, the engine is considered to be
 * active. For an active engine total busyness = total + (now - start), where
 * 'now' is the time at which the busyness is sampled. For an inactive engine,
 * total busyness = total.
 *
 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
 *
 * The start and total values provided by GuC are 32 bits and wrap around in a
 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
 * increasing ns values, there is a need for this implementation to account for
 * overflows and extend the GuC provided values to 64 bits before returning
 * busyness to the user. In order to do that, a worker runs periodically at
 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
 * 27 seconds for a gt clock frequency of 19.2 MHz).
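 *
 * Worked example (illustrative): at a 19.2 MHz gt clock a 32 bit timestamp
 * wraps after 2^32 / 19.2e6 ~= 224 seconds, and 1/8th of that is ~28 seconds,
 * which is where the "once in 27 seconds" figure above comes from.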
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
 * determine validity of these values. Instead we read the values multiple times
 * until they are consistent. In test runs, 3 attempts result in consistent
 * values. The upper bound is set to 6 attempts and may need to be tuned as per
 * any new occurrences.
 */
static void __get_engine_usage_record(struct intel_engine_cs *engine,
				      u32 *last_in, u32 *id, u32 *total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
	int i = 0;

	do {
		*last_in = record_read(&rec_map, last_switch_in_stamp);
		*id = record_read(&rec_map, current_context_index);
		*total = record_read(&rec_map, total_runtime);

		if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
		    record_read(&rec_map, current_context_index) == *id &&
		    record_read(&rec_map, total_runtime) == *total)
			break;
	} while (++i < 6);
}

static void __set_engine_usage_record(struct intel_engine_cs *engine,
				      u32 last_in, u32 id, u32 total)
{
	struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);

#define record_write(map_, field_, val_) \
	iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_)

	record_write(&rec_map, last_switch_in_stamp, last_in);
	record_write(&rec_map, current_context_index, id);
	record_write(&rec_map, total_runtime, total);

#undef record_write
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
	struct intel_engine_guc_stats *stats = &engine->stats.guc;
	struct intel_guc *guc = gt_to_guc(engine->gt);
	u32 last_switch, ctx_id, total;

	lockdep_assert_held(&guc->timestamp.lock);

	__get_engine_usage_record(engine, &last_switch, &ctx_id, &total);

	stats->running = ctx_id != ~0U && last_switch;
	if (stats->running)
		__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

	/*
	 * Instead of adjusting the total for overflow, just add the
	 * difference from previous sample stats->total_gt_clks
	 */
	if (total && total != ~0U) {
		stats->total_gt_clks += (u32)(total - stats->prev_total);
		stats->prev_total = total;
	}
}

static u32 gpm_timestamp_shift(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;
	u32 reg;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);

	return 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
}

static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
{
	struct intel_gt *gt = guc_to_gt(guc);
	u32 gt_stamp_lo, gt_stamp_hi;
	u64 gpm_ts;

	lockdep_assert_held(&guc->timestamp.lock);

	gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
					  MISC_STATUS1) >> guc->timestamp.shift;
	gt_stamp_lo = lower_32_bits(gpm_ts);
	*now = ktime_get();

	if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
		gt_stamp_hi++;

	guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
}

/*
 * Unlike the execlist mode of submission total and active times are in terms of
 * gt clocks. The *now parameter is retained to return the cpu time at which the
 * busyness was sampled.
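 *
 * Illustrative summary of the calculation below: busyness is reported as
 * clock_interval_to_ns(total_gt_clks) plus, when the engine is currently
 * running, clock_interval_to_ns(gt_stamp - start_gt_clk) for the still open
 * interval, and the reported value is only ever allowed to move forward.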
 */
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
	struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
	struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
	struct intel_gt *gt = engine->gt;
	struct intel_guc *guc = gt_to_guc(gt);
	u64 total, gt_stamp_saved;
	unsigned long flags;
	u32 reset_count;
	bool in_reset;
	intel_wakeref_t wakeref;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	/*
	 * If a reset happened, we risk reading partially updated engine
	 * busyness from GuC, so we just use the driver stored copy of busyness.
	 * Synchronize with gt reset using reset_count and the
	 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
	 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
	 * usable by checking the flag afterwards.
	 */
	reset_count = i915_reset_count(gpu_error);
	in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

	*now = ktime_get();

	/*
	 * The active busyness depends on start_gt_clk and gt_stamp.
	 * gt_stamp is updated by i915 only when gt is awake and the
	 * start_gt_clk is derived from GuC state. To get a consistent
	 * view of activity, we query the GuC state only if gt is awake.
	 */
	wakeref = in_reset ? NULL : intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		stats_saved = *stats;
		gt_stamp_saved = guc->timestamp.gt_stamp;
		/*
		 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
		 * start_gt_clk' calculation below for active engines.
		 */
		guc_update_engine_gt_clks(engine);
		guc_update_pm_timestamp(guc, now);
		intel_gt_pm_put_async(gt, wakeref);
		if (i915_reset_count(gpu_error) != reset_count) {
			*stats = stats_saved;
			guc->timestamp.gt_stamp = gt_stamp_saved;
		}
	}

	total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
	if (stats->running) {
		u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

		total += intel_gt_clock_interval_to_ns(gt, clk);
	}

	if (total > stats->total)
		stats->total = total;

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);

	return ns_to_ktime(stats->total);
}

static void guc_enable_busyness_worker(struct intel_guc *guc)
{
	mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay);
}

static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
	/*
	 * There are many different call stacks that can get here. Some of them
	 * hold the reset mutex. The busyness worker also attempts to acquire the
	 * reset mutex. Synchronously flushing a worker thread requires acquiring
	 * the worker mutex. Lockdep sees this as a conflict. It thinks that the
	 * flush can deadlock because it holds the worker mutex while waiting for
	 * the reset mutex, but another thread is holding the reset mutex and might
	 * attempt to use other worker functions.
	 *
	 * In practice, this scenario does not exist because the busyness worker
	 * does not block waiting for the reset mutex. It does a try-lock on it and
	 * immediately exits if the lock is already held. Unfortunately, the mutex
	 * in question (I915_RESET_BACKOFF) is an i915 implementation which has
	 * lockdep annotation but not to the extent of explaining that the
	 * 'might lock' is also a 'does not need to lock'.
	 * So one option would be to add more complex lockdep annotations to
	 * ignore the issue (if at all possible). A simpler option is to just
	 * not flush synchronously when a reset is in progress. Given that the
	 * worker will just early exit and re-schedule itself anyway, there is
	 * no advantage to running it immediately.
	 *
	 * If a reset is not in progress, then the synchronous flush may be
	 * required. As noted many call stacks lead here, some during suspend
	 * and driver unload which do require a synchronous flush to make sure
	 * the worker is stopped before memory is freed.
	 *
	 * Trying to pass a 'need_sync' or 'in_reset' flag all the way down
	 * through every possible call stack is unfeasible. It would be too
	 * intrusive to many areas that really don't care about the GuC backend.
	 * However, there is the I915_RESET_BACKOFF flag and the gt->reset.mutex
	 * can be tested for is_locked. So just use those. Note that testing
	 * both is required due to the hideously complex nature of the i915
	 * driver's reset code paths.
	 *
	 * And note that in the case of a reset occurring during driver unload
	 * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini
	 * (when the reset flag/mutex are set) is fine because there is another
	 * explicit cancel in intel_guc_submission_fini (when the reset
	 * flag/mutex are not).
	 */
	if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
	    test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
		cancel_delayed_work(&guc->timestamp.work);
	else
		cancel_delayed_work_sync(&guc->timestamp.work);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id) {
		struct intel_engine_guc_stats *stats = &engine->stats.guc;

		guc_update_engine_gt_clks(engine);

		/*
		 * If resetting a running context, accumulate the active
		 * time as well since there will be no context switch.
		 */
		if (stats->running) {
			u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

			stats->total_gt_clks += clk;
		}
		stats->prev_total = 0;
		stats->running = 0;
	}

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_running_state(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	for_each_engine(engine, gt, id)
		engine->stats.guc.running = false;
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;
	ktime_t unused;

	guc->timestamp.last_stat_jiffies = jiffies;

	spin_lock_irqsave(&guc->timestamp.lock, flags);

	guc_update_pm_timestamp(guc, &unused);
	for_each_engine(engine, gt, id)
		guc_update_engine_gt_clks(engine);

	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __guc_context_update_stats(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);
	unsigned long flags;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	lrc_update_runtime(ce);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_context_update_stats(struct intel_context *ce)
{
	if (!intel_context_pin_if_active(ce))
		return;

	__guc_context_update_stats(ce);
	intel_context_unpin(ce);
}

static void guc_timestamp_ping(struct work_struct *wrk)
{
	struct intel_guc *guc = container_of(wrk, typeof(*guc),
					     timestamp.work.work);
	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_context *ce;
	intel_wakeref_t wakeref;
	unsigned long index;
	int srcu, ret;

	/*
	 * Ideally the busyness worker should take a gt pm wakeref because the
	 * worker only needs to be active while gt is awake. However, the
	 * gt_park path cancels the worker synchronously and this complicates
	 * the flow if the worker is also running at the same time. The cancel
	 * waits for the worker and when the worker releases the wakeref, that
	 * would call gt_park and would lead to a deadlock.
	 *
	 * The resolution is to take the global pm wakeref if runtime pm is
	 * already active. If not, we don't need to update the busyness stats as
	 * the stats would already be updated when the gt was parked.
	 *
	 * Note:
	 * - We do not requeue the worker if we cannot take a reference to runtime
	 *   pm since intel_guc_busyness_unpark would requeue the worker in the
	 *   resume path.
	 *
	 * - If the gt was parked longer than time taken for GT timestamp to roll
	 *   over, we ignore those rollovers since we don't care about tracking
	 *   the exact GT time. We only care about roll overs when the gt is
	 *   active and running workloads.
	 *
	 * - There is a window of time between gt_park and runtime suspend,
	 *   where the worker may run. This is acceptable since the worker will
	 *   not find any new data to update busyness.
	 */
	wakeref = intel_runtime_pm_get_if_active(&gt->i915->runtime_pm);
	if (!wakeref)
		return;

	/*
	 * Synchronize with gt reset to make sure the worker does not
	 * corrupt the engine/guc stats. NB: can't actually block waiting
	 * for a reset to complete as the reset requires flushing out
	 * this worker thread if started. So waiting would deadlock.
	 */
	ret = intel_gt_reset_trylock(gt, &srcu);
	if (ret)
		goto err_trylock;

	__update_guc_busyness_stats(guc);

	/* adjust context stats for overflow */
	xa_for_each(&guc->context_lookup, index, ce)
		guc_context_update_stats(ce);

	intel_gt_reset_unlock(gt, srcu);

	guc_enable_busyness_worker(guc);

err_trylock:
	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 offset = intel_guc_engine_usage_offset(guc);
	u32 action[] = {
		INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
		offset,
		0,
	};

	for_each_engine(engine, gt, id)
		__set_engine_usage_record(engine, 0, 0xffffffff, 0);

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static int guc_init_engine_stats(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int ret;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		ret = guc_action_enable_usage_stats(guc);

	if (ret)
		guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
	else
		guc_enable_busyness_worker(guc);

	return ret;
}

static void guc_fini_engine_stats(struct intel_guc *guc)
{
	guc_cancel_busyness_worker(guc);
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
	struct intel_guc *guc = gt_to_guc(gt);

	if (!guc_submission_initialized(guc))
		return;

	/* Assume no engines are running and set running state to false */
	__update_guc_busyness_running_state(guc);

	/*
	 * There is a race with suspend flow where the worker runs after suspend
	 * and causes an unclaimed register access warning. Cancel the worker
	 * synchronously here.
	 */
	guc_cancel_busyness_worker(guc);

	/*
	 * Before parking, we should sample engine busyness stats if we need to.
	 * We can skip it if we are less than half a ping from the last time we
	 * sampled the busyness stats.
	 */
	if (guc->timestamp.last_stat_jiffies &&
	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
			(guc->timestamp.ping_delay / 2)))
		return;

	__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
	struct intel_guc *guc = gt_to_guc(gt);
	unsigned long flags;
	ktime_t unused;

	if (!guc_submission_initialized(guc))
		return;

	spin_lock_irqsave(&guc->timestamp.lock, flags);
	guc_update_pm_timestamp(guc, &unused);
	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
	guc_enable_busyness_worker(guc);
}

static inline bool
submission_disabled(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	return unlikely(!sched_engine ||
			!__tasklet_is_enabled(&sched_engine->tasklet) ||
			intel_gt_is_wedged(guc_to_gt(guc)));
}

static void disable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;

	if (__tasklet_is_enabled(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);
		__tasklet_disable_sync_once(&sched_engine->tasklet);
		sched_engine->tasklet.callback = NULL;
	}
}

static void enable_submission(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&guc->sched_engine->lock, flags);
	sched_engine->tasklet.callback = guc_submission_tasklet;
	wmb();	/* Make sure callback visible */
	if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
	    __tasklet_enable(&sched_engine->tasklet)) {
		GEM_BUG_ON(!guc->ct.enabled);

		/* And kick in case we missed a new request submission. */
		tasklet_hi_schedule(&sched_engine->tasklet);
	}
	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
}

static void guc_flush_submissions(struct intel_guc *guc)
{
	struct i915_sched_engine * const sched_engine = guc->sched_engine;
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}

void intel_guc_submission_flush_work(struct intel_guc *guc)
{
	flush_work(&guc->submission_state.destroyed_worker);
}

static void guc_flush_destroyed_contexts(struct intel_guc *guc);

void intel_guc_submission_reset_prepare(struct intel_guc *guc)
{
	if (unlikely(!guc_submission_initialized(guc))) {
		/* Reset called during driver load? GuC not yet initialised! */
*/ 1729 return; 1730 } 1731 1732 intel_gt_park_heartbeats(guc_to_gt(guc)); 1733 disable_submission(guc); 1734 guc->interrupts.disable(guc); 1735 __reset_guc_busyness_stats(guc); 1736 1737 /* Flush IRQ handler */ 1738 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1739 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1740 1741 /* Flush tasklet */ 1742 tasklet_disable(&guc->ct.receive_tasklet); 1743 tasklet_enable(&guc->ct.receive_tasklet); 1744 1745 guc_flush_submissions(guc); 1746 guc_flush_destroyed_contexts(guc); 1747 flush_work(&guc->ct.requests.worker); 1748 1749 scrub_guc_desc_for_outstanding_g2h(guc); 1750 } 1751 1752 static struct intel_engine_cs * 1753 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1754 { 1755 struct intel_engine_cs *engine; 1756 intel_engine_mask_t tmp, mask = ve->mask; 1757 unsigned int num_siblings = 0; 1758 1759 for_each_engine_masked(engine, ve->gt, mask, tmp) 1760 if (num_siblings++ == sibling) 1761 return engine; 1762 1763 return NULL; 1764 } 1765 1766 static inline struct intel_engine_cs * 1767 __context_to_physical_engine(struct intel_context *ce) 1768 { 1769 struct intel_engine_cs *engine = ce->engine; 1770 1771 if (intel_engine_is_virtual(engine)) 1772 engine = guc_virtual_get_sibling(engine, 0); 1773 1774 return engine; 1775 } 1776 1777 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1778 { 1779 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1780 1781 if (!intel_context_is_schedulable(ce)) 1782 return; 1783 1784 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1785 1786 /* 1787 * We want a simple context + ring to execute the breadcrumb update. 1788 * We cannot rely on the context being intact across the GPU hang, 1789 * so clear it and rebuild just what we need for the breadcrumb. 1790 * All pending requests for this context will be zapped, and any 1791 * future request will be after userspace has had the opportunity 1792 * to recreate its own state. 1793 */ 1794 if (scrub) 1795 lrc_init_regs(ce, engine, true); 1796 1797 /* Rerun the request; its payload has been neutered (if guilty). */ 1798 lrc_update_regs(ce, engine, head); 1799 } 1800 1801 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1802 { 1803 /* 1804 * Wa_22011802037: In addition to stopping the cs, we need 1805 * to wait for any pending mi force wakeups 1806 */ 1807 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { 1808 intel_engine_stop_cs(engine); 1809 intel_engine_wait_for_pending_mi_fw(engine); 1810 } 1811 } 1812 1813 static void guc_reset_nop(struct intel_engine_cs *engine) 1814 { 1815 } 1816 1817 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1818 { 1819 } 1820 1821 static void 1822 __unwind_incomplete_requests(struct intel_context *ce) 1823 { 1824 struct i915_request *rq, *rn; 1825 struct list_head *pl; 1826 int prio = I915_PRIORITY_INVALID; 1827 struct i915_sched_engine * const sched_engine = 1828 ce->engine->sched_engine; 1829 unsigned long flags; 1830 1831 spin_lock_irqsave(&sched_engine->lock, flags); 1832 spin_lock(&ce->guc_state.lock); 1833 list_for_each_entry_safe_reverse(rq, rn, 1834 &ce->guc_state.requests, 1835 sched.link) { 1836 if (i915_request_completed(rq)) 1837 continue; 1838 1839 list_del_init(&rq->sched.link); 1840 __i915_request_unsubmit(rq); 1841 1842 /* Push the request back into the queue for later resubmission. 
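 * The priolist lookup below re-inserts the request at its priority level so it is picked up again in order once submission resumes.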
*/ 1843 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1844 if (rq_prio(rq) != prio) { 1845 prio = rq_prio(rq); 1846 pl = i915_sched_lookup_priolist(sched_engine, prio); 1847 } 1848 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1849 1850 list_add(&rq->sched.link, pl); 1851 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1852 } 1853 spin_unlock(&ce->guc_state.lock); 1854 spin_unlock_irqrestore(&sched_engine->lock, flags); 1855 } 1856 1857 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1858 { 1859 bool guilty; 1860 struct i915_request *rq; 1861 unsigned long flags; 1862 u32 head; 1863 int i, number_children = ce->parallel.number_children; 1864 struct intel_context *parent = ce; 1865 1866 GEM_BUG_ON(intel_context_is_child(ce)); 1867 1868 intel_context_get(ce); 1869 1870 /* 1871 * GuC will implicitly mark the context as non-schedulable when it sends 1872 * the reset notification. Make sure our state reflects this change. The 1873 * context will be marked enabled on resubmission. 1874 */ 1875 spin_lock_irqsave(&ce->guc_state.lock, flags); 1876 clr_context_enabled(ce); 1877 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1878 1879 /* 1880 * For each context in the relationship find the hanging request 1881 * resetting each context / request as needed 1882 */ 1883 for (i = 0; i < number_children + 1; ++i) { 1884 if (!intel_context_is_pinned(ce)) 1885 goto next_context; 1886 1887 guilty = false; 1888 rq = intel_context_get_active_request(ce); 1889 if (!rq) { 1890 head = ce->ring->tail; 1891 goto out_replay; 1892 } 1893 1894 if (i915_request_started(rq)) 1895 guilty = stalled & ce->engine->mask; 1896 1897 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1898 head = intel_ring_wrap(ce->ring, rq->head); 1899 1900 __i915_request_reset(rq, guilty); 1901 i915_request_put(rq); 1902 out_replay: 1903 guc_reset_state(ce, head, guilty); 1904 next_context: 1905 if (i != number_children) 1906 ce = list_next_entry(ce, parallel.child_link); 1907 } 1908 1909 __unwind_incomplete_requests(parent); 1910 intel_context_put(parent); 1911 } 1912 1913 void wake_up_all_tlb_invalidate(struct intel_guc *guc) 1914 { 1915 struct intel_guc_tlb_wait *wait; 1916 unsigned long i; 1917 1918 if (!intel_guc_tlb_invalidation_is_available(guc)) 1919 return; 1920 1921 xa_lock_irq(&guc->tlb_lookup); 1922 xa_for_each(&guc->tlb_lookup, i, wait) 1923 wake_up(&wait->wq); 1924 xa_unlock_irq(&guc->tlb_lookup); 1925 } 1926 1927 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1928 { 1929 struct intel_context *ce; 1930 unsigned long index; 1931 unsigned long flags; 1932 1933 if (unlikely(!guc_submission_initialized(guc))) { 1934 /* Reset called during driver load? GuC not yet initialised! 
*/ 1935 return; 1936 } 1937 1938 xa_lock_irqsave(&guc->context_lookup, flags); 1939 xa_for_each(&guc->context_lookup, index, ce) { 1940 if (!kref_get_unless_zero(&ce->ref)) 1941 continue; 1942 1943 xa_unlock(&guc->context_lookup); 1944 1945 if (intel_context_is_pinned(ce) && 1946 !intel_context_is_child(ce)) 1947 __guc_reset_context(ce, stalled); 1948 1949 intel_context_put(ce); 1950 1951 xa_lock(&guc->context_lookup); 1952 } 1953 xa_unlock_irqrestore(&guc->context_lookup, flags); 1954 1955 /* GuC is blown away, drop all references to contexts */ 1956 xa_destroy(&guc->context_lookup); 1957 } 1958 1959 static void guc_cancel_context_requests(struct intel_context *ce) 1960 { 1961 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1962 struct i915_request *rq; 1963 unsigned long flags; 1964 1965 /* Mark all executing requests as skipped. */ 1966 spin_lock_irqsave(&sched_engine->lock, flags); 1967 spin_lock(&ce->guc_state.lock); 1968 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1969 i915_request_put(i915_request_mark_eio(rq)); 1970 spin_unlock(&ce->guc_state.lock); 1971 spin_unlock_irqrestore(&sched_engine->lock, flags); 1972 } 1973 1974 static void 1975 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1976 { 1977 struct i915_request *rq, *rn; 1978 struct rb_node *rb; 1979 unsigned long flags; 1980 1981 /* Can be called during boot if GuC fails to load */ 1982 if (!sched_engine) 1983 return; 1984 1985 /* 1986 * Before we call engine->cancel_requests(), we should have exclusive 1987 * access to the submission state. This is arranged for us by the 1988 * caller disabling the interrupt generation, the tasklet and other 1989 * threads that may then access the same state, giving us a free hand 1990 * to reset state. However, we still need to let lockdep be aware that 1991 * we know this state may be accessed in hardirq context, so we 1992 * disable the irq around this manipulation and we want to keep 1993 * the spinlock focused on its duties and not accidentally conflate 1994 * coverage to the submission's irq state. (Similarly, although we 1995 * shouldn't need to disable irq around the manipulation of the 1996 * submission's irq state, we also wish to remind ourselves that 1997 * it is irq state.) 1998 */ 1999 spin_lock_irqsave(&sched_engine->lock, flags); 2000 2001 /* Flush the queued requests to the timeline list (for retiring). 
*/ 2002 while ((rb = rb_first_cached(&sched_engine->queue))) { 2003 struct i915_priolist *p = to_priolist(rb); 2004 2005 priolist_for_each_request_consume(rq, rn, p) { 2006 list_del_init(&rq->sched.link); 2007 2008 __i915_request_submit(rq); 2009 2010 i915_request_put(i915_request_mark_eio(rq)); 2011 } 2012 2013 rb_erase_cached(&p->node, &sched_engine->queue); 2014 i915_priolist_free(p); 2015 } 2016 2017 /* Remaining _unready_ requests will be nop'ed when submitted */ 2018 2019 sched_engine->queue_priority_hint = INT_MIN; 2020 sched_engine->queue = RB_ROOT_CACHED; 2021 2022 spin_unlock_irqrestore(&sched_engine->lock, flags); 2023 } 2024 2025 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 2026 { 2027 struct intel_context *ce; 2028 unsigned long index; 2029 unsigned long flags; 2030 2031 xa_lock_irqsave(&guc->context_lookup, flags); 2032 xa_for_each(&guc->context_lookup, index, ce) { 2033 if (!kref_get_unless_zero(&ce->ref)) 2034 continue; 2035 2036 xa_unlock(&guc->context_lookup); 2037 2038 if (intel_context_is_pinned(ce) && 2039 !intel_context_is_child(ce)) 2040 guc_cancel_context_requests(ce); 2041 2042 intel_context_put(ce); 2043 2044 xa_lock(&guc->context_lookup); 2045 } 2046 xa_unlock_irqrestore(&guc->context_lookup, flags); 2047 2048 guc_cancel_sched_engine_requests(guc->sched_engine); 2049 2050 /* GuC is blown away, drop all references to contexts */ 2051 xa_destroy(&guc->context_lookup); 2052 2053 /* 2054 * Wedged GT won't respond to any TLB invalidation request. Simply 2055 * release all the blocked waiters. 2056 */ 2057 wake_up_all_tlb_invalidate(guc); 2058 } 2059 2060 void intel_guc_submission_reset_finish(struct intel_guc *guc) 2061 { 2062 int outstanding; 2063 2064 /* Reset called during driver load or during wedge? */ 2065 if (unlikely(!guc_submission_initialized(guc) || 2066 !intel_guc_is_fw_running(guc) || 2067 intel_gt_is_wedged(guc_to_gt(guc)))) { 2068 return; 2069 } 2070 2071 /* 2072 * Technically possible for either of these values to be non-zero here, 2073 * but very unlikely + harmless. Regardless let's add an error so we can 2074 * see in CI if this happens frequently / a precursor to taking down the 2075 * machine. 2076 */ 2077 outstanding = atomic_read(&guc->outstanding_submission_g2h); 2078 if (outstanding) 2079 guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n", 2080 outstanding); 2081 atomic_set(&guc->outstanding_submission_g2h, 0); 2082 2083 intel_guc_global_policies_update(guc); 2084 enable_submission(guc); 2085 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 2086 2087 /* 2088 * The full GT reset will have cleared the TLB caches and flushed the 2089 * G2H message queue; we can release all the blocked waiters. 
2090 */ 2091 wake_up_all_tlb_invalidate(guc); 2092 } 2093 2094 static void destroyed_worker_func(struct work_struct *w); 2095 static void reset_fail_worker_func(struct work_struct *w); 2096 2097 bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc) 2098 { 2099 return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) && 2100 intel_guc_is_ready(guc); 2101 } 2102 2103 static int init_tlb_lookup(struct intel_guc *guc) 2104 { 2105 struct intel_guc_tlb_wait *wait; 2106 int err; 2107 2108 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2109 return 0; 2110 2111 xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC); 2112 2113 wait = kzalloc(sizeof(*wait), GFP_KERNEL); 2114 if (!wait) 2115 return -ENOMEM; 2116 2117 init_waitqueue_head(&wait->wq); 2118 2119 /* Preallocate a shared id for use under memory pressure. */ 2120 err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait, 2121 xa_limit_32b, &guc->next_seqno, GFP_KERNEL); 2122 if (err < 0) { 2123 kfree(wait); 2124 return err; 2125 } 2126 2127 return 0; 2128 } 2129 2130 static void fini_tlb_lookup(struct intel_guc *guc) 2131 { 2132 struct intel_guc_tlb_wait *wait; 2133 2134 if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) 2135 return; 2136 2137 wait = xa_load(&guc->tlb_lookup, guc->serial_slot); 2138 if (wait && wait->busy) 2139 guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n"); 2140 kfree(wait); 2141 2142 xa_destroy(&guc->tlb_lookup); 2143 } 2144 2145 /* 2146 * Set up the memory resources to be shared with the GuC (via the GGTT) 2147 * at firmware loading time. 2148 */ 2149 int intel_guc_submission_init(struct intel_guc *guc) 2150 { 2151 struct intel_gt *gt = guc_to_gt(guc); 2152 int ret; 2153 2154 if (guc->submission_initialized) 2155 return 0; 2156 2157 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 2158 ret = guc_lrc_desc_pool_create_v69(guc); 2159 if (ret) 2160 return ret; 2161 } 2162 2163 ret = init_tlb_lookup(guc); 2164 if (ret) 2165 goto destroy_pool; 2166 2167 guc->submission_state.guc_ids_bitmap = 2168 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 2169 if (!guc->submission_state.guc_ids_bitmap) { 2170 ret = -ENOMEM; 2171 goto destroy_tlb; 2172 } 2173 2174 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 2175 guc->timestamp.shift = gpm_timestamp_shift(gt); 2176 guc->submission_initialized = true; 2177 2178 return 0; 2179 2180 destroy_tlb: 2181 fini_tlb_lookup(guc); 2182 destroy_pool: 2183 guc_lrc_desc_pool_destroy_v69(guc); 2184 return ret; 2185 } 2186 2187 void intel_guc_submission_fini(struct intel_guc *guc) 2188 { 2189 if (!guc->submission_initialized) 2190 return; 2191 2192 guc_fini_engine_stats(guc); 2193 guc_flush_destroyed_contexts(guc); 2194 guc_lrc_desc_pool_destroy_v69(guc); 2195 i915_sched_engine_put(guc->sched_engine); 2196 bitmap_free(guc->submission_state.guc_ids_bitmap); 2197 fini_tlb_lookup(guc); 2198 guc->submission_initialized = false; 2199 } 2200 2201 static inline void queue_request(struct i915_sched_engine *sched_engine, 2202 struct i915_request *rq, 2203 int prio) 2204 { 2205 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2206 list_add_tail(&rq->sched.link, 2207 i915_sched_lookup_priolist(sched_engine, prio)); 2208 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2209 tasklet_hi_schedule(&sched_engine->tasklet); 2210 } 2211 2212 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2213 struct i915_request *rq) 2214 { 2215 int ret = 0; 2216 2217 __i915_request_submit(rq); 2218 2219 trace_i915_request_in(rq, 0); 2220 2221 if 
(is_multi_lrc_rq(rq)) { 2222 if (multi_lrc_submit(rq)) { 2223 ret = guc_wq_item_append(guc, rq); 2224 if (!ret) 2225 ret = guc_add_request(guc, rq); 2226 } 2227 } else { 2228 guc_set_lrc_tail(rq); 2229 ret = guc_add_request(guc, rq); 2230 } 2231 2232 if (unlikely(ret == -EPIPE)) 2233 disable_submission(guc); 2234 2235 return ret; 2236 } 2237 2238 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2239 { 2240 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2241 struct intel_context *ce = request_to_scheduling_context(rq); 2242 2243 return submission_disabled(guc) || guc->stalled_request || 2244 !i915_sched_engine_is_empty(sched_engine) || 2245 !ctx_id_mapped(guc, ce->guc_id.id); 2246 } 2247 2248 static void guc_submit_request(struct i915_request *rq) 2249 { 2250 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2251 struct intel_guc *guc = gt_to_guc(rq->engine->gt); 2252 unsigned long flags; 2253 2254 /* Will be called from irq-context when using foreign fences. */ 2255 spin_lock_irqsave(&sched_engine->lock, flags); 2256 2257 if (need_tasklet(guc, rq)) 2258 queue_request(sched_engine, rq, rq_prio(rq)); 2259 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2260 tasklet_hi_schedule(&sched_engine->tasklet); 2261 2262 spin_unlock_irqrestore(&sched_engine->lock, flags); 2263 } 2264 2265 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2266 { 2267 int ret; 2268 2269 GEM_BUG_ON(intel_context_is_child(ce)); 2270 2271 if (intel_context_is_parent(ce)) 2272 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2273 NUMBER_MULTI_LRC_GUC_ID(guc), 2274 order_base_2(ce->parallel.number_children 2275 + 1)); 2276 else 2277 ret = ida_alloc_range(&guc->submission_state.guc_ids, 2278 NUMBER_MULTI_LRC_GUC_ID(guc), 2279 guc->submission_state.num_guc_ids - 1, 2280 GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); 2281 if (unlikely(ret < 0)) 2282 return ret; 2283 2284 if (!intel_context_is_parent(ce)) 2285 ++guc->submission_state.guc_ids_in_use; 2286 2287 ce->guc_id.id = ret; 2288 return 0; 2289 } 2290 2291 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2292 { 2293 GEM_BUG_ON(intel_context_is_child(ce)); 2294 2295 if (!context_guc_id_invalid(ce)) { 2296 if (intel_context_is_parent(ce)) { 2297 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2298 ce->guc_id.id, 2299 order_base_2(ce->parallel.number_children 2300 + 1)); 2301 } else { 2302 --guc->submission_state.guc_ids_in_use; 2303 ida_free(&guc->submission_state.guc_ids, 2304 ce->guc_id.id); 2305 } 2306 clr_ctx_id_mapping(guc, ce->guc_id.id); 2307 set_context_guc_id_invalid(ce); 2308 } 2309 if (!list_empty(&ce->guc_id.link)) 2310 list_del_init(&ce->guc_id.link); 2311 } 2312 2313 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2314 { 2315 unsigned long flags; 2316 2317 spin_lock_irqsave(&guc->submission_state.lock, flags); 2318 __release_guc_id(guc, ce); 2319 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2320 } 2321 2322 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2323 { 2324 struct intel_context *cn; 2325 2326 lockdep_assert_held(&guc->submission_state.lock); 2327 GEM_BUG_ON(intel_context_is_child(ce)); 2328 GEM_BUG_ON(intel_context_is_parent(ce)); 2329 2330 if (!list_empty(&guc->submission_state.guc_id_list)) { 2331 cn = list_first_entry(&guc->submission_state.guc_id_list, 2332 struct intel_context, 2333 guc_id.link); 2334 2335 
GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2336 GEM_BUG_ON(context_guc_id_invalid(cn)); 2337 GEM_BUG_ON(intel_context_is_child(cn)); 2338 GEM_BUG_ON(intel_context_is_parent(cn)); 2339 2340 list_del_init(&cn->guc_id.link); 2341 ce->guc_id.id = cn->guc_id.id; 2342 2343 spin_lock(&cn->guc_state.lock); 2344 clr_context_registered(cn); 2345 spin_unlock(&cn->guc_state.lock); 2346 2347 set_context_guc_id_invalid(cn); 2348 2349 #ifdef CONFIG_DRM_I915_SELFTEST 2350 guc->number_guc_id_stolen++; 2351 #endif 2352 2353 return 0; 2354 } else { 2355 return -EAGAIN; 2356 } 2357 } 2358 2359 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2360 { 2361 int ret; 2362 2363 lockdep_assert_held(&guc->submission_state.lock); 2364 GEM_BUG_ON(intel_context_is_child(ce)); 2365 2366 ret = new_guc_id(guc, ce); 2367 if (unlikely(ret < 0)) { 2368 if (intel_context_is_parent(ce)) 2369 return -ENOSPC; 2370 2371 ret = steal_guc_id(guc, ce); 2372 if (ret < 0) 2373 return ret; 2374 } 2375 2376 if (intel_context_is_parent(ce)) { 2377 struct intel_context *child; 2378 int i = 1; 2379 2380 for_each_child(ce, child) 2381 child->guc_id.id = ce->guc_id.id + i++; 2382 } 2383 2384 return 0; 2385 } 2386 2387 #define PIN_GUC_ID_TRIES 4 2388 static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2389 { 2390 int ret = 0; 2391 unsigned long flags, tries = PIN_GUC_ID_TRIES; 2392 2393 GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); 2394 2395 try_again: 2396 spin_lock_irqsave(&guc->submission_state.lock, flags); 2397 2398 might_lock(&ce->guc_state.lock); 2399 2400 if (context_guc_id_invalid(ce)) { 2401 ret = assign_guc_id(guc, ce); 2402 if (ret) 2403 goto out_unlock; 2404 ret = 1; /* Indicates newly assigned guc_id */ 2405 } 2406 if (!list_empty(&ce->guc_id.link)) 2407 list_del_init(&ce->guc_id.link); 2408 atomic_inc(&ce->guc_id.ref); 2409 2410 out_unlock: 2411 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2412 2413 /* 2414 * -EAGAIN indicates no guc_ids are available, let's retire any 2415 * outstanding requests to see if that frees up a guc_id. If the first 2416 * retire didn't help, insert a sleep with the timeslice duration before 2417 * attempting to retire more requests. Double the sleep period each 2418 * subsequent pass before finally giving up. The sleep period is clamped to 2419 * a maximum of 100ms and a minimum of 1ms.
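 * For example, with a 5ms timeslice the first retry does not sleep, the second sleeps 5ms and the third sleeps 10ms (each sleep clamped to the 1-100ms range) before -EAGAIN is finally returned.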
2420 */ 2421 if (ret == -EAGAIN && --tries) { 2422 if (PIN_GUC_ID_TRIES - tries > 1) { 2423 unsigned int timeslice_shifted = 2424 ce->engine->props.timeslice_duration_ms << 2425 (PIN_GUC_ID_TRIES - tries - 2); 2426 unsigned int max = min_t(unsigned int, 100, 2427 timeslice_shifted); 2428 2429 msleep(max_t(unsigned int, max, 1)); 2430 } 2431 intel_gt_retire_requests(guc_to_gt(guc)); 2432 goto try_again; 2433 } 2434 2435 return ret; 2436 } 2437 2438 static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) 2439 { 2440 unsigned long flags; 2441 2442 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); 2443 GEM_BUG_ON(intel_context_is_child(ce)); 2444 2445 if (unlikely(context_guc_id_invalid(ce) || 2446 intel_context_is_parent(ce))) 2447 return; 2448 2449 spin_lock_irqsave(&guc->submission_state.lock, flags); 2450 if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && 2451 !atomic_read(&ce->guc_id.ref)) 2452 list_add_tail(&ce->guc_id.link, 2453 &guc->submission_state.guc_id_list); 2454 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2455 } 2456 2457 static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, 2458 struct intel_context *ce, 2459 u32 guc_id, 2460 u32 offset, 2461 bool loop) 2462 { 2463 struct intel_context *child; 2464 u32 action[4 + MAX_ENGINE_INSTANCE]; 2465 int len = 0; 2466 2467 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2468 2469 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2470 action[len++] = guc_id; 2471 action[len++] = ce->parallel.number_children + 1; 2472 action[len++] = offset; 2473 for_each_child(ce, child) { 2474 offset += sizeof(struct guc_lrc_desc_v69); 2475 action[len++] = offset; 2476 } 2477 2478 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2479 } 2480 2481 static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, 2482 struct intel_context *ce, 2483 struct guc_ctxt_registration_info *info, 2484 bool loop) 2485 { 2486 struct intel_context *child; 2487 u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; 2488 int len = 0; 2489 u32 next_id; 2490 2491 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); 2492 2493 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2494 action[len++] = info->flags; 2495 action[len++] = info->context_idx; 2496 action[len++] = info->engine_class; 2497 action[len++] = info->engine_submit_mask; 2498 action[len++] = info->wq_desc_lo; 2499 action[len++] = info->wq_desc_hi; 2500 action[len++] = info->wq_base_lo; 2501 action[len++] = info->wq_base_hi; 2502 action[len++] = info->wq_size; 2503 action[len++] = ce->parallel.number_children + 1; 2504 action[len++] = info->hwlrca_lo; 2505 action[len++] = info->hwlrca_hi; 2506 2507 next_id = info->context_idx + 1; 2508 for_each_child(ce, child) { 2509 GEM_BUG_ON(next_id++ != child->guc_id.id); 2510 2511 /* 2512 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2513 * only supports 32 bit currently. 
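 * As a result, the upper dword written below is currently always zero.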
2514 */ 2515 action[len++] = lower_32_bits(child->lrc.lrca); 2516 action[len++] = upper_32_bits(child->lrc.lrca); 2517 } 2518 2519 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2520 2521 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2522 } 2523 2524 static int __guc_action_register_context_v69(struct intel_guc *guc, 2525 u32 guc_id, 2526 u32 offset, 2527 bool loop) 2528 { 2529 u32 action[] = { 2530 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2531 guc_id, 2532 offset, 2533 }; 2534 2535 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2536 0, loop); 2537 } 2538 2539 static int __guc_action_register_context_v70(struct intel_guc *guc, 2540 struct guc_ctxt_registration_info *info, 2541 bool loop) 2542 { 2543 u32 action[] = { 2544 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2545 info->flags, 2546 info->context_idx, 2547 info->engine_class, 2548 info->engine_submit_mask, 2549 info->wq_desc_lo, 2550 info->wq_desc_hi, 2551 info->wq_base_lo, 2552 info->wq_base_hi, 2553 info->wq_size, 2554 info->hwlrca_lo, 2555 info->hwlrca_hi, 2556 }; 2557 2558 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2559 0, loop); 2560 } 2561 2562 static void prepare_context_registration_info_v69(struct intel_context *ce); 2563 static void prepare_context_registration_info_v70(struct intel_context *ce, 2564 struct guc_ctxt_registration_info *info); 2565 2566 static int 2567 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2568 { 2569 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2570 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2571 2572 prepare_context_registration_info_v69(ce); 2573 2574 if (intel_context_is_parent(ce)) 2575 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2576 offset, loop); 2577 else 2578 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2579 offset, loop); 2580 } 2581 2582 static int 2583 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2584 { 2585 struct guc_ctxt_registration_info info; 2586 2587 prepare_context_registration_info_v70(ce, &info); 2588 2589 if (intel_context_is_parent(ce)) 2590 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2591 else 2592 return __guc_action_register_context_v70(guc, &info, loop); 2593 } 2594 2595 static int register_context(struct intel_context *ce, bool loop) 2596 { 2597 struct intel_guc *guc = ce_to_guc(ce); 2598 int ret; 2599 2600 GEM_BUG_ON(intel_context_is_child(ce)); 2601 trace_intel_context_register(ce); 2602 2603 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2604 ret = register_context_v70(guc, ce, loop); 2605 else 2606 ret = register_context_v69(guc, ce, loop); 2607 2608 if (likely(!ret)) { 2609 unsigned long flags; 2610 2611 spin_lock_irqsave(&ce->guc_state.lock, flags); 2612 set_context_registered(ce); 2613 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2614 2615 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2616 guc_context_policy_init_v70(ce, loop); 2617 } 2618 2619 return ret; 2620 } 2621 2622 static int __guc_action_deregister_context(struct intel_guc *guc, 2623 u32 guc_id) 2624 { 2625 u32 action[] = { 2626 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2627 guc_id, 2628 }; 2629 2630 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2631 G2H_LEN_DW_DEREGISTER_CONTEXT, 2632 true); 2633 } 2634 2635 static int deregister_context(struct intel_context *ce, u32 guc_id) 2636 { 2637 struct intel_guc *guc = ce_to_guc(ce); 2638 2639 GEM_BUG_ON(intel_context_is_child(ce)); 
2640 trace_intel_context_deregister(ce); 2641 2642 return __guc_action_deregister_context(guc, guc_id); 2643 } 2644 2645 static inline void clear_children_join_go_memory(struct intel_context *ce) 2646 { 2647 struct parent_scratch *ps = __get_parent_scratch(ce); 2648 int i; 2649 2650 ps->go.semaphore = 0; 2651 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2652 ps->join[i].semaphore = 0; 2653 } 2654 2655 static inline u32 get_children_go_value(struct intel_context *ce) 2656 { 2657 return __get_parent_scratch(ce)->go.semaphore; 2658 } 2659 2660 static inline u32 get_children_join_value(struct intel_context *ce, 2661 u8 child_index) 2662 { 2663 return __get_parent_scratch(ce)->join[child_index].semaphore; 2664 } 2665 2666 struct context_policy { 2667 u32 count; 2668 struct guc_update_context_policy h2g; 2669 }; 2670 2671 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2672 { 2673 size_t bytes = sizeof(policy->h2g.header) + 2674 (sizeof(policy->h2g.klv[0]) * policy->count); 2675 2676 return bytes / sizeof(u32); 2677 } 2678 2679 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2680 { 2681 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2682 policy->h2g.header.ctx_id = guc_id; 2683 policy->count = 0; 2684 } 2685 2686 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2687 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2688 { \ 2689 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2690 policy->h2g.klv[policy->count].kl = \ 2691 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2692 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2693 policy->h2g.klv[policy->count].value = data; \ 2694 policy->count++; \ 2695 } 2696 2697 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2698 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2699 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2700 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2701 MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY) 2702 2703 #undef MAKE_CONTEXT_POLICY_ADD 2704 2705 static int __guc_context_set_context_policies(struct intel_guc *guc, 2706 struct context_policy *policy, 2707 bool loop) 2708 { 2709 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2710 __guc_context_policy_action_size(policy), 2711 0, loop); 2712 } 2713 2714 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2715 { 2716 struct intel_engine_cs *engine = ce->engine; 2717 struct intel_guc *guc = gt_to_guc(engine->gt); 2718 struct context_policy policy; 2719 u32 execution_quantum; 2720 u32 preemption_timeout; 2721 u32 slpc_ctx_freq_req = 0; 2722 unsigned long flags; 2723 int ret; 2724 2725 /* NB: For both of these, zero means disabled. 
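 * The engine properties are in milliseconds; they are converted to microseconds (* 1000) before being added to the policy KLVs below.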
*/ 2726 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2727 execution_quantum)); 2728 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2729 preemption_timeout)); 2730 execution_quantum = engine->props.timeslice_duration_ms * 1000; 2731 preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2732 2733 if (ce->flags & BIT(CONTEXT_LOW_LATENCY)) 2734 slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; 2735 2736 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 2737 2738 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 2739 __guc_context_policy_add_execution_quantum(&policy, execution_quantum); 2740 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 2741 __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req); 2742 2743 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2744 __guc_context_policy_add_preempt_to_idle(&policy, 1); 2745 2746 ret = __guc_context_set_context_policies(guc, &policy, loop); 2747 2748 spin_lock_irqsave(&ce->guc_state.lock, flags); 2749 if (ret != 0) 2750 set_context_policy_required(ce); 2751 else 2752 clr_context_policy_required(ce); 2753 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2754 2755 return ret; 2756 } 2757 2758 static void guc_context_policy_init_v69(struct intel_engine_cs *engine, 2759 struct guc_lrc_desc_v69 *desc) 2760 { 2761 desc->policy_flags = 0; 2762 2763 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) 2764 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; 2765 2766 /* NB: For both of these, zero means disabled. */ 2767 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, 2768 desc->execution_quantum)); 2769 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, 2770 desc->preemption_timeout)); 2771 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; 2772 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; 2773 } 2774 2775 static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) 2776 { 2777 /* 2778 * this matches the mapping we do in map_i915_prio_to_guc_prio() 2779 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) 2780 */ 2781 switch (prio) { 2782 default: 2783 MISSING_CASE(prio); 2784 fallthrough; 2785 case GUC_CLIENT_PRIORITY_KMD_NORMAL: 2786 return GEN12_CTX_PRIORITY_NORMAL; 2787 case GUC_CLIENT_PRIORITY_NORMAL: 2788 return GEN12_CTX_PRIORITY_LOW; 2789 case GUC_CLIENT_PRIORITY_HIGH: 2790 case GUC_CLIENT_PRIORITY_KMD_HIGH: 2791 return GEN12_CTX_PRIORITY_HIGH; 2792 } 2793 } 2794 2795 static void prepare_context_registration_info_v69(struct intel_context *ce) 2796 { 2797 struct intel_engine_cs *engine = ce->engine; 2798 struct intel_guc *guc = gt_to_guc(engine->gt); 2799 u32 ctx_id = ce->guc_id.id; 2800 struct guc_lrc_desc_v69 *desc; 2801 struct intel_context *child; 2802 2803 GEM_BUG_ON(!engine->mask); 2804 2805 /* 2806 * Ensure LRC + CT vmas are in the same region, as the write barrier is 2807 * done based on the CT vma region.
2808 */ 2809 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2810 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2811 2812 desc = __get_lrc_desc_v69(guc, ctx_id); 2813 GEM_BUG_ON(!desc); 2814 desc->engine_class = engine_class_to_guc_class(engine->class); 2815 desc->engine_submit_mask = engine->logical_mask; 2816 desc->hw_context_desc = ce->lrc.lrca; 2817 desc->priority = ce->guc_state.prio; 2818 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2819 guc_context_policy_init_v69(engine, desc); 2820 2821 /* 2822 * If context is a parent, we need to register a process descriptor 2823 * describing a work queue and register all child contexts. 2824 */ 2825 if (intel_context_is_parent(ce)) { 2826 struct guc_process_desc_v69 *pdesc; 2827 2828 ce->parallel.guc.wqi_tail = 0; 2829 ce->parallel.guc.wqi_head = 0; 2830 2831 desc->process_desc = i915_ggtt_offset(ce->state) + 2832 __get_parent_scratch_offset(ce); 2833 desc->wq_addr = i915_ggtt_offset(ce->state) + 2834 __get_wq_offset(ce); 2835 desc->wq_size = WQ_SIZE; 2836 2837 pdesc = __get_process_desc_v69(ce); 2838 memset(pdesc, 0, sizeof(*(pdesc))); 2839 pdesc->stage_id = ce->guc_id.id; 2840 pdesc->wq_base_addr = desc->wq_addr; 2841 pdesc->wq_size_bytes = desc->wq_size; 2842 pdesc->wq_status = WQ_STATUS_ACTIVE; 2843 2844 ce->parallel.guc.wq_head = &pdesc->head; 2845 ce->parallel.guc.wq_tail = &pdesc->tail; 2846 ce->parallel.guc.wq_status = &pdesc->wq_status; 2847 2848 for_each_child(ce, child) { 2849 desc = __get_lrc_desc_v69(guc, child->guc_id.id); 2850 2851 desc->engine_class = 2852 engine_class_to_guc_class(engine->class); 2853 desc->hw_context_desc = child->lrc.lrca; 2854 desc->priority = ce->guc_state.prio; 2855 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; 2856 guc_context_policy_init_v69(engine, desc); 2857 } 2858 2859 clear_children_join_go_memory(ce); 2860 } 2861 } 2862 2863 static void prepare_context_registration_info_v70(struct intel_context *ce, 2864 struct guc_ctxt_registration_info *info) 2865 { 2866 struct intel_engine_cs *engine = ce->engine; 2867 struct intel_guc *guc = gt_to_guc(engine->gt); 2868 u32 ctx_id = ce->guc_id.id; 2869 2870 GEM_BUG_ON(!engine->mask); 2871 2872 /* 2873 * Ensure LRC + CT vmas are in the same region, as the write barrier is 2874 * done based on the CT vma region. 2875 */ 2876 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != 2877 i915_gem_object_is_lmem(ce->ring->vma->obj)); 2878 2879 memset(info, 0, sizeof(*info)); 2880 info->context_idx = ctx_id; 2881 info->engine_class = engine_class_to_guc_class(engine->class); 2882 info->engine_submit_mask = engine->logical_mask; 2883 /* 2884 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2885 * only supports 32 bit currently. 2886 */ 2887 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); 2888 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); 2889 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) 2890 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); 2891 info->flags = CONTEXT_REGISTRATION_FLAG_KMD; 2892 2893 /* 2894 * If context is a parent, we need to register a process descriptor 2895 * describing a work queue and register all child contexts.
2896 */ 2897 if (intel_context_is_parent(ce)) { 2898 struct guc_sched_wq_desc *wq_desc; 2899 u64 wq_desc_offset, wq_base_offset; 2900 2901 ce->parallel.guc.wqi_tail = 0; 2902 ce->parallel.guc.wqi_head = 0; 2903 2904 wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + 2905 __get_parent_scratch_offset(ce); 2906 wq_base_offset = (u64)i915_ggtt_offset(ce->state) + 2907 __get_wq_offset(ce); 2908 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2909 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2910 info->wq_base_lo = lower_32_bits(wq_base_offset); 2911 info->wq_base_hi = upper_32_bits(wq_base_offset); 2912 info->wq_size = WQ_SIZE; 2913 2914 wq_desc = __get_wq_desc_v70(ce); 2915 memset(wq_desc, 0, sizeof(*wq_desc)); 2916 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2917 2918 ce->parallel.guc.wq_head = &wq_desc->head; 2919 ce->parallel.guc.wq_tail = &wq_desc->tail; 2920 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2921 2922 clear_children_join_go_memory(ce); 2923 } 2924 } 2925 2926 static int try_context_registration(struct intel_context *ce, bool loop) 2927 { 2928 struct intel_engine_cs *engine = ce->engine; 2929 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2930 struct intel_guc *guc = gt_to_guc(engine->gt); 2931 intel_wakeref_t wakeref; 2932 u32 ctx_id = ce->guc_id.id; 2933 bool context_registered; 2934 int ret = 0; 2935 2936 GEM_BUG_ON(!sched_state_is_init(ce)); 2937 2938 context_registered = ctx_id_mapped(guc, ctx_id); 2939 2940 clr_ctx_id_mapping(guc, ctx_id); 2941 set_ctx_id_mapping(guc, ctx_id, ce); 2942 2943 /* 2944 * The context_lookup xarray is used to determine if the hardware 2945 * context is currently registered. There are two cases in which it 2946 * could be registered either the guc_id has been stolen from another 2947 * context or the lrc descriptor address of this context has changed. In 2948 * either case the context needs to be deregistered with the GuC before 2949 * registering this context. 2950 */ 2951 if (context_registered) { 2952 bool disabled; 2953 unsigned long flags; 2954 2955 trace_intel_context_steal_guc_id(ce); 2956 GEM_BUG_ON(!loop); 2957 2958 /* Seal race with Reset */ 2959 spin_lock_irqsave(&ce->guc_state.lock, flags); 2960 disabled = submission_disabled(guc); 2961 if (likely(!disabled)) { 2962 set_context_wait_for_deregister_to_register(ce); 2963 intel_context_get(ce); 2964 } 2965 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2966 if (unlikely(disabled)) { 2967 clr_ctx_id_mapping(guc, ctx_id); 2968 return 0; /* Will get registered later */ 2969 } 2970 2971 /* 2972 * If stealing the guc_id, this ce has the same guc_id as the 2973 * context whose guc_id was stolen. 
2974 */ 2975 with_intel_runtime_pm(runtime_pm, wakeref) 2976 ret = deregister_context(ce, ce->guc_id.id); 2977 if (unlikely(ret == -ENODEV)) 2978 ret = 0; /* Will get registered later */ 2979 } else { 2980 with_intel_runtime_pm(runtime_pm, wakeref) 2981 ret = register_context(ce, loop); 2982 if (unlikely(ret == -EBUSY)) { 2983 clr_ctx_id_mapping(guc, ctx_id); 2984 } else if (unlikely(ret == -ENODEV)) { 2985 clr_ctx_id_mapping(guc, ctx_id); 2986 ret = 0; /* Will get registered later */ 2987 } 2988 } 2989 2990 return ret; 2991 } 2992 2993 static int __guc_context_pre_pin(struct intel_context *ce, 2994 struct intel_engine_cs *engine, 2995 struct i915_gem_ww_ctx *ww, 2996 void **vaddr) 2997 { 2998 return lrc_pre_pin(ce, engine, ww, vaddr); 2999 } 3000 3001 static int __guc_context_pin(struct intel_context *ce, 3002 struct intel_engine_cs *engine, 3003 void *vaddr) 3004 { 3005 if (i915_ggtt_offset(ce->state) != 3006 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) 3007 set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3008 3009 /* 3010 * GuC context gets pinned in guc_request_alloc. See that function for 3011 * explanation of why. 3012 */ 3013 3014 return lrc_pin(ce, engine, vaddr); 3015 } 3016 3017 static int guc_context_pre_pin(struct intel_context *ce, 3018 struct i915_gem_ww_ctx *ww, 3019 void **vaddr) 3020 { 3021 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); 3022 } 3023 3024 static int guc_context_pin(struct intel_context *ce, void *vaddr) 3025 { 3026 int ret = __guc_context_pin(ce, ce->engine, vaddr); 3027 3028 if (likely(!ret && !intel_context_is_barrier(ce))) 3029 intel_engine_pm_get(ce->engine); 3030 3031 return ret; 3032 } 3033 3034 static void guc_context_unpin(struct intel_context *ce) 3035 { 3036 struct intel_guc *guc = ce_to_guc(ce); 3037 3038 __guc_context_update_stats(ce); 3039 unpin_guc_id(guc, ce); 3040 lrc_unpin(ce); 3041 3042 if (likely(!intel_context_is_barrier(ce))) 3043 intel_engine_pm_put_async(ce->engine); 3044 } 3045 3046 static void guc_context_post_unpin(struct intel_context *ce) 3047 { 3048 lrc_post_unpin(ce); 3049 } 3050 3051 static void __guc_context_sched_enable(struct intel_guc *guc, 3052 struct intel_context *ce) 3053 { 3054 u32 action[] = { 3055 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 3056 ce->guc_id.id, 3057 GUC_CONTEXT_ENABLE 3058 }; 3059 3060 trace_intel_context_sched_enable(ce); 3061 3062 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 3063 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 3064 } 3065 3066 static void __guc_context_sched_disable(struct intel_guc *guc, 3067 struct intel_context *ce, 3068 u16 guc_id) 3069 { 3070 u32 action[] = { 3071 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 3072 guc_id, /* ce->guc_id.id not stable */ 3073 GUC_CONTEXT_DISABLE 3074 }; 3075 3076 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); 3077 3078 GEM_BUG_ON(intel_context_is_child(ce)); 3079 trace_intel_context_sched_disable(ce); 3080 3081 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 3082 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); 3083 } 3084 3085 static void guc_blocked_fence_complete(struct intel_context *ce) 3086 { 3087 lockdep_assert_held(&ce->guc_state.lock); 3088 3089 if (!i915_sw_fence_done(&ce->guc_state.blocked)) 3090 i915_sw_fence_complete(&ce->guc_state.blocked); 3091 } 3092 3093 static void guc_blocked_fence_reinit(struct intel_context *ce) 3094 { 3095 lockdep_assert_held(&ce->guc_state.lock); 3096 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); 3097 3098 /* 3099 * This fence is always complete unless a pending schedule disable is 3100 * 
outstanding. We arm the fence here and complete it when we receive 3101 * the pending schedule disable complete message. 3102 */ 3103 i915_sw_fence_fini(&ce->guc_state.blocked); 3104 i915_sw_fence_reinit(&ce->guc_state.blocked); 3105 i915_sw_fence_await(&ce->guc_state.blocked); 3106 i915_sw_fence_commit(&ce->guc_state.blocked); 3107 } 3108 3109 static u16 prep_context_pending_disable(struct intel_context *ce) 3110 { 3111 lockdep_assert_held(&ce->guc_state.lock); 3112 3113 set_context_pending_disable(ce); 3114 clr_context_enabled(ce); 3115 guc_blocked_fence_reinit(ce); 3116 intel_context_get(ce); 3117 3118 return ce->guc_id.id; 3119 } 3120 3121 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 3122 { 3123 struct intel_guc *guc = ce_to_guc(ce); 3124 unsigned long flags; 3125 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3126 intel_wakeref_t wakeref; 3127 u16 guc_id; 3128 bool enabled; 3129 3130 GEM_BUG_ON(intel_context_is_child(ce)); 3131 3132 spin_lock_irqsave(&ce->guc_state.lock, flags); 3133 3134 incr_context_blocked(ce); 3135 3136 enabled = context_enabled(ce); 3137 if (unlikely(!enabled || submission_disabled(guc))) { 3138 if (enabled) 3139 clr_context_enabled(ce); 3140 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3141 return &ce->guc_state.blocked; 3142 } 3143 3144 /* 3145 * We add +2 here as the schedule disable complete CTB handler calls 3146 * intel_context_sched_disable_unpin (-2 to pin_count). 3147 */ 3148 atomic_add(2, &ce->pin_count); 3149 3150 guc_id = prep_context_pending_disable(ce); 3151 3152 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3153 3154 with_intel_runtime_pm(runtime_pm, wakeref) 3155 __guc_context_sched_disable(guc, ce, guc_id); 3156 3157 return &ce->guc_state.blocked; 3158 } 3159 3160 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 3161 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 3162 #define SCHED_STATE_NO_UNBLOCK \ 3163 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 3164 SCHED_STATE_PENDING_DISABLE | \ 3165 SCHED_STATE_BANNED) 3166 3167 static bool context_cant_unblock(struct intel_context *ce) 3168 { 3169 lockdep_assert_held(&ce->guc_state.lock); 3170 3171 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 3172 context_guc_id_invalid(ce) || 3173 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 3174 !intel_context_is_pinned(ce); 3175 } 3176 3177 static void guc_context_unblock(struct intel_context *ce) 3178 { 3179 struct intel_guc *guc = ce_to_guc(ce); 3180 unsigned long flags; 3181 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3182 intel_wakeref_t wakeref; 3183 bool enable; 3184 3185 GEM_BUG_ON(context_enabled(ce)); 3186 GEM_BUG_ON(intel_context_is_child(ce)); 3187 3188 spin_lock_irqsave(&ce->guc_state.lock, flags); 3189 3190 if (unlikely(submission_disabled(guc) || 3191 context_cant_unblock(ce))) { 3192 enable = false; 3193 } else { 3194 enable = true; 3195 set_context_pending_enable(ce); 3196 set_context_enabled(ce); 3197 intel_context_get(ce); 3198 } 3199 3200 decr_context_blocked(ce); 3201 3202 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3203 3204 if (enable) { 3205 with_intel_runtime_pm(runtime_pm, wakeref) 3206 __guc_context_sched_enable(guc, ce); 3207 } 3208 } 3209 3210 static void guc_context_cancel_request(struct intel_context *ce, 3211 struct i915_request *rq) 3212 { 3213 struct intel_context *block_context = 3214 request_to_scheduling_context(rq); 3215 3216 if (i915_sw_fence_signaled(&rq->submit)) { 3217 struct i915_sw_fence *fence; 3218 3219 intel_context_get(ce); 3220 
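/* Block the context (schedule disable), wait for that to take effect, then skip the request and scrub the ring state before unblocking. */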
fence = guc_context_block(block_context); 3221 i915_sw_fence_wait(fence); 3222 if (!i915_request_completed(rq)) { 3223 __i915_request_skip(rq); 3224 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3225 true); 3226 } 3227 3228 guc_context_unblock(block_context); 3229 intel_context_put(ce); 3230 } 3231 } 3232 3233 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3234 u16 guc_id, 3235 u32 preemption_timeout) 3236 { 3237 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3238 struct context_policy policy; 3239 3240 __guc_context_policy_start_klv(&policy, guc_id); 3241 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3242 __guc_context_set_context_policies(guc, &policy, true); 3243 } else { 3244 u32 action[] = { 3245 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3246 guc_id, 3247 preemption_timeout 3248 }; 3249 3250 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3251 } 3252 } 3253 3254 static void 3255 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3256 unsigned int preempt_timeout_ms) 3257 { 3258 struct intel_guc *guc = ce_to_guc(ce); 3259 struct intel_runtime_pm *runtime_pm = 3260 &ce->engine->gt->i915->runtime_pm; 3261 intel_wakeref_t wakeref; 3262 unsigned long flags; 3263 3264 GEM_BUG_ON(intel_context_is_child(ce)); 3265 3266 guc_flush_submissions(guc); 3267 3268 spin_lock_irqsave(&ce->guc_state.lock, flags); 3269 set_context_banned(ce); 3270 3271 if (submission_disabled(guc) || 3272 (!context_enabled(ce) && !context_pending_disable(ce))) { 3273 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3274 3275 guc_cancel_context_requests(ce); 3276 intel_engine_signal_breadcrumbs(ce->engine); 3277 } else if (!context_pending_disable(ce)) { 3278 u16 guc_id; 3279 3280 /* 3281 * We add +2 here as the schedule disable complete CTB handler 3282 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3283 */ 3284 atomic_add(2, &ce->pin_count); 3285 3286 guc_id = prep_context_pending_disable(ce); 3287 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3288 3289 /* 3290 * In addition to disabling scheduling, set the preemption 3291 * timeout to the minimum value (1 us) so the banned context 3292 * gets kicked off the HW ASAP. 
3293 */ 3294 with_intel_runtime_pm(runtime_pm, wakeref) { 3295 __guc_context_set_preemption_timeout(guc, guc_id, 3296 preempt_timeout_ms); 3297 __guc_context_sched_disable(guc, ce, guc_id); 3298 } 3299 } else { 3300 if (!context_guc_id_invalid(ce)) 3301 with_intel_runtime_pm(runtime_pm, wakeref) 3302 __guc_context_set_preemption_timeout(guc, 3303 ce->guc_id.id, 3304 preempt_timeout_ms); 3305 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3306 } 3307 } 3308 3309 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3310 unsigned long flags) 3311 __releases(ce->guc_state.lock) 3312 { 3313 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3314 intel_wakeref_t wakeref; 3315 u16 guc_id; 3316 3317 lockdep_assert_held(&ce->guc_state.lock); 3318 guc_id = prep_context_pending_disable(ce); 3319 3320 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3321 3322 with_intel_runtime_pm(runtime_pm, wakeref) 3323 __guc_context_sched_disable(guc, ce, guc_id); 3324 } 3325 3326 static bool bypass_sched_disable(struct intel_guc *guc, 3327 struct intel_context *ce) 3328 { 3329 lockdep_assert_held(&ce->guc_state.lock); 3330 GEM_BUG_ON(intel_context_is_child(ce)); 3331 3332 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3333 !ctx_id_mapped(guc, ce->guc_id.id)) { 3334 clr_context_enabled(ce); 3335 return true; 3336 } 3337 3338 return !context_enabled(ce); 3339 } 3340 3341 static void __delay_sched_disable(struct work_struct *wrk) 3342 { 3343 struct intel_context *ce = 3344 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3345 struct intel_guc *guc = ce_to_guc(ce); 3346 unsigned long flags; 3347 3348 spin_lock_irqsave(&ce->guc_state.lock, flags); 3349 3350 if (bypass_sched_disable(guc, ce)) { 3351 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3352 intel_context_sched_disable_unpin(ce); 3353 } else { 3354 do_sched_disable(guc, ce, flags); 3355 } 3356 } 3357 3358 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3359 { 3360 /* 3361 * parent contexts are perma-pinned, if we are unpinning do schedule 3362 * disable immediately. 3363 */ 3364 if (intel_context_is_parent(ce)) 3365 return true; 3366 3367 /* 3368 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
3369 */ 3370 return guc->submission_state.guc_ids_in_use > 3371 guc->submission_state.sched_disable_gucid_threshold; 3372 } 3373 3374 static void guc_context_sched_disable(struct intel_context *ce) 3375 { 3376 struct intel_guc *guc = ce_to_guc(ce); 3377 u64 delay = guc->submission_state.sched_disable_delay_ms; 3378 unsigned long flags; 3379 3380 spin_lock_irqsave(&ce->guc_state.lock, flags); 3381 3382 if (bypass_sched_disable(guc, ce)) { 3383 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3384 intel_context_sched_disable_unpin(ce); 3385 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3386 delay) { 3387 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3388 mod_delayed_work(system_unbound_wq, 3389 &ce->guc_state.sched_disable_delay_work, 3390 msecs_to_jiffies(delay)); 3391 } else { 3392 do_sched_disable(guc, ce, flags); 3393 } 3394 } 3395 3396 static void guc_context_close(struct intel_context *ce) 3397 { 3398 unsigned long flags; 3399 3400 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3401 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3402 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3403 3404 spin_lock_irqsave(&ce->guc_state.lock, flags); 3405 set_context_close_done(ce); 3406 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3407 } 3408 3409 static inline int guc_lrc_desc_unpin(struct intel_context *ce) 3410 { 3411 struct intel_guc *guc = ce_to_guc(ce); 3412 struct intel_gt *gt = guc_to_gt(guc); 3413 unsigned long flags; 3414 bool disabled; 3415 int ret; 3416 3417 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3418 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3419 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3420 GEM_BUG_ON(context_enabled(ce)); 3421 3422 /* Seal race with Reset */ 3423 spin_lock_irqsave(&ce->guc_state.lock, flags); 3424 disabled = submission_disabled(guc); 3425 if (likely(!disabled)) { 3426 /* 3427 * Take a gt-pm ref and change context state to be destroyed. 3428 * NOTE: a G2H IRQ that comes after will put this gt-pm ref back 3429 */ 3430 __intel_gt_pm_get(gt); 3431 set_context_destroyed(ce); 3432 clr_context_registered(ce); 3433 } 3434 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3435 3436 if (unlikely(disabled)) { 3437 release_guc_id(guc, ce); 3438 __guc_context_destroy(ce); 3439 return 0; 3440 } 3441 3442 /* 3443 * GuC is active, lets destroy this context, but at this point we can still be racing 3444 * with suspend, so we undo everything if the H2G fails in deregister_context so 3445 * that GuC reset will find this context during clean up. 3446 * 3447 * There is a race condition where the reset code could have altered 3448 * this context's state and done a wakeref put before we try to 3449 * deregister it here. So check if the context is still set to be 3450 * destroyed before undoing earlier changes, to avoid two wakeref puts 3451 * on the same context. 3452 */ 3453 ret = deregister_context(ce, ce->guc_id.id); 3454 if (ret) { 3455 bool pending_destroyed; 3456 spin_lock_irqsave(&ce->guc_state.lock, flags); 3457 pending_destroyed = context_destroyed(ce); 3458 if (pending_destroyed) { 3459 set_context_registered(ce); 3460 clr_context_destroyed(ce); 3461 } 3462 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3463 /* 3464 * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements 3465 * the wakeref immediately but per function spec usage call this after unlock. 
3466 */ 3467 if (pending_destroyed) 3468 intel_wakeref_put_async(&gt->wakeref); 3469 } 3470 3471 return ret; 3472 } 3473 3474 static void __guc_context_destroy(struct intel_context *ce) 3475 { 3476 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3477 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3478 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3479 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3480 3481 lrc_fini(ce); 3482 intel_context_fini(ce); 3483 3484 if (intel_engine_is_virtual(ce->engine)) { 3485 struct guc_virtual_engine *ve = 3486 container_of(ce, typeof(*ve), context); 3487 3488 if (ve->base.breadcrumbs) 3489 intel_breadcrumbs_put(ve->base.breadcrumbs); 3490 3491 kfree(ve); 3492 } else { 3493 intel_context_free(ce); 3494 } 3495 } 3496 3497 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3498 { 3499 struct intel_context *ce; 3500 unsigned long flags; 3501 3502 GEM_BUG_ON(!submission_disabled(guc) && 3503 guc_submission_initialized(guc)); 3504 3505 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3506 spin_lock_irqsave(&guc->submission_state.lock, flags); 3507 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3508 struct intel_context, 3509 destroyed_link); 3510 if (ce) 3511 list_del_init(&ce->destroyed_link); 3512 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3513 3514 if (!ce) 3515 break; 3516 3517 release_guc_id(guc, ce); 3518 __guc_context_destroy(ce); 3519 } 3520 } 3521 3522 static void deregister_destroyed_contexts(struct intel_guc *guc) 3523 { 3524 struct intel_context *ce; 3525 unsigned long flags; 3526 3527 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3528 spin_lock_irqsave(&guc->submission_state.lock, flags); 3529 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3530 struct intel_context, 3531 destroyed_link); 3532 if (ce) 3533 list_del_init(&ce->destroyed_link); 3534 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3535 3536 if (!ce) 3537 break; 3538 3539 if (guc_lrc_desc_unpin(ce)) { 3540 /* 3541 * This means the GuC's CT link was severed mid-way, which could happen 3542 * in suspend-resume corner cases. In this case, put the 3543 * context back into the destroyed_contexts list which will 3544 * get picked up on the next context deregistration event or 3545 * purged in a GuC sanitization event (reset/unload/wedged/...). 3546 */ 3547 spin_lock_irqsave(&guc->submission_state.lock, flags); 3548 list_add_tail(&ce->destroyed_link, 3549 &guc->submission_state.destroyed_contexts); 3550 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3551 /* Bail now since the list might never be emptied if H2Gs keep failing */ 3552 break; 3553 } 3554 3555 } 3556 } 3557 3558 static void destroyed_worker_func(struct work_struct *w) 3559 { 3560 struct intel_guc *guc = container_of(w, struct intel_guc, 3561 submission_state.destroyed_worker); 3562 struct intel_gt *gt = guc_to_gt(guc); 3563 intel_wakeref_t wakeref; 3564 3565 /* 3566 * In rare cases we can get here via async context-free fence-signals that 3567 * come very late in the suspend flow or very early in the resume flow. In these 3568 * cases, the GuC won't be ready, but just skipping it here is fine as these 3569 * pending-destroy-contexts get destroyed totally at GuC reset time at the 3570 * end of suspend, or
this worker can be picked up later on the next 3571 * context destruction trigger after resume-completes 3572 */ 3573 if (!intel_guc_is_ready(guc)) 3574 return; 3575 3576 with_intel_gt_pm(gt, wakeref) 3577 deregister_destroyed_contexts(guc); 3578 } 3579 3580 static void guc_context_destroy(struct kref *kref) 3581 { 3582 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3583 struct intel_guc *guc = ce_to_guc(ce); 3584 unsigned long flags; 3585 bool destroy; 3586 3587 /* 3588 * If the guc_id is invalid this context has been stolen and we can free 3589 * it immediately. Also can be freed immediately if the context is not 3590 * registered with the GuC or the GuC is in the middle of a reset. 3591 */ 3592 spin_lock_irqsave(&guc->submission_state.lock, flags); 3593 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3594 !ctx_id_mapped(guc, ce->guc_id.id); 3595 if (likely(!destroy)) { 3596 if (!list_empty(&ce->guc_id.link)) 3597 list_del_init(&ce->guc_id.link); 3598 list_add_tail(&ce->destroyed_link, 3599 &guc->submission_state.destroyed_contexts); 3600 } else { 3601 __release_guc_id(guc, ce); 3602 } 3603 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3604 if (unlikely(destroy)) { 3605 __guc_context_destroy(ce); 3606 return; 3607 } 3608 3609 /* 3610 * We use a worker to issue the H2G to deregister the context as we can 3611 * take the GT PM for the first time which isn't allowed from an atomic 3612 * context. 3613 */ 3614 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3615 } 3616 3617 static int guc_context_alloc(struct intel_context *ce) 3618 { 3619 return lrc_alloc(ce, ce->engine); 3620 } 3621 3622 static void __guc_context_set_prio(struct intel_guc *guc, 3623 struct intel_context *ce) 3624 { 3625 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3626 struct context_policy policy; 3627 3628 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3629 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3630 __guc_context_set_context_policies(guc, &policy, true); 3631 } else { 3632 u32 action[] = { 3633 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3634 ce->guc_id.id, 3635 ce->guc_state.prio, 3636 }; 3637 3638 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3639 } 3640 } 3641 3642 static void guc_context_set_prio(struct intel_guc *guc, 3643 struct intel_context *ce, 3644 u8 prio) 3645 { 3646 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3647 prio > GUC_CLIENT_PRIORITY_NORMAL); 3648 lockdep_assert_held(&ce->guc_state.lock); 3649 3650 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3651 !context_registered(ce)) { 3652 ce->guc_state.prio = prio; 3653 return; 3654 } 3655 3656 ce->guc_state.prio = prio; 3657 __guc_context_set_prio(guc, ce); 3658 3659 trace_intel_context_set_prio(ce); 3660 } 3661 3662 static inline u8 map_i915_prio_to_guc_prio(int prio) 3663 { 3664 if (prio == I915_PRIORITY_NORMAL) 3665 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3666 else if (prio < I915_PRIORITY_NORMAL) 3667 return GUC_CLIENT_PRIORITY_NORMAL; 3668 else if (prio < I915_PRIORITY_DISPLAY) 3669 return GUC_CLIENT_PRIORITY_HIGH; 3670 else 3671 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3672 } 3673 3674 static inline void add_context_inflight_prio(struct intel_context *ce, 3675 u8 guc_prio) 3676 { 3677 lockdep_assert_held(&ce->guc_state.lock); 3678 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3679 3680 ++ce->guc_state.prio_count[guc_prio]; 3681 3682 /* Overflow protection */ 3683 
GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3684 } 3685 3686 static inline void sub_context_inflight_prio(struct intel_context *ce, 3687 u8 guc_prio) 3688 { 3689 lockdep_assert_held(&ce->guc_state.lock); 3690 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3691 3692 /* Underflow protection */ 3693 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3694 3695 --ce->guc_state.prio_count[guc_prio]; 3696 } 3697 3698 static inline void update_context_prio(struct intel_context *ce) 3699 { 3700 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3701 int i; 3702 3703 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3704 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3705 3706 lockdep_assert_held(&ce->guc_state.lock); 3707 3708 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3709 if (ce->guc_state.prio_count[i]) { 3710 guc_context_set_prio(guc, ce, i); 3711 break; 3712 } 3713 } 3714 } 3715 3716 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3717 { 3718 /* Lower value is higher priority */ 3719 return new_guc_prio < old_guc_prio; 3720 } 3721 3722 static void add_to_context(struct i915_request *rq) 3723 { 3724 struct intel_context *ce = request_to_scheduling_context(rq); 3725 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3726 3727 GEM_BUG_ON(intel_context_is_child(ce)); 3728 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3729 3730 spin_lock(&ce->guc_state.lock); 3731 list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3732 3733 if (rq->guc_prio == GUC_PRIO_INIT) { 3734 rq->guc_prio = new_guc_prio; 3735 add_context_inflight_prio(ce, rq->guc_prio); 3736 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3737 sub_context_inflight_prio(ce, rq->guc_prio); 3738 rq->guc_prio = new_guc_prio; 3739 add_context_inflight_prio(ce, rq->guc_prio); 3740 } 3741 update_context_prio(ce); 3742 3743 spin_unlock(&ce->guc_state.lock); 3744 } 3745 3746 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3747 { 3748 lockdep_assert_held(&ce->guc_state.lock); 3749 3750 if (rq->guc_prio != GUC_PRIO_INIT && 3751 rq->guc_prio != GUC_PRIO_FINI) { 3752 sub_context_inflight_prio(ce, rq->guc_prio); 3753 update_context_prio(ce); 3754 } 3755 rq->guc_prio = GUC_PRIO_FINI; 3756 } 3757 3758 static void remove_from_context(struct i915_request *rq) 3759 { 3760 struct intel_context *ce = request_to_scheduling_context(rq); 3761 3762 GEM_BUG_ON(intel_context_is_child(ce)); 3763 3764 spin_lock_irq(&ce->guc_state.lock); 3765 3766 list_del_init(&rq->sched.link); 3767 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3768 3769 /* Prevent further __await_execution() registering a cb, then flush */ 3770 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3771 3772 guc_prio_fini(rq, ce); 3773 3774 spin_unlock_irq(&ce->guc_state.lock); 3775 3776 atomic_dec(&ce->guc_id.ref); 3777 i915_request_notify_execute_cb_imm(rq); 3778 } 3779 3780 static const struct intel_context_ops guc_context_ops = { 3781 .flags = COPS_RUNTIME_CYCLES, 3782 .alloc = guc_context_alloc, 3783 3784 .close = guc_context_close, 3785 3786 .pre_pin = guc_context_pre_pin, 3787 .pin = guc_context_pin, 3788 .unpin = guc_context_unpin, 3789 .post_unpin = guc_context_post_unpin, 3790 3791 .revoke = guc_context_revoke, 3792 3793 .cancel_request = guc_context_cancel_request, 3794 3795 .enter = intel_context_enter_engine, 3796 .exit = intel_context_exit_engine, 3797 3798 .sched_disable = guc_context_sched_disable, 3799 3800 .update_stats = 
guc_context_update_stats, 3801 3802 .reset = lrc_reset, 3803 .destroy = guc_context_destroy, 3804 3805 .create_virtual = guc_create_virtual, 3806 .create_parallel = guc_create_parallel, 3807 }; 3808 3809 static void submit_work_cb(struct irq_work *wrk) 3810 { 3811 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3812 3813 might_lock(&rq->engine->sched_engine->lock); 3814 i915_sw_fence_complete(&rq->submit); 3815 } 3816 3817 static void __guc_signal_context_fence(struct intel_context *ce) 3818 { 3819 struct i915_request *rq, *rn; 3820 3821 lockdep_assert_held(&ce->guc_state.lock); 3822 3823 if (!list_empty(&ce->guc_state.fences)) 3824 trace_intel_context_fence_release(ce); 3825 3826 /* 3827 * Use an IRQ to ensure locking order of sched_engine->lock -> 3828 * ce->guc_state.lock is preserved. 3829 */ 3830 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3831 guc_fence_link) { 3832 list_del(&rq->guc_fence_link); 3833 irq_work_queue(&rq->submit_work); 3834 } 3835 3836 INIT_LIST_HEAD(&ce->guc_state.fences); 3837 } 3838 3839 static void guc_signal_context_fence(struct intel_context *ce) 3840 { 3841 unsigned long flags; 3842 3843 GEM_BUG_ON(intel_context_is_child(ce)); 3844 3845 spin_lock_irqsave(&ce->guc_state.lock, flags); 3846 clr_context_wait_for_deregister_to_register(ce); 3847 __guc_signal_context_fence(ce); 3848 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3849 } 3850 3851 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3852 { 3853 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3854 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3855 !submission_disabled(ce_to_guc(ce)); 3856 } 3857 3858 static void guc_context_init(struct intel_context *ce) 3859 { 3860 const struct i915_gem_context *ctx; 3861 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3862 3863 rcu_read_lock(); 3864 ctx = rcu_dereference(ce->gem_context); 3865 if (ctx) 3866 prio = ctx->sched.priority; 3867 rcu_read_unlock(); 3868 3869 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3870 3871 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3872 __delay_sched_disable); 3873 3874 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3875 } 3876 3877 static int guc_request_alloc(struct i915_request *rq) 3878 { 3879 struct intel_context *ce = request_to_scheduling_context(rq); 3880 struct intel_guc *guc = ce_to_guc(ce); 3881 unsigned long flags; 3882 int ret; 3883 3884 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3885 3886 /* 3887 * Flush enough space to reduce the likelihood of waiting after 3888 * we start building the request - in which case we will just 3889 * have to repeat work. 3890 */ 3891 rq->reserved_space += GUC_REQUEST_SIZE; 3892 3893 /* 3894 * Note that after this point, we have committed to using 3895 * this request as it is being used to both track the 3896 * state of engine initialisation and liveness of the 3897 * golden renderstate above. Think twice before you try 3898 * to cancel/unwind this request now. 3899 */ 3900 3901 /* Unconditionally invalidate GPU caches and TLBs. */ 3902 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3903 if (ret) 3904 return ret; 3905 3906 rq->reserved_space -= GUC_REQUEST_SIZE; 3907 3908 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3909 guc_context_init(ce); 3910 3911 /* 3912 * If the context gets closed while the execbuf is ongoing, the context 3913 * close code will race with the below code to cancel the delayed work. 
3914 * If the context close wins the race and cancels the work, it will 3915 * immediately call the sched disable (see guc_context_close), so there 3916 * is a chance we can get past this check while the sched_disable code 3917 * is being executed. To make sure that code completes before we check 3918 * the status further down, we wait for the close process to complete. 3919 * Else, this code path could send a request down thinking that the 3920 * context is still in a schedule-enable mode while the GuC ends up 3921 * dropping the request completely because the disable went from the 3922 * context_close path right to the GuC just prior. In the event the CT is 3923 * full, we could potentially need to wait up to 1.5 seconds. 3924 */ 3925 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3926 intel_context_sched_disable_unpin(ce); 3927 else if (intel_context_is_closed(ce)) 3928 if (wait_for(context_close_done(ce), 1500)) 3929 guc_warn(guc, "timed out waiting on context sched close before realloc\n"); 3930 /* 3931 * Call pin_guc_id here rather than in the pinning step as with 3932 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the 3933 * guc_id and creating horrible race conditions. This is especially bad 3934 * when guc_ids are being stolen due to oversubscription. By the time 3935 * this function is reached, it is guaranteed that the guc_id will be 3936 * persistent until the generated request is retired, thus sealing these 3937 * race conditions. It is still safe to fail here if guc_ids are 3938 * exhausted and return -EAGAIN to the user indicating that they can try 3939 * again in the future. 3940 * 3941 * There is no need for a lock here as the timeline mutex ensures at 3942 * most one context can be executing this code path at once. The 3943 * guc_id_ref is incremented once for every request in flight and 3944 * decremented on each retire. When it is zero, a lock around the 3945 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3946 */ 3947 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3948 goto out; 3949 3950 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3951 if (unlikely(ret < 0)) 3952 return ret; 3953 if (context_needs_register(ce, !!ret)) { 3954 ret = try_context_registration(ce, true); 3955 if (unlikely(ret)) { /* unwind */ 3956 if (ret == -EPIPE) { 3957 disable_submission(guc); 3958 goto out; /* GPU will be reset */ 3959 } 3960 atomic_dec(&ce->guc_id.ref); 3961 unpin_guc_id(guc, ce); 3962 return ret; 3963 } 3964 } 3965 3966 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3967 3968 out: 3969 /* 3970 * We block all requests on this context if a G2H is pending for a 3971 * schedule disable or context deregistration as the GuC will fail a 3972 * schedule enable or context registration if either G2H is pending 3973 * respectively. Once a G2H returns, the fence is released that is 3974 * blocking these requests (see guc_signal_context_fence).
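 *
 * Rough sketch of that mechanism, using only calls visible in this file:
 *
 *   here:
 *     init_irq_work(&rq->submit_work, submit_work_cb);
 *     i915_sw_fence_await(&rq->submit);
 *     list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
 *
 *   later, from the relevant G2H handler:
 *     guc_signal_context_fence(ce)
 *       -> __guc_signal_context_fence(ce)
 *            -> irq_work_queue(&rq->submit_work)
 *               (submit_work_cb() then completes &rq->submit)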
3975 */ 3976 spin_lock_irqsave(&ce->guc_state.lock, flags); 3977 if (context_wait_for_deregister_to_register(ce) || 3978 context_pending_disable(ce)) { 3979 init_irq_work(&rq->submit_work, submit_work_cb); 3980 i915_sw_fence_await(&rq->submit); 3981 3982 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3983 } 3984 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3985 3986 return 0; 3987 } 3988 3989 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3990 struct i915_gem_ww_ctx *ww, 3991 void **vaddr) 3992 { 3993 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3994 3995 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3996 } 3997 3998 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3999 { 4000 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4001 int ret = __guc_context_pin(ce, engine, vaddr); 4002 intel_engine_mask_t tmp, mask = ce->engine->mask; 4003 4004 if (likely(!ret)) 4005 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 4006 intel_engine_pm_get(engine); 4007 4008 return ret; 4009 } 4010 4011 static void guc_virtual_context_unpin(struct intel_context *ce) 4012 { 4013 intel_engine_mask_t tmp, mask = ce->engine->mask; 4014 struct intel_engine_cs *engine; 4015 struct intel_guc *guc = ce_to_guc(ce); 4016 4017 GEM_BUG_ON(context_enabled(ce)); 4018 GEM_BUG_ON(intel_context_is_barrier(ce)); 4019 4020 unpin_guc_id(guc, ce); 4021 lrc_unpin(ce); 4022 4023 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 4024 intel_engine_pm_put_async(engine); 4025 } 4026 4027 static void guc_virtual_context_enter(struct intel_context *ce) 4028 { 4029 intel_engine_mask_t tmp, mask = ce->engine->mask; 4030 struct intel_engine_cs *engine; 4031 4032 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 4033 intel_engine_pm_get(engine); 4034 4035 intel_timeline_enter(ce->timeline); 4036 } 4037 4038 static void guc_virtual_context_exit(struct intel_context *ce) 4039 { 4040 intel_engine_mask_t tmp, mask = ce->engine->mask; 4041 struct intel_engine_cs *engine; 4042 4043 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 4044 intel_engine_pm_put(engine); 4045 4046 intel_timeline_exit(ce->timeline); 4047 } 4048 4049 static int guc_virtual_context_alloc(struct intel_context *ce) 4050 { 4051 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4052 4053 return lrc_alloc(ce, engine); 4054 } 4055 4056 static const struct intel_context_ops virtual_guc_context_ops = { 4057 .flags = COPS_RUNTIME_CYCLES, 4058 .alloc = guc_virtual_context_alloc, 4059 4060 .close = guc_context_close, 4061 4062 .pre_pin = guc_virtual_context_pre_pin, 4063 .pin = guc_virtual_context_pin, 4064 .unpin = guc_virtual_context_unpin, 4065 .post_unpin = guc_context_post_unpin, 4066 4067 .revoke = guc_context_revoke, 4068 4069 .cancel_request = guc_context_cancel_request, 4070 4071 .enter = guc_virtual_context_enter, 4072 .exit = guc_virtual_context_exit, 4073 4074 .sched_disable = guc_context_sched_disable, 4075 .update_stats = guc_context_update_stats, 4076 4077 .destroy = guc_context_destroy, 4078 4079 .get_sibling = guc_virtual_get_sibling, 4080 }; 4081 4082 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 4083 { 4084 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4085 struct intel_guc *guc = ce_to_guc(ce); 4086 int ret; 4087 4088 GEM_BUG_ON(!intel_context_is_parent(ce)); 4089 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4090 4091 ret = 
pin_guc_id(guc, ce); 4092 if (unlikely(ret < 0)) 4093 return ret; 4094 4095 return __guc_context_pin(ce, engine, vaddr); 4096 } 4097 4098 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 4099 { 4100 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 4101 4102 GEM_BUG_ON(!intel_context_is_child(ce)); 4103 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4104 4105 __intel_context_pin(ce->parallel.parent); 4106 return __guc_context_pin(ce, engine, vaddr); 4107 } 4108 4109 static void guc_parent_context_unpin(struct intel_context *ce) 4110 { 4111 struct intel_guc *guc = ce_to_guc(ce); 4112 4113 GEM_BUG_ON(context_enabled(ce)); 4114 GEM_BUG_ON(intel_context_is_barrier(ce)); 4115 GEM_BUG_ON(!intel_context_is_parent(ce)); 4116 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4117 4118 unpin_guc_id(guc, ce); 4119 lrc_unpin(ce); 4120 } 4121 4122 static void guc_child_context_unpin(struct intel_context *ce) 4123 { 4124 GEM_BUG_ON(context_enabled(ce)); 4125 GEM_BUG_ON(intel_context_is_barrier(ce)); 4126 GEM_BUG_ON(!intel_context_is_child(ce)); 4127 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4128 4129 lrc_unpin(ce); 4130 } 4131 4132 static void guc_child_context_post_unpin(struct intel_context *ce) 4133 { 4134 GEM_BUG_ON(!intel_context_is_child(ce)); 4135 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 4136 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 4137 4138 lrc_post_unpin(ce); 4139 intel_context_unpin(ce->parallel.parent); 4140 } 4141 4142 static void guc_child_context_destroy(struct kref *kref) 4143 { 4144 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 4145 4146 __guc_context_destroy(ce); 4147 } 4148 4149 static const struct intel_context_ops virtual_parent_context_ops = { 4150 .alloc = guc_virtual_context_alloc, 4151 4152 .close = guc_context_close, 4153 4154 .pre_pin = guc_context_pre_pin, 4155 .pin = guc_parent_context_pin, 4156 .unpin = guc_parent_context_unpin, 4157 .post_unpin = guc_context_post_unpin, 4158 4159 .revoke = guc_context_revoke, 4160 4161 .cancel_request = guc_context_cancel_request, 4162 4163 .enter = guc_virtual_context_enter, 4164 .exit = guc_virtual_context_exit, 4165 4166 .sched_disable = guc_context_sched_disable, 4167 4168 .destroy = guc_context_destroy, 4169 4170 .get_sibling = guc_virtual_get_sibling, 4171 }; 4172 4173 static const struct intel_context_ops virtual_child_context_ops = { 4174 .alloc = guc_virtual_context_alloc, 4175 4176 .pre_pin = guc_context_pre_pin, 4177 .pin = guc_child_context_pin, 4178 .unpin = guc_child_context_unpin, 4179 .post_unpin = guc_child_context_post_unpin, 4180 4181 .cancel_request = guc_context_cancel_request, 4182 4183 .enter = guc_virtual_context_enter, 4184 .exit = guc_virtual_context_exit, 4185 4186 .destroy = guc_child_context_destroy, 4187 4188 .get_sibling = guc_virtual_get_sibling, 4189 }; 4190 4191 /* 4192 * The below override of the breadcrumbs is enabled when the user configures a 4193 * context for parallel submission (multi-lrc, parent-child). 4194 * 4195 * The overridden breadcrumbs implements an algorithm which allows the GuC to 4196 * safely preempt all the hw contexts configured for parallel submission 4197 * between each BB. The contract between the i915 and GuC is if the parent 4198 * context can be preempted, all the children can be preempted, and the GuC will 4199 * always try to preempt the parent before the children. 
A handshake between the 4200 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 4201 * creating a window to preempt between each set of BBs. 4202 */ 4203 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 4204 u64 offset, u32 len, 4205 const unsigned int flags); 4206 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 4207 u64 offset, u32 len, 4208 const unsigned int flags); 4209 static u32 * 4210 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 4211 u32 *cs); 4212 static u32 * 4213 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 4214 u32 *cs); 4215 4216 static struct intel_context * 4217 guc_create_parallel(struct intel_engine_cs **engines, 4218 unsigned int num_siblings, 4219 unsigned int width) 4220 { 4221 struct intel_engine_cs **siblings = NULL; 4222 struct intel_context *parent = NULL, *ce, *err; 4223 int i, j; 4224 4225 siblings = kmalloc_array(num_siblings, 4226 sizeof(*siblings), 4227 GFP_KERNEL); 4228 if (!siblings) 4229 return ERR_PTR(-ENOMEM); 4230 4231 for (i = 0; i < width; ++i) { 4232 for (j = 0; j < num_siblings; ++j) 4233 siblings[j] = engines[i * num_siblings + j]; 4234 4235 ce = intel_engine_create_virtual(siblings, num_siblings, 4236 FORCE_VIRTUAL); 4237 if (IS_ERR(ce)) { 4238 err = ERR_CAST(ce); 4239 goto unwind; 4240 } 4241 4242 if (i == 0) { 4243 parent = ce; 4244 parent->ops = &virtual_parent_context_ops; 4245 } else { 4246 ce->ops = &virtual_child_context_ops; 4247 intel_context_bind_parent_child(parent, ce); 4248 } 4249 } 4250 4251 parent->parallel.fence_context = dma_fence_context_alloc(1); 4252 4253 parent->engine->emit_bb_start = 4254 emit_bb_start_parent_no_preempt_mid_batch; 4255 parent->engine->emit_fini_breadcrumb = 4256 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4257 parent->engine->emit_fini_breadcrumb_dw = 4258 12 + 4 * parent->parallel.number_children; 4259 for_each_child(parent, ce) { 4260 ce->engine->emit_bb_start = 4261 emit_bb_start_child_no_preempt_mid_batch; 4262 ce->engine->emit_fini_breadcrumb = 4263 emit_fini_breadcrumb_child_no_preempt_mid_batch; 4264 ce->engine->emit_fini_breadcrumb_dw = 16; 4265 } 4266 4267 kfree(siblings); 4268 return parent; 4269 4270 unwind: 4271 if (parent) 4272 intel_context_put(parent); 4273 kfree(siblings); 4274 return err; 4275 } 4276 4277 static bool 4278 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4279 { 4280 struct intel_engine_cs *sibling; 4281 intel_engine_mask_t tmp, mask = b->engine_mask; 4282 bool result = false; 4283 4284 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4285 result |= intel_engine_irq_enable(sibling); 4286 4287 return result; 4288 } 4289 4290 static void 4291 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4292 { 4293 struct intel_engine_cs *sibling; 4294 intel_engine_mask_t tmp, mask = b->engine_mask; 4295 4296 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4297 intel_engine_irq_disable(sibling); 4298 } 4299 4300 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4301 { 4302 int i; 4303 4304 /* 4305 * In GuC submission mode we do not know which physical engine a request 4306 * will be scheduled on, this creates a problem because the breadcrumb 4307 * interrupt is per physical engine. To work around this we attach 4308 * requests and direct all breadcrumb interrupts to the first instance 4309 * of an engine per class. 
In addition all breadcrumb interrupts are 4310 * enabled / disabled across an engine class in unison. 4311 */ 4312 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4313 struct intel_engine_cs *sibling = 4314 engine->gt->engine_class[engine->class][i]; 4315 4316 if (sibling) { 4317 if (engine->breadcrumbs != sibling->breadcrumbs) { 4318 intel_breadcrumbs_put(engine->breadcrumbs); 4319 engine->breadcrumbs = 4320 intel_breadcrumbs_get(sibling->breadcrumbs); 4321 } 4322 break; 4323 } 4324 } 4325 4326 if (engine->breadcrumbs) { 4327 engine->breadcrumbs->engine_mask |= engine->mask; 4328 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4329 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4330 } 4331 } 4332 4333 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4334 int prio) 4335 { 4336 struct intel_context *ce = request_to_scheduling_context(rq); 4337 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4338 4339 /* Short circuit function */ 4340 if (prio < I915_PRIORITY_NORMAL) 4341 return; 4342 4343 spin_lock(&ce->guc_state.lock); 4344 4345 if (rq->guc_prio == GUC_PRIO_FINI) 4346 goto exit; 4347 4348 if (!new_guc_prio_higher(rq->guc_prio, new_guc_prio)) 4349 goto exit; 4350 4351 if (rq->guc_prio != GUC_PRIO_INIT) 4352 sub_context_inflight_prio(ce, rq->guc_prio); 4353 4354 rq->guc_prio = new_guc_prio; 4355 add_context_inflight_prio(ce, rq->guc_prio); 4356 update_context_prio(ce); 4357 4358 exit: 4359 spin_unlock(&ce->guc_state.lock); 4360 } 4361 4362 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4363 { 4364 struct intel_context *ce = request_to_scheduling_context(rq); 4365 4366 spin_lock(&ce->guc_state.lock); 4367 guc_prio_fini(rq, ce); 4368 spin_unlock(&ce->guc_state.lock); 4369 } 4370 4371 static void sanitize_hwsp(struct intel_engine_cs *engine) 4372 { 4373 struct intel_timeline *tl; 4374 4375 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4376 intel_timeline_reset_seqno(tl); 4377 } 4378 4379 static void guc_sanitize(struct intel_engine_cs *engine) 4380 { 4381 /* 4382 * Poison residual state on resume, in case the suspend didn't! 4383 * 4384 * We have to assume that across suspend/resume (or other loss 4385 * of control) that the contents of our pinned buffers has been 4386 * lost, replaced by garbage. Since this doesn't always happen, 4387 * let's poison such state so that we more quickly spot when 4388 * we falsely assume it has been preserved. 4389 */ 4390 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4391 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4392 4393 /* 4394 * The kernel_context HWSP is stored in the status_page. As above, 4395 * that may be lost on resume/initialisation, and so we need to 4396 * reset the value in the HWSP. 
4397 */ 4398 sanitize_hwsp(engine); 4399 4400 /* And scrub the dirty cachelines for the HWSP */ 4401 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4402 4403 intel_engine_reset_pinned_contexts(engine); 4404 } 4405 4406 static void setup_hwsp(struct intel_engine_cs *engine) 4407 { 4408 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4409 4410 ENGINE_WRITE_FW(engine, 4411 RING_HWS_PGA, 4412 i915_ggtt_offset(engine->status_page.vma)); 4413 } 4414 4415 static void start_engine(struct intel_engine_cs *engine) 4416 { 4417 ENGINE_WRITE_FW(engine, 4418 RING_MODE_GEN7, 4419 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4420 4421 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4422 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4423 } 4424 4425 static int guc_resume(struct intel_engine_cs *engine) 4426 { 4427 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4428 4429 intel_mocs_init_engine(engine); 4430 4431 intel_breadcrumbs_reset(engine->breadcrumbs); 4432 4433 setup_hwsp(engine); 4434 start_engine(engine); 4435 4436 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4437 xehp_enable_ccs_engines(engine); 4438 4439 return 0; 4440 } 4441 4442 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4443 { 4444 return !sched_engine->tasklet.callback; 4445 } 4446 4447 static void guc_set_default_submission(struct intel_engine_cs *engine) 4448 { 4449 engine->submit_request = guc_submit_request; 4450 } 4451 4452 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4453 struct intel_context *ce) 4454 { 4455 int ret; 4456 4457 /* 4458 * Note: we purposefully do not check the returns below because 4459 * the registration can only fail if a reset is just starting. 4460 * This is called at the end of reset so presumably another reset 4461 * isn't happening and even if it did this code would be run again. 4462 */ 4463 4464 if (context_guc_id_invalid(ce)) { 4465 ret = pin_guc_id(guc, ce); 4466 4467 if (ret < 0) 4468 return ret; 4469 } 4470 4471 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4472 guc_context_init(ce); 4473 4474 ret = try_context_registration(ce, true); 4475 if (ret) 4476 unpin_guc_id(guc, ce); 4477 4478 return ret; 4479 } 4480 4481 static inline int guc_init_submission(struct intel_guc *guc) 4482 { 4483 struct intel_gt *gt = guc_to_gt(guc); 4484 struct intel_engine_cs *engine; 4485 enum intel_engine_id id; 4486 4487 /* make sure all descriptors are clean... */ 4488 xa_destroy(&guc->context_lookup); 4489 4490 /* 4491 * A reset might have occurred while we had a pending stalled request, 4492 * so make sure we clean that up. 4493 */ 4494 guc->stalled_request = NULL; 4495 guc->submission_stall_reason = STALL_NONE; 4496 4497 /* 4498 * Some contexts might have been pinned before we enabled GuC 4499 * submission, so we need to add them to the GuC bookkeeping. 4500 * Also, after a reset of the GuC we want to make sure that the 4501 * information shared with GuC is properly reset. The kernel LRCs are 4502 * not attached to the gem_context, so they need to be added separately.
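 *
 * Concretely, "adding them to the GuC bookkeeping" is what
 * guc_kernel_context_pin() above does for each pinned kernel context,
 * roughly:
 *
 *   if (context_guc_id_invalid(ce))
 *           pin_guc_id(guc, ce);             // (re)allocate a guc_id
 *   try_context_registration(ce, true);      // register it with the GuC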
4503 */ 4504 for_each_engine(engine, gt, id) { 4505 struct intel_context *ce; 4506 4507 list_for_each_entry(ce, &engine->pinned_contexts_list, 4508 pinned_contexts_link) { 4509 int ret = guc_kernel_context_pin(guc, ce); 4510 4511 if (ret) { 4512 /* No point in trying to clean up as i915 will wedge on failure */ 4513 return ret; 4514 } 4515 } 4516 } 4517 4518 return 0; 4519 } 4520 4521 static void guc_release(struct intel_engine_cs *engine) 4522 { 4523 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4524 4525 intel_engine_cleanup_common(engine); 4526 lrc_fini_wa_ctx(engine); 4527 } 4528 4529 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4530 { 4531 struct intel_engine_cs *e; 4532 intel_engine_mask_t tmp, mask = engine->mask; 4533 4534 for_each_engine_masked(e, engine->gt, mask, tmp) 4535 e->serial++; 4536 } 4537 4538 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4539 { 4540 /* Default vfuncs which can be overridden by each engine. */ 4541 4542 engine->resume = guc_resume; 4543 4544 engine->cops = &guc_context_ops; 4545 engine->request_alloc = guc_request_alloc; 4546 engine->add_active_request = add_to_context; 4547 engine->remove_active_request = remove_from_context; 4548 4549 engine->sched_engine->schedule = i915_schedule; 4550 4551 engine->reset.prepare = guc_engine_reset_prepare; 4552 engine->reset.rewind = guc_rewind_nop; 4553 engine->reset.cancel = guc_reset_nop; 4554 engine->reset.finish = guc_reset_nop; 4555 4556 engine->emit_flush = gen8_emit_flush_xcs; 4557 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4558 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4559 if (GRAPHICS_VER(engine->i915) >= 12) { 4560 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4561 engine->emit_flush = gen12_emit_flush_xcs; 4562 } 4563 engine->set_default_submission = guc_set_default_submission; 4564 engine->busyness = guc_engine_busyness; 4565 4566 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4567 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4568 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4569 4570 /* Wa_14014475959:dg2 */ 4571 if (engine->class == COMPUTE_CLASS) 4572 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 4573 IS_DG2(engine->i915)) 4574 engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT; 4575 4576 /* Wa_16019325821 */ 4577 /* Wa_14019159160 */ 4578 if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) && 4579 IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) 4580 engine->flags |= I915_ENGINE_USES_WA_HOLD_SWITCHOUT; 4581 4582 /* 4583 * TODO: GuC supports timeslicing and semaphores as well, but they're 4584 * handled by the firmware so some minor tweaks are required before 4585 * enabling. 
4586 * 4587 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4588 */ 4589 4590 engine->emit_bb_start = gen8_emit_bb_start; 4591 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) 4592 engine->emit_bb_start = xehp_emit_bb_start; 4593 } 4594 4595 static void rcs_submission_override(struct intel_engine_cs *engine) 4596 { 4597 switch (GRAPHICS_VER(engine->i915)) { 4598 case 12: 4599 engine->emit_flush = gen12_emit_flush_rcs; 4600 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4601 break; 4602 case 11: 4603 engine->emit_flush = gen11_emit_flush_rcs; 4604 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4605 break; 4606 default: 4607 engine->emit_flush = gen8_emit_flush_rcs; 4608 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4609 break; 4610 } 4611 } 4612 4613 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4614 { 4615 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4616 intel_engine_set_irq_handler(engine, cs_irq_handler); 4617 } 4618 4619 static void guc_sched_engine_destroy(struct kref *kref) 4620 { 4621 struct i915_sched_engine *sched_engine = 4622 container_of(kref, typeof(*sched_engine), ref); 4623 struct intel_guc *guc = sched_engine->private_data; 4624 4625 guc->sched_engine = NULL; 4626 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4627 kfree(sched_engine); 4628 } 4629 4630 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4631 { 4632 struct drm_i915_private *i915 = engine->i915; 4633 struct intel_guc *guc = gt_to_guc(engine->gt); 4634 4635 /* 4636 * The setup relies on several assumptions (e.g. irqs always enabled) 4637 * that are only valid on gen11+ 4638 */ 4639 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4640 4641 if (!guc->sched_engine) { 4642 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4643 if (!guc->sched_engine) 4644 return -ENOMEM; 4645 4646 guc->sched_engine->schedule = i915_schedule; 4647 guc->sched_engine->disabled = guc_sched_engine_disabled; 4648 guc->sched_engine->private_data = guc; 4649 guc->sched_engine->destroy = guc_sched_engine_destroy; 4650 guc->sched_engine->bump_inflight_request_prio = 4651 guc_bump_inflight_request_prio; 4652 guc->sched_engine->retire_inflight_request_prio = 4653 guc_retire_inflight_request_prio; 4654 tasklet_setup(&guc->sched_engine->tasklet, 4655 guc_submission_tasklet); 4656 } 4657 i915_sched_engine_put(engine->sched_engine); 4658 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4659 4660 guc_default_vfuncs(engine); 4661 guc_default_irqs(engine); 4662 guc_init_breadcrumbs(engine); 4663 4664 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4665 rcs_submission_override(engine); 4666 4667 lrc_init_wa_ctx(engine); 4668 4669 /* Finally, take ownership and responsibility for cleanup! 
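 * At this point the engine is fully switched over to the GuC backend: it
 * shares guc->sched_engine, the GuC vfuncs / irq handler / breadcrumbs
 * overrides are installed, and engines with RCS register state also get
 * the render-specific emitters. The two assignments below hand the
 * sanitize and release duties to this backend as well.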
*/ 4670 engine->sanitize = guc_sanitize; 4671 engine->release = guc_release; 4672 4673 return 0; 4674 } 4675 4676 struct scheduling_policy { 4677 /* internal data */ 4678 u32 max_words, num_words; 4679 u32 count; 4680 /* API data */ 4681 struct guc_update_scheduling_policy h2g; 4682 }; 4683 4684 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4685 { 4686 u32 *start = (void *)&policy->h2g; 4687 u32 *end = policy->h2g.data + policy->num_words; 4688 size_t delta = end - start; 4689 4690 return delta; 4691 } 4692 4693 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4694 { 4695 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4696 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4697 policy->num_words = 0; 4698 policy->count = 0; 4699 4700 return policy; 4701 } 4702 4703 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4704 u32 action, u32 *data, u32 len) 4705 { 4706 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4707 4708 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4709 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4710 FIELD_PREP(GUC_KLV_0_LEN, len); 4711 memcpy(klv_ptr, data, sizeof(u32) * len); 4712 policy->num_words += 1 + len; 4713 policy->count++; 4714 } 4715 4716 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4717 struct scheduling_policy *policy) 4718 { 4719 int ret; 4720 4721 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4722 __guc_scheduling_policy_action_size(policy)); 4723 if (ret < 0) { 4724 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4725 ERR_PTR(ret)); 4726 return ret; 4727 } 4728 4729 if (ret != policy->count) { 4730 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4731 ret, policy->count); 4732 if (ret > policy->count) 4733 return -EPROTO; 4734 } 4735 4736 return 0; 4737 } 4738 4739 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4740 { 4741 struct scheduling_policy policy; 4742 struct intel_gt *gt = guc_to_gt(guc); 4743 intel_wakeref_t wakeref; 4744 int ret; 4745 4746 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4747 return 0; 4748 4749 __guc_scheduling_policy_start_klv(&policy); 4750 4751 with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) { 4752 u32 yield[] = { 4753 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4754 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4755 }; 4756 4757 __guc_scheduling_policy_add_klv(&policy, 4758 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4759 yield, ARRAY_SIZE(yield)); 4760 4761 ret = __guc_action_set_scheduling_policies(guc, &policy); 4762 } 4763 4764 return ret; 4765 } 4766 4767 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4768 { 4769 struct intel_gt *gt = guc_to_gt(guc); 4770 u32 val; 4771 4772 if (GRAPHICS_VER(gt->i915) < 12) 4773 return; 4774 4775 if (to_guc) 4776 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4777 else 4778 val = 0; 4779 4780 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4781 } 4782 4783 int intel_guc_submission_enable(struct intel_guc *guc) 4784 { 4785 int ret; 4786 4787 /* Semaphore interrupt enable and route to GuC */ 4788 guc_route_semaphores(guc, true); 4789 4790 ret = guc_init_submission(guc); 4791 if (ret) 4792 goto fail_sem; 4793 4794 ret = guc_init_engine_stats(guc); 4795 if (ret) 4796 goto fail_sem; 4797 4798 ret = guc_init_global_schedule_policy(guc); 4799 if (ret) 4800 goto fail_stats; 4801
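/*
 * Illustrative summary of the enable sequence above: route semaphore
 * interrupts to the GuC, re-register the pinned kernel contexts
 * (guc_init_submission), set up the engine-stats machinery
 * (guc_init_engine_stats), then push the global scheduling policy KLVs.
 * The fail_* labels below unwind in reverse order.
 */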
4802 return 0; 4803 4804 fail_stats: 4805 guc_fini_engine_stats(guc); 4806 fail_sem: 4807 guc_route_semaphores(guc, false); 4808 return ret; 4809 } 4810 4811 /* Note: By the time we're here, GuC may have already been reset */ 4812 void intel_guc_submission_disable(struct intel_guc *guc) 4813 { 4814 guc_cancel_busyness_worker(guc); 4815 4816 /* Semaphore interrupt disable and route to host */ 4817 guc_route_semaphores(guc, false); 4818 } 4819 4820 static bool __guc_submission_supported(struct intel_guc *guc) 4821 { 4822 /* GuC submission is unavailable for pre-Gen11 */ 4823 return intel_guc_is_supported(guc) && 4824 GRAPHICS_VER(guc_to_i915(guc)) >= 11; 4825 } 4826 4827 static bool __guc_submission_selected(struct intel_guc *guc) 4828 { 4829 struct drm_i915_private *i915 = guc_to_i915(guc); 4830 4831 if (!intel_guc_submission_is_supported(guc)) 4832 return false; 4833 4834 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4835 } 4836 4837 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4838 { 4839 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4840 } 4841 4842 /* 4843 * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher 4844 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4845 * operation. This matches the 30fps game-render + encode (real world) workload this 4846 * knob was tested against. 4847 */ 4848 #define SCHED_DISABLE_DELAY_MS 34 4849 4850 /* 4851 * A threshold of 75% is a reasonable starting point considering that real world apps 4852 * generally don't get anywhere near this. 4853 */ 4854 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4855 (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) 4856 4857 void intel_guc_submission_init_early(struct intel_guc *guc) 4858 { 4859 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4860 4861 spin_lock_init(&guc->submission_state.lock); 4862 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4863 ida_init(&guc->submission_state.guc_ids); 4864 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4865 INIT_WORK(&guc->submission_state.destroyed_worker, 4866 destroyed_worker_func); 4867 INIT_WORK(&guc->submission_state.reset_fail_worker, 4868 reset_fail_worker_func); 4869 4870 spin_lock_init(&guc->timestamp.lock); 4871 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4872 4873 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4874 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4875 guc->submission_state.sched_disable_gucid_threshold = 4876 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4877 guc->submission_supported = __guc_submission_supported(guc); 4878 guc->submission_selected = __guc_submission_selected(guc); 4879 } 4880 4881 static inline struct intel_context * 4882 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4883 { 4884 struct intel_context *ce; 4885 4886 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4887 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4888 return NULL; 4889 } 4890 4891 ce = __get_context(guc, ctx_id); 4892 if (unlikely(!ce)) { 4893 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4894 return NULL; 4895 } 4896 4897 if (unlikely(intel_context_is_child(ce))) { 4898 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4899 return NULL; 4900 } 4901 4902 return ce; 4903 } 4904 4905 static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno) 4906 { 4907 struct intel_guc_tlb_wait *wait; 4908 unsigned 
long flags; 4909 4910 xa_lock_irqsave(&guc->tlb_lookup, flags); 4911 wait = xa_load(&guc->tlb_lookup, seqno); 4912 4913 if (wait) 4914 wake_up(&wait->wq); 4915 else 4916 guc_dbg(guc, 4917 "Stale TLB invalidation response with seqno %d\n", seqno); 4918 4919 xa_unlock_irqrestore(&guc->tlb_lookup, flags); 4920 } 4921 4922 int intel_guc_tlb_invalidation_done(struct intel_guc *guc, 4923 const u32 *payload, u32 len) 4924 { 4925 if (len < 1) 4926 return -EPROTO; 4927 4928 wait_wake_outstanding_tlb_g2h(guc, payload[0]); 4929 return 0; 4930 } 4931 4932 static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout) 4933 { 4934 /* 4935 * This is equivalent to wait_woken() with the exception that 4936 * we do not wake up early if the kthread task has been completed. 4937 * As we are called from page reclaim in any task context, 4938 * we may be invoked from stopped kthreads, but we *must* 4939 * complete the wait from the HW. 4940 */ 4941 do { 4942 set_current_state(TASK_UNINTERRUPTIBLE); 4943 if (wq_entry->flags & WQ_FLAG_WOKEN) 4944 break; 4945 4946 timeout = schedule_timeout(timeout); 4947 } while (timeout); 4948 4949 /* See wait_woken() and woken_wake_function() */ 4950 __set_current_state(TASK_RUNNING); 4951 smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); 4952 4953 return timeout; 4954 } 4955 4956 static bool intel_gt_is_enabled(const struct intel_gt *gt) 4957 { 4958 /* Check if GT is wedged or suspended */ 4959 if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915)) 4960 return false; 4961 return true; 4962 } 4963 4964 static int guc_send_invalidate_tlb(struct intel_guc *guc, 4965 enum intel_guc_tlb_invalidation_type type) 4966 { 4967 struct intel_guc_tlb_wait _wq, *wq = &_wq; 4968 struct intel_gt *gt = guc_to_gt(guc); 4969 DEFINE_WAIT_FUNC(wait, woken_wake_function); 4970 int err; 4971 u32 seqno; 4972 u32 action[] = { 4973 INTEL_GUC_ACTION_TLB_INVALIDATION, 4974 0, 4975 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) | 4976 REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK, 4977 INTEL_GUC_TLB_INVAL_MODE_HEAVY) | 4978 INTEL_GUC_TLB_INVAL_FLUSH_CACHE, 4979 }; 4980 u32 size = ARRAY_SIZE(action); 4981 4982 /* 4983 * Early guard against GT enablement. TLB invalidation should not be 4984 * attempted if the GT is disabled due to suspend/wedge. 4985 */ 4986 if (!intel_gt_is_enabled(gt)) 4987 return -EINVAL; 4988 4989 init_waitqueue_head(&_wq.wq); 4990 4991 if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq, 4992 xa_limit_32b, &guc->next_seqno, 4993 GFP_ATOMIC | __GFP_NOWARN) < 0) { 4994 /* Under severe memory pressure? Serialise TLB allocations */ 4995 xa_lock_irq(&guc->tlb_lookup); 4996 wq = xa_load(&guc->tlb_lookup, guc->serial_slot); 4997 wait_event_lock_irq(wq->wq, 4998 !READ_ONCE(wq->busy), 4999 guc->tlb_lookup.xa_lock); 5000 /* 5001 * Update wq->busy under lock to ensure only one waiter can 5002 * issue the TLB invalidation command using the serial slot at a 5003 * time. The condition is set to true before releasing the lock 5004 * so that other caller continue to wait until woken up again. 5005 */ 5006 wq->busy = true; 5007 xa_unlock_irq(&guc->tlb_lookup); 5008 5009 seqno = guc->serial_slot; 5010 } 5011 5012 action[1] = seqno; 5013 5014 add_wait_queue(&wq->wq, &wait); 5015 5016 /* This is a critical reclaim path and thus we must loop here. */ 5017 err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true); 5018 if (err) 5019 goto out; 5020 5021 /* 5022 * Late guard against GT enablement. 
It is not an error for the TLB 5023 * invalidation to time out if the GT is disabled during the process 5024 * due to suspend/wedge. In fact, the TLB invalidation is cancelled 5025 * in this case. 5026 */ 5027 if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) && 5028 intel_gt_is_enabled(gt)) { 5029 guc_err(guc, 5030 "TLB invalidation response timed out for seqno %u\n", seqno); 5031 err = -ETIME; 5032 } 5033 out: 5034 remove_wait_queue(&wq->wq, &wait); 5035 if (seqno != guc->serial_slot) 5036 xa_erase_irq(&guc->tlb_lookup, seqno); 5037 5038 return err; 5039 } 5040 5041 /* Send a H2G command to invalidate the TLBs at engine level and beyond. */ 5042 int intel_guc_invalidate_tlb_engines(struct intel_guc *guc) 5043 { 5044 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES); 5045 } 5046 5047 /* Send a H2G command to invalidate the GuC's internal TLB. */ 5048 int intel_guc_invalidate_tlb_guc(struct intel_guc *guc) 5049 { 5050 return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC); 5051 } 5052 5053 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 5054 const u32 *msg, 5055 u32 len) 5056 { 5057 struct intel_context *ce; 5058 u32 ctx_id; 5059 5060 if (unlikely(len < 1)) { 5061 guc_err(guc, "Invalid length %u\n", len); 5062 return -EPROTO; 5063 } 5064 ctx_id = msg[0]; 5065 5066 ce = g2h_context_lookup(guc, ctx_id); 5067 if (unlikely(!ce)) 5068 return -EPROTO; 5069 5070 trace_intel_context_deregister_done(ce); 5071 5072 #ifdef CONFIG_DRM_I915_SELFTEST 5073 if (unlikely(ce->drop_deregister)) { 5074 ce->drop_deregister = false; 5075 return 0; 5076 } 5077 #endif 5078 5079 if (context_wait_for_deregister_to_register(ce)) { 5080 struct intel_runtime_pm *runtime_pm = 5081 &ce->engine->gt->i915->runtime_pm; 5082 intel_wakeref_t wakeref; 5083 5084 /* 5085 * Previous owner of this guc_id has been deregistered, now safe 5086 * register this context. 
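 *
 * This is the tail end of the guc_id steal / re-registration handshake;
 * roughly (using only calls from this file):
 *
 *   a context takes over a guc_id whose previous owner must first be
 *   deregistered:
 *     -> the deregister H2G is sent and the context is flagged
 *        context_wait_for_deregister_to_register()
 *     -> new requests are parked on ce->guc_state.fences meanwhile
 *   the deregister-done G2H lands here:
 *     -> register_context(ce, true) below
 *     -> guc_signal_context_fence(ce) releases the parked requests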
5087 */ 5088 with_intel_runtime_pm(runtime_pm, wakeref) 5089 register_context(ce, true); 5090 guc_signal_context_fence(ce); 5091 intel_context_put(ce); 5092 } else if (context_destroyed(ce)) { 5093 /* Context has been destroyed */ 5094 intel_gt_pm_put_async_untracked(guc_to_gt(guc)); 5095 release_guc_id(guc, ce); 5096 __guc_context_destroy(ce); 5097 } 5098 5099 decr_outstanding_submission_g2h(guc); 5100 5101 return 0; 5102 } 5103 5104 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 5105 const u32 *msg, 5106 u32 len) 5107 { 5108 struct intel_context *ce; 5109 unsigned long flags; 5110 u32 ctx_id; 5111 5112 if (unlikely(len < 2)) { 5113 guc_err(guc, "Invalid length %u\n", len); 5114 return -EPROTO; 5115 } 5116 ctx_id = msg[0]; 5117 5118 ce = g2h_context_lookup(guc, ctx_id); 5119 if (unlikely(!ce)) 5120 return -EPROTO; 5121 5122 if (unlikely(context_destroyed(ce) || 5123 (!context_pending_enable(ce) && 5124 !context_pending_disable(ce)))) { 5125 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 5126 ce->guc_state.sched_state, ctx_id); 5127 return -EPROTO; 5128 } 5129 5130 trace_intel_context_sched_done(ce); 5131 5132 if (context_pending_enable(ce)) { 5133 #ifdef CONFIG_DRM_I915_SELFTEST 5134 if (unlikely(ce->drop_schedule_enable)) { 5135 ce->drop_schedule_enable = false; 5136 return 0; 5137 } 5138 #endif 5139 5140 spin_lock_irqsave(&ce->guc_state.lock, flags); 5141 clr_context_pending_enable(ce); 5142 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 5143 } else if (context_pending_disable(ce)) { 5144 bool banned; 5145 5146 #ifdef CONFIG_DRM_I915_SELFTEST 5147 if (unlikely(ce->drop_schedule_disable)) { 5148 ce->drop_schedule_disable = false; 5149 return 0; 5150 } 5151 #endif 5152 5153 /* 5154 * Unpin must be done before __guc_signal_context_fence, 5155 * otherwise a race exists between the requests getting 5156 * submitted + retired before this unpin completes resulting in 5157 * the pin_count going to zero and the context still being 5158 * enabled. 
5159 */ 5160 intel_context_sched_disable_unpin(ce); 5161 5162 spin_lock_irqsave(&ce->guc_state.lock, flags); 5163 banned = context_banned(ce); 5164 clr_context_banned(ce); 5165 clr_context_pending_disable(ce); 5166 __guc_signal_context_fence(ce); 5167 guc_blocked_fence_complete(ce); 5168 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 5169 5170 if (banned) { 5171 guc_cancel_context_requests(ce); 5172 intel_engine_signal_breadcrumbs(ce->engine); 5173 } 5174 } 5175 5176 decr_outstanding_submission_g2h(guc); 5177 intel_context_put(ce); 5178 5179 return 0; 5180 } 5181 5182 static void capture_error_state(struct intel_guc *guc, 5183 struct intel_context *ce) 5184 { 5185 struct intel_gt *gt = guc_to_gt(guc); 5186 struct drm_i915_private *i915 = gt->i915; 5187 intel_wakeref_t wakeref; 5188 intel_engine_mask_t engine_mask; 5189 5190 if (intel_engine_is_virtual(ce->engine)) { 5191 struct intel_engine_cs *e; 5192 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 5193 5194 engine_mask = 0; 5195 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 5196 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 5197 5198 if (match) { 5199 intel_engine_set_hung_context(e, ce); 5200 engine_mask |= e->mask; 5201 i915_increase_reset_engine_count(&i915->gpu_error, 5202 e); 5203 } 5204 } 5205 5206 if (!engine_mask) { 5207 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 5208 ce->guc_id.id, ce->engine->name); 5209 engine_mask = ~0U; 5210 } 5211 } else { 5212 intel_engine_set_hung_context(ce->engine, ce); 5213 engine_mask = ce->engine->mask; 5214 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); 5215 } 5216 5217 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 5218 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 5219 } 5220 5221 static void guc_context_replay(struct intel_context *ce) 5222 { 5223 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 5224 5225 __guc_reset_context(ce, ce->engine->mask); 5226 tasklet_hi_schedule(&sched_engine->tasklet); 5227 } 5228 5229 static void guc_handle_context_reset(struct intel_guc *guc, 5230 struct intel_context *ce) 5231 { 5232 bool capture = intel_context_is_schedulable(ce); 5233 5234 trace_intel_context_reset(ce); 5235 5236 guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 5237 capture ? "Got" : "Ignoring", 5238 ce->guc_id.id, ce->engine->name, 5239 str_yes_no(intel_context_is_exiting(ce)), 5240 str_yes_no(intel_context_is_banned(ce))); 5241 5242 if (capture) { 5243 capture_error_state(guc, ce); 5244 guc_context_replay(ce); 5245 } 5246 } 5247 5248 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 5249 const u32 *msg, u32 len) 5250 { 5251 struct intel_context *ce; 5252 unsigned long flags; 5253 int ctx_id; 5254 5255 if (unlikely(len != 1)) { 5256 guc_err(guc, "Invalid length %u", len); 5257 return -EPROTO; 5258 } 5259 5260 ctx_id = msg[0]; 5261 5262 /* 5263 * The context lookup uses the xarray but lookups only require an RCU lock 5264 * not the full spinlock. So take the lock explicitly and keep it until the 5265 * context has been reference count locked to ensure it can't be destroyed 5266 * asynchronously until the reset is done. 
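 *
 * (A reference is taken below before the xa lock is dropped; the matching
 *  intel_context_put() at the bottom of this function releases it once
 *  guc_handle_context_reset() has run.)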
5267 */ 5268 xa_lock_irqsave(&guc->context_lookup, flags); 5269 ce = g2h_context_lookup(guc, ctx_id); 5270 if (ce) 5271 intel_context_get(ce); 5272 xa_unlock_irqrestore(&guc->context_lookup, flags); 5273 5274 if (unlikely(!ce)) 5275 return -EPROTO; 5276 5277 guc_handle_context_reset(guc, ce); 5278 intel_context_put(ce); 5279 5280 return 0; 5281 } 5282 5283 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 5284 const u32 *msg, u32 len) 5285 { 5286 u32 status; 5287 5288 if (unlikely(len != 1)) { 5289 guc_dbg(guc, "Invalid length %u", len); 5290 return -EPROTO; 5291 } 5292 5293 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 5294 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 5295 guc_warn(guc, "No space for error capture"); 5296 5297 intel_guc_capture_process(guc); 5298 5299 return 0; 5300 } 5301 5302 struct intel_engine_cs * 5303 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 5304 { 5305 struct intel_gt *gt = guc_to_gt(guc); 5306 u8 engine_class = guc_class_to_engine_class(guc_class); 5307 5308 /* Class index is checked in class converter */ 5309 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 5310 5311 return gt->engine_class[engine_class][instance]; 5312 } 5313 5314 static void reset_fail_worker_func(struct work_struct *w) 5315 { 5316 struct intel_guc *guc = container_of(w, struct intel_guc, 5317 submission_state.reset_fail_worker); 5318 struct intel_gt *gt = guc_to_gt(guc); 5319 intel_engine_mask_t reset_fail_mask; 5320 unsigned long flags; 5321 5322 spin_lock_irqsave(&guc->submission_state.lock, flags); 5323 reset_fail_mask = guc->submission_state.reset_fail_mask; 5324 guc->submission_state.reset_fail_mask = 0; 5325 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5326 5327 if (likely(reset_fail_mask)) { 5328 struct intel_engine_cs *engine; 5329 enum intel_engine_id id; 5330 5331 /* 5332 * GuC is toast at this point - it dead loops after sending the failed 5333 * reset notification. So need to manually determine the guilty context. 5334 * Note that it should be reliable to do this here because the GuC is 5335 * toast and will not be scheduling behind the KMD's back. 5336 */ 5337 for_each_engine_masked(engine, gt, reset_fail_mask, id) 5338 intel_guc_find_hung_context(engine); 5339 5340 intel_gt_handle_error(gt, reset_fail_mask, 5341 I915_ERROR_CAPTURE, 5342 "GuC failed to reset engine mask=0x%x", 5343 reset_fail_mask); 5344 } 5345 } 5346 5347 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 5348 const u32 *msg, u32 len) 5349 { 5350 struct intel_engine_cs *engine; 5351 u8 guc_class, instance; 5352 u32 reason; 5353 unsigned long flags; 5354 5355 if (unlikely(len != 3)) { 5356 guc_err(guc, "Invalid length %u", len); 5357 return -EPROTO; 5358 } 5359 5360 guc_class = msg[0]; 5361 instance = msg[1]; 5362 reason = msg[2]; 5363 5364 engine = intel_guc_lookup_engine(guc, guc_class, instance); 5365 if (unlikely(!engine)) { 5366 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 5367 return -EPROTO; 5368 } 5369 5370 /* 5371 * This is an unexpected failure of a hardware feature. So, log a real 5372 * error message not just the informational that comes with the reset. 
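 *
 * For reference, the three payload dwords decoded above are simply:
 *   msg[0] = GuC engine class, msg[1] = instance within the class,
 *   msg[2] = failure reason reported by the GuC.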
5373 */ 5374 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 5375 guc_class, instance, engine->name, reason); 5376 5377 spin_lock_irqsave(&guc->submission_state.lock, flags); 5378 guc->submission_state.reset_fail_mask |= engine->mask; 5379 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 5380 5381 /* 5382 * A GT reset flushes this worker queue (G2H handler) so we must use 5383 * another worker to trigger a GT reset. 5384 */ 5385 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 5386 5387 return 0; 5388 } 5389 5390 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 5391 { 5392 struct intel_guc *guc = gt_to_guc(engine->gt); 5393 struct intel_context *ce; 5394 struct i915_request *rq; 5395 unsigned long index; 5396 unsigned long flags; 5397 5398 /* Reset called during driver load? GuC not yet initialised! */ 5399 if (unlikely(!guc_submission_initialized(guc))) 5400 return; 5401 5402 xa_lock_irqsave(&guc->context_lookup, flags); 5403 xa_for_each(&guc->context_lookup, index, ce) { 5404 bool found; 5405 5406 if (!kref_get_unless_zero(&ce->ref)) 5407 continue; 5408 5409 xa_unlock(&guc->context_lookup); 5410 5411 if (!intel_context_is_pinned(ce)) 5412 goto next; 5413 5414 if (intel_engine_is_virtual(ce->engine)) { 5415 if (!(ce->engine->mask & engine->mask)) 5416 goto next; 5417 } else { 5418 if (ce->engine != engine) 5419 goto next; 5420 } 5421 5422 found = false; 5423 spin_lock(&ce->guc_state.lock); 5424 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5425 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5426 continue; 5427 5428 found = true; 5429 break; 5430 } 5431 spin_unlock(&ce->guc_state.lock); 5432 5433 if (found) { 5434 intel_engine_set_hung_context(engine, ce); 5435 5436 /* Can only cope with one hang at a time... */ 5437 intel_context_put(ce); 5438 xa_lock(&guc->context_lookup); 5439 goto done; 5440 } 5441 5442 next: 5443 intel_context_put(ce); 5444 xa_lock(&guc->context_lookup); 5445 } 5446 done: 5447 xa_unlock_irqrestore(&guc->context_lookup, flags); 5448 } 5449 5450 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5451 struct i915_request *hung_rq, 5452 struct drm_printer *m) 5453 { 5454 struct intel_guc *guc = gt_to_guc(engine->gt); 5455 struct intel_context *ce; 5456 unsigned long index; 5457 unsigned long flags; 5458 5459 /* Reset called during driver load? GuC not yet initialised! 

void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
                                    struct i915_request *hung_rq,
                                    struct drm_printer *m)
{
        struct intel_guc *guc = gt_to_guc(engine->gt);
        struct intel_context *ce;
        unsigned long index;
        unsigned long flags;

        /* Reset called during driver load? GuC not yet initialised! */
        if (unlikely(!guc_submission_initialized(guc)))
                return;

        xa_lock_irqsave(&guc->context_lookup, flags);
        xa_for_each(&guc->context_lookup, index, ce) {
                if (!kref_get_unless_zero(&ce->ref))
                        continue;

                xa_unlock(&guc->context_lookup);

                if (!intel_context_is_pinned(ce))
                        goto next;

                if (intel_engine_is_virtual(ce->engine)) {
                        if (!(ce->engine->mask & engine->mask))
                                goto next;
                } else {
                        if (ce->engine != engine)
                                goto next;
                }

                spin_lock(&ce->guc_state.lock);
                intel_engine_dump_active_requests(&ce->guc_state.requests,
                                                  hung_rq, m);
                spin_unlock(&ce->guc_state.lock);

next:
                intel_context_put(ce);
                xa_lock(&guc->context_lookup);
        }
        xa_unlock_irqrestore(&guc->context_lookup, flags);
}

void intel_guc_submission_print_info(struct intel_guc *guc,
                                     struct drm_printer *p)
{
        struct i915_sched_engine *sched_engine = guc->sched_engine;
        struct rb_node *rb;
        unsigned long flags;

        if (!sched_engine)
                return;

        drm_printf(p, "GuC Submission API Version: %d.%d.%d\n",
                   guc->submission_version.major, guc->submission_version.minor,
                   guc->submission_version.patch);
        drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
                   atomic_read(&guc->outstanding_submission_g2h));
        drm_printf(p, "GuC tasklet count: %u\n",
                   atomic_read(&sched_engine->tasklet.count));

        spin_lock_irqsave(&sched_engine->lock, flags);
        drm_printf(p, "Requests in GuC submit tasklet:\n");
        for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
                struct i915_priolist *pl = to_priolist(rb);
                struct i915_request *rq;

                priolist_for_each_request(rq, pl)
                        drm_printf(p, "guc_id=%u, seqno=%llu\n",
                                   rq->context->guc_id.id,
                                   rq->fence.seqno);
        }
        spin_unlock_irqrestore(&sched_engine->lock, flags);
        drm_printf(p, "\n");
}

static inline void guc_log_context_priority(struct drm_printer *p,
                                            struct intel_context *ce)
{
        int i;

        drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
        drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
        for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
             i < GUC_CLIENT_PRIORITY_NUM; ++i) {
                drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
                           i, ce->guc_state.prio_count[i]);
        }
        drm_printf(p, "\n");
}

static inline void guc_log_context(struct drm_printer *p,
                                   struct intel_context *ce)
{
        drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
        drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
        if (intel_context_pin_if_active(ce)) {
                drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
                           ce->ring->head,
                           ce->lrc_reg_state[CTX_RING_HEAD]);
                drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
                           ce->ring->tail,
                           ce->lrc_reg_state[CTX_RING_TAIL]);
                intel_context_unpin(ce);
        } else {
                drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
                           ce->ring->head);
                drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
                           ce->ring->tail);
        }
        drm_printf(p, "\t\tContext Pin Count: %u\n",
                   atomic_read(&ce->pin_count));
        drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
                   atomic_read(&ce->guc_id.ref));
        drm_printf(p, "\t\tSchedule State: 0x%x\n",
                   ce->guc_state.sched_state);
}
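
/*
 * Debug helper below: prints the GuC state of every registered context. For a
 * parent context it also dumps the work queue head/tail/status and, when the
 * no-preempt-mid-batch emitters are in use, the children go/join semaphore
 * values, followed by each child context.
 */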

void
intel_guc_submission_print_context_info(struct intel_guc *guc,
                                        struct drm_printer *p)
{
        struct intel_context *ce;
        unsigned long index;
        unsigned long flags;

        xa_lock_irqsave(&guc->context_lookup, flags);
        xa_for_each(&guc->context_lookup, index, ce) {
                GEM_BUG_ON(intel_context_is_child(ce));

                guc_log_context(p, ce);
                guc_log_context_priority(p, ce);

                if (intel_context_is_parent(ce)) {
                        struct intel_context *child;

                        drm_printf(p, "\t\tNumber children: %u\n",
                                   ce->parallel.number_children);

                        if (ce->parallel.guc.wq_status) {
                                drm_printf(p, "\t\tWQI Head: %u\n",
                                           READ_ONCE(*ce->parallel.guc.wq_head));
                                drm_printf(p, "\t\tWQI Tail: %u\n",
                                           READ_ONCE(*ce->parallel.guc.wq_tail));
                                drm_printf(p, "\t\tWQI Status: %u\n",
                                           READ_ONCE(*ce->parallel.guc.wq_status));
                        }

                        if (ce->engine->emit_bb_start ==
                            emit_bb_start_parent_no_preempt_mid_batch) {
                                u8 i;

                                drm_printf(p, "\t\tChildren Go: %u\n",
                                           get_children_go_value(ce));
                                for (i = 0; i < ce->parallel.number_children; ++i)
                                        drm_printf(p, "\t\tChildren Join: %u\n",
                                                   get_children_join_value(ce, i));
                        }

                        for_each_child(ce, child)
                                guc_log_context(p, child);
                }
        }
        xa_unlock_irqrestore(&guc->context_lookup, flags);
}

static inline u32 get_children_go_addr(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_parent(ce));

        return i915_ggtt_offset(ce->state) +
               __get_parent_scratch_offset(ce) +
               offsetof(struct parent_scratch, go.semaphore);
}

static inline u32 get_children_join_addr(struct intel_context *ce,
                                         u8 child_index)
{
        GEM_BUG_ON(!intel_context_is_parent(ce));

        return i915_ggtt_offset(ce->state) +
               __get_parent_scratch_offset(ce) +
               offsetof(struct parent_scratch, join[child_index].semaphore);
}

#define PARENT_GO_BB                    1
#define PARENT_GO_FINI_BREADCRUMB       0
#define CHILD_GO_BB                     1
#define CHILD_GO_FINI_BREADCRUMB        0
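
/*
 * Rough sketch of the parent/child handshake emitted by the helpers below
 * (restating the code, not an additional contract):
 *
 *   BB start:
 *     child:  write PARENT_GO_BB to its join semaphore,
 *             poll the go semaphore for CHILD_GO_BB,
 *             disable preemption, jump to its batch
 *     parent: poll every child's join semaphore for PARENT_GO_BB,
 *             disable preemption, write CHILD_GO_BB to the go semaphore,
 *             jump to its batch
 *
 *   Fini breadcrumb: the mirror sequence using PARENT_GO_FINI_BREADCRUMB /
 *   CHILD_GO_FINI_BREADCRUMB, this time re-enabling preemption, so the whole
 *   parallel submission runs without being preempted mid batch.
 */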
static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                     u64 offset, u32 len,
                                                     const unsigned int flags)
{
        struct intel_context *ce = rq->context;
        u32 *cs;
        u8 i;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Wait on children */
        for (i = 0; i < ce->parallel.number_children; ++i) {
                *cs++ = (MI_SEMAPHORE_WAIT |
                         MI_SEMAPHORE_GLOBAL_GTT |
                         MI_SEMAPHORE_POLL |
                         MI_SEMAPHORE_SAD_EQ_SDD);
                *cs++ = PARENT_GO_BB;
                *cs++ = get_children_join_addr(ce, i);
                *cs++ = 0;
        }

        /* Turn off preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
        *cs++ = MI_NOOP;

        /* Tell children go */
        cs = gen8_emit_ggtt_write(cs,
                                  CHILD_GO_BB,
                                  get_children_go_addr(ce),
                                  0);

        /* Jump to batch */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        return 0;
}

static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
                                                    u64 offset, u32 len,
                                                    const unsigned int flags)
{
        struct intel_context *ce = rq->context;
        struct intel_context *parent = intel_context_to_parent(ce);
        u32 *cs;

        GEM_BUG_ON(!intel_context_is_child(ce));

        cs = intel_ring_begin(rq, 12);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Signal parent */
        cs = gen8_emit_ggtt_write(cs,
                                  PARENT_GO_BB,
                                  get_children_join_addr(parent,
                                                         ce->parallel.child_index),
                                  0);

        /* Wait on parent for go */
        *cs++ = (MI_SEMAPHORE_WAIT |
                 MI_SEMAPHORE_GLOBAL_GTT |
                 MI_SEMAPHORE_POLL |
                 MI_SEMAPHORE_SAD_EQ_SDD);
        *cs++ = CHILD_GO_BB;
        *cs++ = get_children_go_addr(parent);
        *cs++ = 0;

        /* Turn off preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* Jump to batch */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
        *cs++ = upper_32_bits(offset);

        intel_ring_advance(rq, cs);

        return 0;
}

static u32 *
__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                   u32 *cs)
{
        struct intel_context *ce = rq->context;
        u8 i;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        /* Wait on children */
        for (i = 0; i < ce->parallel.number_children; ++i) {
                *cs++ = (MI_SEMAPHORE_WAIT |
                         MI_SEMAPHORE_GLOBAL_GTT |
                         MI_SEMAPHORE_POLL |
                         MI_SEMAPHORE_SAD_EQ_SDD);
                *cs++ = PARENT_GO_FINI_BREADCRUMB;
                *cs++ = get_children_join_addr(ce, i);
                *cs++ = 0;
        }

        /* Turn on preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        /* Tell children go */
        cs = gen8_emit_ggtt_write(cs,
                                  CHILD_GO_FINI_BREADCRUMB,
                                  get_children_go_addr(ce),
                                  0);

        return cs;
}

/*
 * If this is true, a submission of multi-lrc requests had an error and the
 * requests need to be skipped. The front end (execbuf IOCTL) should've called
 * i915_request_skip which squashes the BB but we still need to emit the fini
 * breadcrumb seqno write. At this point we don't know how many of the
 * requests in the multi-lrc submission were generated so we can't do the
 * handshake between the parent and children (e.g. if 4 requests should be
 * generated but the 2nd hit an error only 1 would be seen by the GuC
 * backend). Simply skip the handshake, but still emit the breadcrumb seqno,
 * if an error has occurred on any of the requests in the submission /
 * relationship.
 */
static inline bool skip_handshake(struct i915_request *rq)
{
        return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
}

#define NON_SKIP_LEN    6
static u32 *
emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
                                                 u32 *cs)
{
        struct intel_context *ce = rq->context;
        __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
        __maybe_unused u32 *start_fini_breadcrumb_cs = cs;

        GEM_BUG_ON(!intel_context_is_parent(ce));

        if (unlikely(skip_handshake(rq))) {
                /*
                 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
                 * the NON_SKIP_LEN comes from the length of the emits below.
                 */
                memset(cs, 0, sizeof(u32) *
                       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
                cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
        } else {
                cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
        }

        /* Emit fini breadcrumb */
        before_fini_breadcrumb_user_interrupt_cs = cs;
        cs = gen8_emit_ggtt_write(cs,
                                  rq->fence.seqno,
                                  i915_request_active_timeline(rq)->hwsp_offset,
                                  0);

        /* User interrupt */
        *cs++ = MI_USER_INTERRUPT;
        *cs++ = MI_NOOP;

        /* Ensure our math for skip + emit is correct */
        GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
                   cs);
        GEM_BUG_ON(start_fini_breadcrumb_cs +
                   ce->engine->emit_fini_breadcrumb_dw != cs);

        rq->tail = intel_ring_offset(rq, cs);

        return cs;
}

static u32 *
__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
                                                  u32 *cs)
{
        struct intel_context *ce = rq->context;
        struct intel_context *parent = intel_context_to_parent(ce);

        GEM_BUG_ON(!intel_context_is_child(ce));

        /* Turn on preemption */
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
        *cs++ = MI_NOOP;

        /* Signal parent */
        cs = gen8_emit_ggtt_write(cs,
                                  PARENT_GO_FINI_BREADCRUMB,
                                  get_children_join_addr(parent,
                                                         ce->parallel.child_index),
                                  0);

        /* Wait on parent for go */
        *cs++ = (MI_SEMAPHORE_WAIT |
                 MI_SEMAPHORE_GLOBAL_GTT |
                 MI_SEMAPHORE_POLL |
                 MI_SEMAPHORE_SAD_EQ_SDD);
        *cs++ = CHILD_GO_FINI_BREADCRUMB;
        *cs++ = get_children_go_addr(parent);
        *cs++ = 0;

        return cs;
}
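
/*
 * NON_SKIP_LEN (6) corresponds to the dwords emitted after the optional
 * handshake in the fini breadcrumb helpers: a 4-dword GGTT seqno write plus
 * MI_USER_INTERRUPT and a padding MI_NOOP. The GEM_BUG_ONs in the emitters
 * check this accounting against engine->emit_fini_breadcrumb_dw.
 */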
5869 */ 5870 memset(cs, 0, sizeof(u32) * 5871 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5872 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5873 } else { 5874 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5875 } 5876 5877 /* Emit fini breadcrumb */ 5878 before_fini_breadcrumb_user_interrupt_cs = cs; 5879 cs = gen8_emit_ggtt_write(cs, 5880 rq->fence.seqno, 5881 i915_request_active_timeline(rq)->hwsp_offset, 5882 0); 5883 5884 /* User interrupt */ 5885 *cs++ = MI_USER_INTERRUPT; 5886 *cs++ = MI_NOOP; 5887 5888 /* Ensure our math for skip + emit is correct */ 5889 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5890 cs); 5891 GEM_BUG_ON(start_fini_breadcrumb_cs + 5892 ce->engine->emit_fini_breadcrumb_dw != cs); 5893 5894 rq->tail = intel_ring_offset(rq, cs); 5895 5896 return cs; 5897 } 5898 5899 #undef NON_SKIP_LEN 5900 5901 static struct intel_context * 5902 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5903 unsigned long flags) 5904 { 5905 struct guc_virtual_engine *ve; 5906 struct intel_guc *guc; 5907 unsigned int n; 5908 int err; 5909 5910 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5911 if (!ve) 5912 return ERR_PTR(-ENOMEM); 5913 5914 guc = gt_to_guc(siblings[0]->gt); 5915 5916 ve->base.i915 = siblings[0]->i915; 5917 ve->base.gt = siblings[0]->gt; 5918 ve->base.uncore = siblings[0]->uncore; 5919 ve->base.id = -1; 5920 5921 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5922 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5923 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5924 ve->base.saturated = ALL_ENGINES; 5925 5926 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5927 5928 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5929 5930 ve->base.cops = &virtual_guc_context_ops; 5931 ve->base.request_alloc = guc_request_alloc; 5932 ve->base.bump_serial = virtual_guc_bump_serial; 5933 5934 ve->base.submit_request = guc_submit_request; 5935 5936 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5937 5938 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); 5939 ve->base.mask = VIRTUAL_ENGINES; 5940 5941 intel_context_init(&ve->context, &ve->base); 5942 5943 for (n = 0; n < count; n++) { 5944 struct intel_engine_cs *sibling = siblings[n]; 5945 5946 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5947 if (sibling->mask & ve->base.mask) { 5948 guc_dbg(guc, "duplicate %s entry in load balancer\n", 5949 sibling->name); 5950 err = -EINVAL; 5951 goto err_put; 5952 } 5953 5954 ve->base.mask |= sibling->mask; 5955 ve->base.logical_mask |= sibling->logical_mask; 5956 5957 if (n != 0 && ve->base.class != sibling->class) { 5958 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n", 5959 sibling->class, ve->base.class); 5960 err = -EINVAL; 5961 goto err_put; 5962 } else if (n == 0) { 5963 ve->base.class = sibling->class; 5964 ve->base.uabi_class = sibling->uabi_class; 5965 snprintf(ve->base.name, sizeof(ve->base.name), 5966 "v%dx%d", ve->base.class, count); 5967 ve->base.context_size = sibling->context_size; 5968 5969 ve->base.add_active_request = 5970 sibling->add_active_request; 5971 ve->base.remove_active_request = 5972 sibling->remove_active_request; 5973 ve->base.emit_bb_start = sibling->emit_bb_start; 5974 ve->base.emit_flush = sibling->emit_flush; 5975 ve->base.emit_init_breadcrumb = 5976 sibling->emit_init_breadcrumb; 5977 ve->base.emit_fini_breadcrumb = 5978 sibling->emit_fini_breadcrumb; 5979 ve->base.emit_fini_breadcrumb_dw = 5980 

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
                   unsigned long flags)
{
        struct guc_virtual_engine *ve;
        struct intel_guc *guc;
        unsigned int n;
        int err;

        ve = kzalloc(sizeof(*ve), GFP_KERNEL);
        if (!ve)
                return ERR_PTR(-ENOMEM);

        guc = gt_to_guc(siblings[0]->gt);

        ve->base.i915 = siblings[0]->i915;
        ve->base.gt = siblings[0]->gt;
        ve->base.uncore = siblings[0]->uncore;
        ve->base.id = -1;

        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.saturated = ALL_ENGINES;

        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

        ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);

        ve->base.cops = &virtual_guc_context_ops;
        ve->base.request_alloc = guc_request_alloc;
        ve->base.bump_serial = virtual_guc_bump_serial;

        ve->base.submit_request = guc_submit_request;

        ve->base.flags = I915_ENGINE_IS_VIRTUAL;

        BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
        ve->base.mask = VIRTUAL_ENGINES;

        intel_context_init(&ve->context, &ve->base);

        for (n = 0; n < count; n++) {
                struct intel_engine_cs *sibling = siblings[n];

                GEM_BUG_ON(!is_power_of_2(sibling->mask));
                if (sibling->mask & ve->base.mask) {
                        guc_dbg(guc, "duplicate %s entry in load balancer\n",
                                sibling->name);
                        err = -EINVAL;
                        goto err_put;
                }

                ve->base.mask |= sibling->mask;
                ve->base.logical_mask |= sibling->logical_mask;

                if (n != 0 && ve->base.class != sibling->class) {
                        guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
                                sibling->class, ve->base.class);
                        err = -EINVAL;
                        goto err_put;
                } else if (n == 0) {
                        ve->base.class = sibling->class;
                        ve->base.uabi_class = sibling->uabi_class;
                        snprintf(ve->base.name, sizeof(ve->base.name),
                                 "v%dx%d", ve->base.class, count);
                        ve->base.context_size = sibling->context_size;

                        ve->base.add_active_request =
                                sibling->add_active_request;
                        ve->base.remove_active_request =
                                sibling->remove_active_request;
                        ve->base.emit_bb_start = sibling->emit_bb_start;
                        ve->base.emit_flush = sibling->emit_flush;
                        ve->base.emit_init_breadcrumb =
                                sibling->emit_init_breadcrumb;
                        ve->base.emit_fini_breadcrumb =
                                sibling->emit_fini_breadcrumb;
                        ve->base.emit_fini_breadcrumb_dw =
                                sibling->emit_fini_breadcrumb_dw;
                        ve->base.breadcrumbs =
                                intel_breadcrumbs_get(sibling->breadcrumbs);

                        ve->base.flags |= sibling->flags;

                        ve->base.props.timeslice_duration_ms =
                                sibling->props.timeslice_duration_ms;
                        ve->base.props.preempt_timeout_ms =
                                sibling->props.preempt_timeout_ms;
                }
        }

        return &ve->context;

err_put:
        intel_context_put(&ve->context);
        return ERR_PTR(err);
}

bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
{
        struct intel_engine_cs *engine;
        intel_engine_mask_t tmp, mask = ve->mask;

        for_each_engine_masked(engine, ve->gt, mask, tmp)
                if (READ_ONCE(engine->props.heartbeat_interval_ms))
                        return true;

        return false;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
#include "selftest_guc_hangcheck.c"
#endif