1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_ads.h" 7 8 #include <linux/fault-inject.h> 9 10 #include <drm/drm_managed.h> 11 12 #include <generated/xe_wa_oob.h> 13 14 #include "abi/guc_actions_abi.h" 15 #include "regs/xe_engine_regs.h" 16 #include "regs/xe_gt_regs.h" 17 #include "regs/xe_guc_regs.h" 18 #include "xe_bo.h" 19 #include "xe_gt.h" 20 #include "xe_gt_ccs_mode.h" 21 #include "xe_gt_mcr.h" 22 #include "xe_gt_printk.h" 23 #include "xe_guc.h" 24 #include "xe_guc_buf.h" 25 #include "xe_guc_capture.h" 26 #include "xe_guc_ct.h" 27 #include "xe_hw_engine.h" 28 #include "xe_lrc.h" 29 #include "xe_map.h" 30 #include "xe_mmio.h" 31 #include "xe_wa.h" 32 33 /* Slack of a few additional entries per engine */ 34 #define ADS_REGSET_EXTRA_MAX 8 35 36 static struct xe_guc * 37 ads_to_guc(struct xe_guc_ads *ads) 38 { 39 return container_of(ads, struct xe_guc, ads); 40 } 41 42 static struct xe_gt * 43 ads_to_gt(struct xe_guc_ads *ads) 44 { 45 return container_of(ads, struct xe_gt, uc.guc.ads); 46 } 47 48 static struct xe_device * 49 ads_to_xe(struct xe_guc_ads *ads) 50 { 51 return gt_to_xe(ads_to_gt(ads)); 52 } 53 54 static struct iosys_map * 55 ads_to_map(struct xe_guc_ads *ads) 56 { 57 return &ads->bo->vmap; 58 } 59 60 /* UM Queue parameters: */ 61 #define GUC_UM_QUEUE_SIZE (SZ_64K) 62 #define GUC_PAGE_RES_TIMEOUT_US (-1) 63 64 /* 65 * The Additional Data Struct (ADS) has pointers for different buffers used by 66 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and 67 * all the extra buffers indirectly linked via the ADS struct's entries. 68 * 69 * Layout of the ADS blob allocated for the GuC: 70 * 71 * +---------------------------------------+ <== base 72 * | guc_ads | 73 * +---------------------------------------+ 74 * | guc_policies | 75 * +---------------------------------------+ 76 * | guc_gt_system_info | 77 * +---------------------------------------+ 78 * | guc_engine_usage | 79 * +---------------------------------------+ 80 * | guc_um_init_params | 81 * +---------------------------------------+ <== static 82 * | guc_mmio_reg[countA] (engine 0.0) | 83 * | guc_mmio_reg[countB] (engine 0.1) | 84 * | guc_mmio_reg[countC] (engine 1.0) | 85 * | ... | 86 * +---------------------------------------+ <== dynamic 87 * | padding | 88 * +---------------------------------------+ <== 4K aligned 89 * | golden contexts | 90 * +---------------------------------------+ 91 * | padding | 92 * +---------------------------------------+ <== 4K aligned 93 * | w/a KLVs | 94 * +---------------------------------------+ 95 * | padding | 96 * +---------------------------------------+ <== 4K aligned 97 * | capture lists | 98 * +---------------------------------------+ 99 * | padding | 100 * +---------------------------------------+ <== 4K aligned 101 * | UM queues | 102 * +---------------------------------------+ 103 * | padding | 104 * +---------------------------------------+ <== 4K aligned 105 * | private data | 106 * +---------------------------------------+ 107 * | padding | 108 * +---------------------------------------+ <== 4K aligned 109 */ 110 struct __guc_ads_blob { 111 struct guc_ads ads; 112 struct guc_policies policies; 113 struct guc_gt_system_info system_info; 114 struct guc_engine_usage engine_usage; 115 struct guc_um_init_params um_init_params; 116 /* From here on, location is dynamic! Refer to above diagram. */ 117 struct guc_mmio_reg regset[]; 118 } __packed; 119 120 #define ads_blob_read(ads_, field_) \ 121 xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 122 struct __guc_ads_blob, field_) 123 124 #define ads_blob_write(ads_, field_, val_) \ 125 xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 126 struct __guc_ads_blob, field_, val_) 127 128 #define info_map_write(xe_, map_, field_, val_) \ 129 xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) 130 131 #define info_map_read(xe_, map_, field_) \ 132 xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) 133 134 static size_t guc_ads_regset_size(struct xe_guc_ads *ads) 135 { 136 struct xe_device *xe = ads_to_xe(ads); 137 138 xe_assert(xe, ads->regset_size); 139 140 return ads->regset_size; 141 } 142 143 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) 144 { 145 return PAGE_ALIGN(ads->golden_lrc_size); 146 } 147 148 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) 149 { 150 return PAGE_ALIGN(ads->ads_waklv_size); 151 } 152 153 static size_t guc_ads_capture_size(struct xe_guc_ads *ads) 154 { 155 return PAGE_ALIGN(ads->capture_size); 156 } 157 158 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) 159 { 160 struct xe_device *xe = ads_to_xe(ads); 161 162 if (!xe->info.has_usm) 163 return 0; 164 165 return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; 166 } 167 168 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) 169 { 170 return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); 171 } 172 173 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) 174 { 175 return offsetof(struct __guc_ads_blob, regset); 176 } 177 178 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) 179 { 180 size_t offset; 181 182 offset = guc_ads_regset_offset(ads) + 183 guc_ads_regset_size(ads); 184 185 return PAGE_ALIGN(offset); 186 } 187 188 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) 189 { 190 u32 offset; 191 192 offset = guc_ads_golden_lrc_offset(ads) + 193 guc_ads_golden_lrc_size(ads); 194 195 return PAGE_ALIGN(offset); 196 } 197 198 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) 199 { 200 size_t offset; 201 202 offset = guc_ads_waklv_offset(ads) + 203 guc_ads_waklv_size(ads); 204 205 return PAGE_ALIGN(offset); 206 } 207 208 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) 209 { 210 u32 offset; 211 212 offset = guc_ads_capture_offset(ads) + 213 guc_ads_capture_size(ads); 214 215 return PAGE_ALIGN(offset); 216 } 217 218 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) 219 { 220 size_t offset; 221 222 offset = guc_ads_um_queues_offset(ads) + 223 guc_ads_um_queues_size(ads); 224 225 return PAGE_ALIGN(offset); 226 } 227 228 static size_t guc_ads_size(struct xe_guc_ads *ads) 229 { 230 return guc_ads_private_data_offset(ads) + 231 guc_ads_private_data_size(ads); 232 } 233 234 static size_t calculate_regset_size(struct xe_gt *gt) 235 { 236 struct xe_reg_sr_entry *sr_entry; 237 unsigned long sr_idx; 238 struct xe_hw_engine *hwe; 239 enum xe_hw_engine_id id; 240 unsigned int count = 0; 241 242 for_each_hw_engine(hwe, gt, id) 243 xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) 244 count++; 245 246 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; 247 248 if (XE_GT_WA(gt, 1607983814)) 249 count += LNCFCMOCS_REG_COUNT; 250 251 return count * sizeof(struct guc_mmio_reg); 252 } 253 254 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) 255 { 256 struct xe_hw_engine *hwe; 257 enum xe_hw_engine_id id; 258 u32 mask = 0; 259 260 for_each_hw_engine(hwe, gt, id) 261 if (hwe->class == class) 262 mask |= BIT(hwe->instance); 263 264 return mask; 265 } 266 267 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) 268 { 269 struct xe_gt *gt = ads_to_gt(ads); 270 size_t total_size = 0, alloc_size, real_size; 271 int class; 272 273 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 274 if (!engine_enable_mask(gt, class)) 275 continue; 276 277 real_size = xe_gt_lrc_size(gt, class); 278 alloc_size = PAGE_ALIGN(real_size); 279 total_size += alloc_size; 280 } 281 282 return total_size; 283 } 284 285 static void guc_waklv_enable(struct xe_guc_ads *ads, 286 u32 data[], u32 data_len_dw, 287 u32 *offset, u32 *remain, 288 enum xe_guc_klv_ids klv_id) 289 { 290 size_t size = sizeof(u32) * (1 + data_len_dw); 291 292 if (*remain < size) { 293 drm_warn(&ads_to_xe(ads)->drm, 294 "w/a klv buffer too small to add klv id 0x%04X\n", klv_id); 295 return; 296 } 297 298 /* 16:16 key/length */ 299 xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32, 300 FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw)); 301 /* data_len_dw dwords of data */ 302 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), 303 *offset + sizeof(u32), data, data_len_dw * sizeof(u32)); 304 305 *offset += size; 306 *remain -= size; 307 } 308 309 static void guc_waklv_init(struct xe_guc_ads *ads) 310 { 311 struct xe_gt *gt = ads_to_gt(ads); 312 u64 addr_ggtt; 313 u32 offset, remain, size; 314 315 offset = guc_ads_waklv_offset(ads); 316 remain = guc_ads_waklv_size(ads); 317 318 if (XE_GT_WA(gt, 16021333562)) 319 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 320 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); 321 if (XE_GT_WA(gt, 18024947630)) 322 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 323 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING); 324 if (XE_GT_WA(gt, 16022287689)) 325 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 326 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE); 327 328 if (XE_GT_WA(gt, 14022866841)) 329 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 330 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO); 331 332 /* 333 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, 334 * the default value for this register is determined to be 0xC40. This could change in the 335 * future, so GuC depends on KMD to send it the correct value. 336 */ 337 if (XE_GT_WA(gt, 13011645652)) { 338 u32 data = 0xC40; 339 340 guc_waklv_enable(ads, &data, 1, &offset, &remain, 341 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); 342 } 343 344 if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406)) 345 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 346 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET); 347 348 if (GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 44) && XE_GT_WA(gt, 16026508708)) 349 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 350 GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH); 351 if (GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 47) && XE_GT_WA(gt, 16026007364)) { 352 u32 data[] = { 353 0x0, 354 0xF, 355 }; 356 guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain, 357 GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); 358 } 359 360 if (XE_GT_WA(gt, 14020001231)) 361 guc_waklv_enable(ads, NULL, 0, &offset, &remain, 362 GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT); 363 364 size = guc_ads_waklv_size(ads) - remain; 365 if (!size) 366 return; 367 368 offset = guc_ads_waklv_offset(ads); 369 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 370 371 ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt)); 372 ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt)); 373 ads_blob_write(ads, ads.wa_klv_size, size); 374 } 375 376 static int calculate_waklv_size(struct xe_guc_ads *ads) 377 { 378 /* 379 * A single page is both the minimum size possible and 380 * is sufficiently large enough for all current platforms. 381 */ 382 return SZ_4K; 383 } 384 385 #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) 386 387 int xe_guc_ads_init(struct xe_guc_ads *ads) 388 { 389 struct xe_device *xe = ads_to_xe(ads); 390 struct xe_gt *gt = ads_to_gt(ads); 391 struct xe_tile *tile = gt_to_tile(gt); 392 struct xe_bo *bo; 393 394 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 395 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 396 ads->regset_size = calculate_regset_size(gt); 397 ads->ads_waklv_size = calculate_waklv_size(ads); 398 399 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, 400 XE_BO_FLAG_SYSTEM | 401 XE_BO_FLAG_GGTT | 402 XE_BO_FLAG_GGTT_INVALIDATE | 403 XE_BO_FLAG_PINNED_NORESTORE); 404 if (IS_ERR(bo)) 405 return PTR_ERR(bo); 406 407 ads->bo = bo; 408 409 return 0; 410 } 411 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */ 412 413 /** 414 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load 415 * @ads: Additional data structures object 416 * 417 * Recalculate golden_lrc_size, capture_size and regset_size as the number 418 * hardware engines may have changed after the hwconfig was loaded. Also verify 419 * the new sizes fit in the already allocated ADS buffer object. 420 * 421 * Return: 0 on success, negative error code on error. 422 */ 423 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) 424 { 425 struct xe_gt *gt = ads_to_gt(ads); 426 u32 prev_regset_size = ads->regset_size; 427 428 xe_gt_assert(gt, ads->bo); 429 430 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 431 /* Calculate Capture size with worst size */ 432 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 433 ads->regset_size = calculate_regset_size(gt); 434 435 xe_gt_assert(gt, ads->golden_lrc_size + 436 (ads->regset_size - prev_regset_size) <= 437 MAX_GOLDEN_LRC_SIZE); 438 439 return 0; 440 } 441 442 static void guc_policies_init(struct xe_guc_ads *ads) 443 { 444 struct xe_device *xe = ads_to_xe(ads); 445 u32 global_flags = 0; 446 447 ads_blob_write(ads, policies.dpc_promote_time, 448 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); 449 ads_blob_write(ads, policies.max_num_work_items, 450 GLOBAL_POLICY_MAX_NUM_WI); 451 452 if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) 453 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 454 455 ads_blob_write(ads, policies.global_flags, global_flags); 456 ads_blob_write(ads, policies.is_valid, 1); 457 } 458 459 static void fill_engine_enable_masks(struct xe_gt *gt, 460 struct iosys_map *info_map) 461 { 462 struct xe_device *xe = gt_to_xe(gt); 463 464 info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], 465 engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); 466 info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 467 engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); 468 info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 469 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); 470 info_map_write(xe, info_map, 471 engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 472 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); 473 info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], 474 engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); 475 info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], 476 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 477 } 478 479 /* 480 * Write the offsets corresponding to the golden LRCs. The actual data is 481 * populated later by guc_golden_lrc_populate() 482 */ 483 static void guc_golden_lrc_init(struct xe_guc_ads *ads) 484 { 485 struct xe_device *xe = ads_to_xe(ads); 486 struct xe_gt *gt = ads_to_gt(ads); 487 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 488 offsetof(struct __guc_ads_blob, system_info)); 489 size_t alloc_size, real_size; 490 u32 addr_ggtt, offset; 491 int class; 492 493 offset = guc_ads_golden_lrc_offset(ads); 494 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 495 496 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 497 u8 guc_class; 498 499 guc_class = xe_engine_class_to_guc_class(class); 500 501 if (!info_map_read(xe, &info_map, 502 engine_enabled_masks[guc_class])) 503 continue; 504 505 real_size = xe_gt_lrc_size(gt, class); 506 alloc_size = PAGE_ALIGN(real_size); 507 508 /* 509 * This interface is slightly confusing. We need to pass the 510 * base address of the full golden context and the size of just 511 * the engine state, which is the section of the context image 512 * that starts after the execlists LRC registers. This is 513 * required to allow the GuC to restore just the engine state 514 * when a watchdog reset occurs. 515 */ 516 ads_blob_write(ads, ads.eng_state_size[guc_class], 517 xe_lrc_engine_state_size(gt, class)); 518 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 519 addr_ggtt); 520 521 addr_ggtt += alloc_size; 522 } 523 } 524 525 static void guc_mapping_table_init_invalid(struct xe_gt *gt, 526 struct iosys_map *info_map) 527 { 528 struct xe_device *xe = gt_to_xe(gt); 529 unsigned int i, j; 530 531 /* Table must be set to invalid values for entries not used */ 532 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) 533 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) 534 info_map_write(xe, info_map, mapping_table[i][j], 535 GUC_MAX_INSTANCES_PER_CLASS); 536 } 537 538 static void guc_mapping_table_init(struct xe_gt *gt, 539 struct iosys_map *info_map) 540 { 541 struct xe_device *xe = gt_to_xe(gt); 542 struct xe_hw_engine *hwe; 543 enum xe_hw_engine_id id; 544 545 guc_mapping_table_init_invalid(gt, info_map); 546 547 for_each_hw_engine(hwe, gt, id) { 548 u8 guc_class; 549 550 guc_class = xe_engine_class_to_guc_class(hwe->class); 551 info_map_write(xe, info_map, 552 mapping_table[guc_class][hwe->logical_instance], 553 hwe->instance); 554 } 555 } 556 557 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map, 558 enum guc_capture_list_class_type capture_class) 559 { 560 struct xe_device *xe = gt_to_xe(gt); 561 u32 mask; 562 563 switch (capture_class) { 564 case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE: 565 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]); 566 mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]); 567 break; 568 case GUC_CAPTURE_LIST_CLASS_VIDEO: 569 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]); 570 break; 571 case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE: 572 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]); 573 break; 574 case GUC_CAPTURE_LIST_CLASS_BLITTER: 575 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]); 576 break; 577 case GUC_CAPTURE_LIST_CLASS_GSC_OTHER: 578 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]); 579 break; 580 default: 581 mask = 0; 582 } 583 584 return mask; 585 } 586 587 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt, 588 int owner, int type, int class, u32 *total_size, size_t *size, 589 void **pptr) 590 { 591 *size = 0; 592 593 if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) { 594 if (*total_size + *size > ads->capture_size) 595 xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n", 596 *total_size + *size, ads->capture_size); 597 else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr)) 598 return false; 599 } 600 601 return true; 602 } 603 604 static int guc_capture_prep_lists(struct xe_guc_ads *ads) 605 { 606 struct xe_guc *guc = ads_to_guc(ads); 607 struct xe_gt *gt = ads_to_gt(ads); 608 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0; 609 struct iosys_map info_map; 610 size_t size = 0; 611 void *ptr; 612 int i, j; 613 614 /* 615 * GuC Capture's steered reg-list needs to be allocated and initialized 616 * after the GuC-hwconfig is available which guaranteed from here. 617 */ 618 xe_guc_capture_steered_list_init(ads_to_guc(ads)); 619 620 capture_offset = guc_ads_capture_offset(ads); 621 ads_ggtt = xe_bo_ggtt_addr(ads->bo); 622 info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 623 offsetof(struct __guc_ads_blob, system_info)); 624 625 /* first, set aside the first page for a capture_list with zero descriptors */ 626 total_size = PAGE_SIZE; 627 if (!xe_guc_capture_getnullheader(guc, &ptr, &size)) 628 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size); 629 630 null_ggtt = ads_ggtt + capture_offset; 631 capture_offset += PAGE_SIZE; 632 633 /* 634 * Populate capture list : at this point adps is already allocated and 635 * mapped to worst case size 636 */ 637 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { 638 bool write_empty_list; 639 640 for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) { 641 u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j); 642 /* null list if we dont have said engine or list */ 643 if (!engine_mask) { 644 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 645 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 646 continue; 647 } 648 649 /* engine exists: start with engine-class registers */ 650 write_empty_list = get_capture_list(ads, guc, gt, i, 651 GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS, 652 j, &total_size, &size, &ptr); 653 if (!write_empty_list) { 654 ads_blob_write(ads, ads.capture_class[i][j], 655 ads_ggtt + capture_offset); 656 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 657 ptr, size); 658 total_size += size; 659 capture_offset += size; 660 } else { 661 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 662 } 663 664 /* engine exists: next, engine-instance registers */ 665 write_empty_list = get_capture_list(ads, guc, gt, i, 666 GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE, 667 j, &total_size, &size, &ptr); 668 if (!write_empty_list) { 669 ads_blob_write(ads, ads.capture_instance[i][j], 670 ads_ggtt + capture_offset); 671 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 672 ptr, size); 673 total_size += size; 674 capture_offset += size; 675 } else { 676 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 677 } 678 } 679 680 /* global registers is last in our PF/VF loops */ 681 write_empty_list = get_capture_list(ads, guc, gt, i, 682 GUC_STATE_CAPTURE_TYPE_GLOBAL, 683 0, &total_size, &size, &ptr); 684 if (!write_empty_list) { 685 ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset); 686 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, 687 size); 688 total_size += size; 689 capture_offset += size; 690 } else { 691 ads_blob_write(ads, ads.capture_global[i], null_ggtt); 692 } 693 } 694 695 if (ads->capture_size != PAGE_ALIGN(total_size)) 696 xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n", 697 PAGE_ALIGN(total_size), ads->capture_size); 698 return PAGE_ALIGN(total_size); 699 } 700 701 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, 702 struct iosys_map *regset_map, 703 struct xe_reg reg, 704 unsigned int n_entry) 705 { 706 struct guc_mmio_reg entry = { 707 .offset = reg.addr, 708 .flags = reg.masked ? GUC_REGSET_MASKED : 0, 709 }; 710 711 if (reg.mcr) { 712 struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr); 713 u8 group, instance; 714 715 bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg, 716 &group, &instance); 717 718 if (steer) { 719 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group); 720 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance); 721 entry.flags |= GUC_REGSET_STEERING_NEEDED; 722 } 723 } 724 725 xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), 726 &entry, sizeof(entry)); 727 } 728 729 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, 730 struct iosys_map *regset_map, 731 struct xe_hw_engine *hwe) 732 { 733 struct xe_hw_engine *hwe_rcs_reset_domain = 734 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); 735 struct xe_reg_sr_entry *entry; 736 unsigned long idx; 737 unsigned int count = 0; 738 const struct { 739 struct xe_reg reg; 740 bool skip; 741 } *e, extra_regs[] = { 742 { .reg = RING_MODE(hwe->mmio_base), }, 743 { .reg = RING_HWS_PGA(hwe->mmio_base), }, 744 { .reg = RING_IMR(hwe->mmio_base), }, 745 { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, 746 { .reg = CCS_MODE, 747 .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) }, 748 }; 749 u32 i; 750 751 BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); 752 753 xa_for_each(&hwe->reg_sr.xa, idx, entry) 754 guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); 755 756 for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { 757 if (e->skip) 758 continue; 759 760 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); 761 } 762 763 if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { 764 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { 765 guc_mmio_regset_write_one(ads, regset_map, 766 XELP_LNCFCMOCS(i), count++); 767 } 768 } 769 770 return count; 771 } 772 773 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) 774 { 775 size_t regset_offset = guc_ads_regset_offset(ads); 776 struct xe_gt *gt = ads_to_gt(ads); 777 struct xe_hw_engine *hwe; 778 enum xe_hw_engine_id id; 779 u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; 780 struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 781 regset_offset); 782 unsigned int regset_used = 0; 783 784 for_each_hw_engine(hwe, gt, id) { 785 unsigned int count; 786 u8 gc; 787 788 /* 789 * 1. Write all MMIO entries for this exec queue to the table. No 790 * need to worry about fused-off engines and when there are 791 * entries in the regset: the reg_state_list has been zero'ed 792 * by xe_guc_ads_populate() 793 */ 794 count = guc_mmio_regset_write(ads, ®set_map, hwe); 795 if (!count) 796 continue; 797 798 /* 799 * 2. Record in the header (ads.reg_state_list) the address 800 * location and number of entries 801 */ 802 gc = xe_engine_class_to_guc_class(hwe->class); 803 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); 804 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); 805 806 addr += count * sizeof(struct guc_mmio_reg); 807 iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); 808 809 regset_used += count * sizeof(struct guc_mmio_reg); 810 } 811 812 xe_gt_assert(gt, regset_used <= ads->regset_size); 813 } 814 815 static void guc_um_init_params(struct xe_guc_ads *ads) 816 { 817 u32 um_queue_offset = guc_ads_um_queues_offset(ads); 818 struct xe_guc *guc = ads_to_guc(ads); 819 struct xe_device *xe = ads_to_xe(ads); 820 u64 base_dpa; 821 u32 base_ggtt; 822 bool with_dpa; 823 int i; 824 825 with_dpa = !xe_guc_using_main_gamctrl_queues(guc); 826 827 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; 828 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; 829 830 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { 831 /* 832 * Some platforms support USM but not access counters. 833 * Skip ACCESS_COUNTER queue initialization for such 834 * platforms, leaving queue_params[2] zero-initialized 835 * to signal unavailability to the GuC. 836 */ 837 if (i == GUC_UM_HW_QUEUE_ACCESS_COUNTER && 838 !xe->info.has_access_counter) 839 continue; 840 841 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, 842 with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0); 843 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, 844 base_ggtt + (i * GUC_UM_QUEUE_SIZE)); 845 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, 846 GUC_UM_QUEUE_SIZE); 847 } 848 849 ads_blob_write(ads, um_init_params.page_response_timeout_in_us, 850 GUC_PAGE_RES_TIMEOUT_US); 851 } 852 853 static void guc_doorbell_init(struct xe_guc_ads *ads) 854 { 855 struct xe_device *xe = ads_to_xe(ads); 856 struct xe_gt *gt = ads_to_gt(ads); 857 858 if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { 859 u32 distdbreg = 860 xe_mmio_read32(>->mmio, DIST_DBS_POPULATED); 861 862 ads_blob_write(ads, 863 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], 864 REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); 865 } 866 } 867 868 /** 869 * xe_guc_ads_populate_minimal - populate minimal ADS 870 * @ads: Additional data structures object 871 * 872 * This function populates a minimal ADS that does not support submissions but 873 * enough so the GuC can load and the hwconfig table can be read. 874 */ 875 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) 876 { 877 struct xe_gt *gt = ads_to_gt(ads); 878 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 879 offsetof(struct __guc_ads_blob, system_info)); 880 u32 base = xe_bo_ggtt_addr(ads->bo); 881 882 xe_gt_assert(gt, ads->bo); 883 884 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); 885 guc_policies_init(ads); 886 guc_golden_lrc_init(ads); 887 guc_mapping_table_init_invalid(gt, &info_map); 888 guc_doorbell_init(ads); 889 890 ads_blob_write(ads, ads.scheduler_policies, base + 891 offsetof(struct __guc_ads_blob, policies)); 892 ads_blob_write(ads, ads.gt_system_info, base + 893 offsetof(struct __guc_ads_blob, system_info)); 894 ads_blob_write(ads, ads.private_data, base + 895 guc_ads_private_data_offset(ads)); 896 } 897 898 void xe_guc_ads_populate(struct xe_guc_ads *ads) 899 { 900 struct xe_device *xe = ads_to_xe(ads); 901 struct xe_gt *gt = ads_to_gt(ads); 902 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 903 offsetof(struct __guc_ads_blob, system_info)); 904 u32 base = xe_bo_ggtt_addr(ads->bo); 905 906 xe_gt_assert(gt, ads->bo); 907 908 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); 909 guc_policies_init(ads); 910 fill_engine_enable_masks(gt, &info_map); 911 guc_mmio_reg_state_init(ads); 912 guc_golden_lrc_init(ads); 913 guc_mapping_table_init(gt, &info_map); 914 guc_capture_prep_lists(ads); 915 guc_doorbell_init(ads); 916 guc_waklv_init(ads); 917 918 if (xe->info.has_usm) { 919 guc_um_init_params(ads); 920 ads_blob_write(ads, ads.um_init_data, base + 921 offsetof(struct __guc_ads_blob, um_init_params)); 922 } 923 924 ads_blob_write(ads, ads.scheduler_policies, base + 925 offsetof(struct __guc_ads_blob, policies)); 926 ads_blob_write(ads, ads.gt_system_info, base + 927 offsetof(struct __guc_ads_blob, system_info)); 928 ads_blob_write(ads, ads.private_data, base + 929 guc_ads_private_data_offset(ads)); 930 } 931 932 /* 933 * After the golden LRC's are recorded for each engine class by the first 934 * submission, copy them to the ADS, as initialized earlier by 935 * guc_golden_lrc_init(). 936 */ 937 static void guc_golden_lrc_populate(struct xe_guc_ads *ads) 938 { 939 struct xe_device *xe = ads_to_xe(ads); 940 struct xe_gt *gt = ads_to_gt(ads); 941 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 942 offsetof(struct __guc_ads_blob, system_info)); 943 size_t total_size = 0, alloc_size, real_size; 944 u32 offset; 945 int class; 946 947 offset = guc_ads_golden_lrc_offset(ads); 948 949 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 950 u8 guc_class; 951 952 guc_class = xe_engine_class_to_guc_class(class); 953 954 if (!info_map_read(xe, &info_map, 955 engine_enabled_masks[guc_class])) 956 continue; 957 958 xe_gt_assert(gt, gt->default_lrc[class]); 959 960 real_size = xe_gt_lrc_size(gt, class); 961 alloc_size = PAGE_ALIGN(real_size); 962 total_size += alloc_size; 963 964 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 965 gt->default_lrc[class], real_size); 966 967 offset += alloc_size; 968 } 969 970 xe_gt_assert(gt, total_size == ads->golden_lrc_size); 971 } 972 973 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 974 { 975 guc_golden_lrc_populate(ads); 976 } 977 978 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) 979 { 980 struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; 981 u32 action[] = { 982 XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, 983 policy_offset 984 }; 985 986 return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 987 } 988 989 /** 990 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy 991 * @ads: Additional data structures object 992 * @enable_engine_reset: true to enable engine resets, false otherwise 993 * 994 * This function update the GuC's engine reset policy. 995 * 996 * Return: 0 on success, and negative error code otherwise. 997 */ 998 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads, 999 bool enable_engine_reset) 1000 { 1001 struct guc_policies *policies; 1002 struct xe_guc *guc = ads_to_guc(ads); 1003 CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); 1004 1005 if (!xe_guc_buf_is_valid(buf)) 1006 return -ENOBUFS; 1007 1008 policies = xe_guc_buf_cpu_ptr(buf); 1009 memset(policies, 0, sizeof(*policies)); 1010 1011 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); 1012 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); 1013 policies->is_valid = 1; 1014 1015 if (enable_engine_reset) 1016 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1017 else 1018 policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 1019 1020 return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); 1021 } 1022