1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_guc_ads.h" 7 8 #include <linux/fault-inject.h> 9 10 #include <drm/drm_managed.h> 11 12 #include <generated/xe_wa_oob.h> 13 14 #include "abi/guc_actions_abi.h" 15 #include "regs/xe_engine_regs.h" 16 #include "regs/xe_gt_regs.h" 17 #include "regs/xe_guc_regs.h" 18 #include "xe_bo.h" 19 #include "xe_gt.h" 20 #include "xe_gt_ccs_mode.h" 21 #include "xe_gt_printk.h" 22 #include "xe_guc.h" 23 #include "xe_guc_capture.h" 24 #include "xe_guc_ct.h" 25 #include "xe_hw_engine.h" 26 #include "xe_lrc.h" 27 #include "xe_map.h" 28 #include "xe_mmio.h" 29 #include "xe_platform_types.h" 30 #include "xe_uc_fw.h" 31 #include "xe_wa.h" 32 33 /* Slack of a few additional entries per engine */ 34 #define ADS_REGSET_EXTRA_MAX 8 35 36 static struct xe_guc * 37 ads_to_guc(struct xe_guc_ads *ads) 38 { 39 return container_of(ads, struct xe_guc, ads); 40 } 41 42 static struct xe_gt * 43 ads_to_gt(struct xe_guc_ads *ads) 44 { 45 return container_of(ads, struct xe_gt, uc.guc.ads); 46 } 47 48 static struct xe_device * 49 ads_to_xe(struct xe_guc_ads *ads) 50 { 51 return gt_to_xe(ads_to_gt(ads)); 52 } 53 54 static struct iosys_map * 55 ads_to_map(struct xe_guc_ads *ads) 56 { 57 return &ads->bo->vmap; 58 } 59 60 /* UM Queue parameters: */ 61 #define GUC_UM_QUEUE_SIZE (SZ_64K) 62 #define GUC_PAGE_RES_TIMEOUT_US (-1) 63 64 /* 65 * The Additional Data Struct (ADS) has pointers for different buffers used by 66 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and 67 * all the extra buffers indirectly linked via the ADS struct's entries. 68 * 69 * Layout of the ADS blob allocated for the GuC: 70 * 71 * +---------------------------------------+ <== base 72 * | guc_ads | 73 * +---------------------------------------+ 74 * | guc_policies | 75 * +---------------------------------------+ 76 * | guc_gt_system_info | 77 * +---------------------------------------+ 78 * | guc_engine_usage | 79 * +---------------------------------------+ 80 * | guc_um_init_params | 81 * +---------------------------------------+ <== static 82 * | guc_mmio_reg[countA] (engine 0.0) | 83 * | guc_mmio_reg[countB] (engine 0.1) | 84 * | guc_mmio_reg[countC] (engine 1.0) | 85 * | ... | 86 * +---------------------------------------+ <== dynamic 87 * | padding | 88 * +---------------------------------------+ <== 4K aligned 89 * | golden contexts | 90 * +---------------------------------------+ 91 * | padding | 92 * +---------------------------------------+ <== 4K aligned 93 * | w/a KLVs | 94 * +---------------------------------------+ 95 * | padding | 96 * +---------------------------------------+ <== 4K aligned 97 * | capture lists | 98 * +---------------------------------------+ 99 * | padding | 100 * +---------------------------------------+ <== 4K aligned 101 * | UM queues | 102 * +---------------------------------------+ 103 * | padding | 104 * +---------------------------------------+ <== 4K aligned 105 * | private data | 106 * +---------------------------------------+ 107 * | padding | 108 * +---------------------------------------+ <== 4K aligned 109 */ 110 struct __guc_ads_blob { 111 struct guc_ads ads; 112 struct guc_policies policies; 113 struct guc_gt_system_info system_info; 114 struct guc_engine_usage engine_usage; 115 struct guc_um_init_params um_init_params; 116 /* From here on, location is dynamic! Refer to above diagram. */ 117 struct guc_mmio_reg regset[]; 118 } __packed; 119 120 #define ads_blob_read(ads_, field_) \ 121 xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 122 struct __guc_ads_blob, field_) 123 124 #define ads_blob_write(ads_, field_, val_) \ 125 xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ 126 struct __guc_ads_blob, field_, val_) 127 128 #define info_map_write(xe_, map_, field_, val_) \ 129 xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) 130 131 #define info_map_read(xe_, map_, field_) \ 132 xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) 133 134 static size_t guc_ads_regset_size(struct xe_guc_ads *ads) 135 { 136 struct xe_device *xe = ads_to_xe(ads); 137 138 xe_assert(xe, ads->regset_size); 139 140 return ads->regset_size; 141 } 142 143 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) 144 { 145 return PAGE_ALIGN(ads->golden_lrc_size); 146 } 147 148 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) 149 { 150 return PAGE_ALIGN(ads->ads_waklv_size); 151 } 152 153 static size_t guc_ads_capture_size(struct xe_guc_ads *ads) 154 { 155 return PAGE_ALIGN(ads->capture_size); 156 } 157 158 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) 159 { 160 struct xe_device *xe = ads_to_xe(ads); 161 162 if (!xe->info.has_usm) 163 return 0; 164 165 return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; 166 } 167 168 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) 169 { 170 return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); 171 } 172 173 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) 174 { 175 return offsetof(struct __guc_ads_blob, regset); 176 } 177 178 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) 179 { 180 size_t offset; 181 182 offset = guc_ads_regset_offset(ads) + 183 guc_ads_regset_size(ads); 184 185 return PAGE_ALIGN(offset); 186 } 187 188 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) 189 { 190 u32 offset; 191 192 offset = guc_ads_golden_lrc_offset(ads) + 193 guc_ads_golden_lrc_size(ads); 194 195 return PAGE_ALIGN(offset); 196 } 197 198 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) 199 { 200 size_t offset; 201 202 offset = guc_ads_waklv_offset(ads) + 203 guc_ads_waklv_size(ads); 204 205 return PAGE_ALIGN(offset); 206 } 207 208 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) 209 { 210 u32 offset; 211 212 offset = guc_ads_capture_offset(ads) + 213 guc_ads_capture_size(ads); 214 215 return PAGE_ALIGN(offset); 216 } 217 218 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) 219 { 220 size_t offset; 221 222 offset = guc_ads_um_queues_offset(ads) + 223 guc_ads_um_queues_size(ads); 224 225 return PAGE_ALIGN(offset); 226 } 227 228 static size_t guc_ads_size(struct xe_guc_ads *ads) 229 { 230 return guc_ads_private_data_offset(ads) + 231 guc_ads_private_data_size(ads); 232 } 233 234 static bool needs_wa_1607983814(struct xe_device *xe) 235 { 236 return GRAPHICS_VERx100(xe) < 1250; 237 } 238 239 static size_t calculate_regset_size(struct xe_gt *gt) 240 { 241 struct xe_reg_sr_entry *sr_entry; 242 unsigned long sr_idx; 243 struct xe_hw_engine *hwe; 244 enum xe_hw_engine_id id; 245 unsigned int count = 0; 246 247 for_each_hw_engine(hwe, gt, id) 248 xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) 249 count++; 250 251 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; 252 253 if (needs_wa_1607983814(gt_to_xe(gt))) 254 count += LNCFCMOCS_REG_COUNT; 255 256 return count * sizeof(struct guc_mmio_reg); 257 } 258 259 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) 260 { 261 struct xe_hw_engine *hwe; 262 enum xe_hw_engine_id id; 263 u32 mask = 0; 264 265 for_each_hw_engine(hwe, gt, id) 266 if (hwe->class == class) 267 mask |= BIT(hwe->instance); 268 269 return mask; 270 } 271 272 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) 273 { 274 struct xe_gt *gt = ads_to_gt(ads); 275 size_t total_size = 0, alloc_size, real_size; 276 int class; 277 278 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 279 if (!engine_enable_mask(gt, class)) 280 continue; 281 282 real_size = xe_gt_lrc_size(gt, class); 283 alloc_size = PAGE_ALIGN(real_size); 284 total_size += alloc_size; 285 } 286 287 return total_size; 288 } 289 290 static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, 291 enum xe_guc_klv_ids klv_id, 292 u32 value, 293 u32 *offset, u32 *remain) 294 { 295 u32 size; 296 u32 klv_entry[] = { 297 /* 16:16 key/length */ 298 FIELD_PREP(GUC_KLV_0_KEY, klv_id) | 299 FIELD_PREP(GUC_KLV_0_LEN, 1), 300 value, 301 /* 1 dword data */ 302 }; 303 304 size = sizeof(klv_entry); 305 306 if (*remain < size) { 307 drm_warn(&ads_to_xe(ads)->drm, 308 "w/a klv buffer too small to add klv id %d\n", klv_id); 309 } else { 310 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, 311 klv_entry, size); 312 *offset += size; 313 *remain -= size; 314 } 315 } 316 317 static void guc_waklv_enable_simple(struct xe_guc_ads *ads, 318 enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) 319 { 320 u32 klv_entry[] = { 321 /* 16:16 key/length */ 322 FIELD_PREP(GUC_KLV_0_KEY, klv_id) | 323 FIELD_PREP(GUC_KLV_0_LEN, 0), 324 /* 0 dwords data */ 325 }; 326 u32 size; 327 328 size = sizeof(klv_entry); 329 330 if (xe_gt_WARN(ads_to_gt(ads), *remain < size, 331 "w/a klv buffer too small to add klv id %d\n", klv_id)) 332 return; 333 334 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, 335 klv_entry, size); 336 *offset += size; 337 *remain -= size; 338 } 339 340 static void guc_waklv_init(struct xe_guc_ads *ads) 341 { 342 struct xe_gt *gt = ads_to_gt(ads); 343 u64 addr_ggtt; 344 u32 offset, remain, size; 345 346 offset = guc_ads_waklv_offset(ads); 347 remain = guc_ads_waklv_size(ads); 348 349 if (XE_WA(gt, 14019882105)) 350 guc_waklv_enable_simple(ads, 351 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, 352 &offset, &remain); 353 if (XE_WA(gt, 18024947630)) 354 guc_waklv_enable_simple(ads, 355 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, 356 &offset, &remain); 357 if (XE_WA(gt, 16022287689)) 358 guc_waklv_enable_simple(ads, 359 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, 360 &offset, &remain); 361 362 if (XE_WA(gt, 14022866841)) 363 guc_waklv_enable_simple(ads, 364 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, 365 &offset, &remain); 366 367 /* 368 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, 369 * the default value for this register is determined to be 0xC40. This could change in the 370 * future, so GuC depends on KMD to send it the correct value. 371 */ 372 if (XE_WA(gt, 13011645652)) 373 guc_waklv_enable_one_word(ads, 374 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, 375 0xC40, 376 &offset, &remain); 377 378 if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) 379 guc_waklv_enable_simple(ads, 380 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, 381 &offset, &remain); 382 383 size = guc_ads_waklv_size(ads) - remain; 384 if (!size) 385 return; 386 387 offset = guc_ads_waklv_offset(ads); 388 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 389 390 ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt)); 391 ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt)); 392 ads_blob_write(ads, ads.wa_klv_size, size); 393 } 394 395 static int calculate_waklv_size(struct xe_guc_ads *ads) 396 { 397 /* 398 * A single page is both the minimum size possible and 399 * is sufficiently large enough for all current platforms. 400 */ 401 return SZ_4K; 402 } 403 404 #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) 405 406 int xe_guc_ads_init(struct xe_guc_ads *ads) 407 { 408 struct xe_device *xe = ads_to_xe(ads); 409 struct xe_gt *gt = ads_to_gt(ads); 410 struct xe_tile *tile = gt_to_tile(gt); 411 struct xe_bo *bo; 412 413 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 414 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 415 ads->regset_size = calculate_regset_size(gt); 416 ads->ads_waklv_size = calculate_waklv_size(ads); 417 418 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, 419 XE_BO_FLAG_SYSTEM | 420 XE_BO_FLAG_GGTT | 421 XE_BO_FLAG_GGTT_INVALIDATE); 422 if (IS_ERR(bo)) 423 return PTR_ERR(bo); 424 425 ads->bo = bo; 426 427 return 0; 428 } 429 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */ 430 431 /** 432 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load 433 * @ads: Additional data structures object 434 * 435 * Recalculate golden_lrc_size, capture_size and regset_size as the number 436 * hardware engines may have changed after the hwconfig was loaded. Also verify 437 * the new sizes fit in the already allocated ADS buffer object. 438 * 439 * Return: 0 on success, negative error code on error. 440 */ 441 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) 442 { 443 struct xe_gt *gt = ads_to_gt(ads); 444 u32 prev_regset_size = ads->regset_size; 445 446 xe_gt_assert(gt, ads->bo); 447 448 ads->golden_lrc_size = calculate_golden_lrc_size(ads); 449 /* Calculate Capture size with worst size */ 450 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads)); 451 ads->regset_size = calculate_regset_size(gt); 452 453 xe_gt_assert(gt, ads->golden_lrc_size + 454 (ads->regset_size - prev_regset_size) <= 455 MAX_GOLDEN_LRC_SIZE); 456 457 return 0; 458 } 459 460 static void guc_policies_init(struct xe_guc_ads *ads) 461 { 462 struct xe_device *xe = ads_to_xe(ads); 463 u32 global_flags = 0; 464 465 ads_blob_write(ads, policies.dpc_promote_time, 466 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); 467 ads_blob_write(ads, policies.max_num_work_items, 468 GLOBAL_POLICY_MAX_NUM_WI); 469 470 if (xe->wedged.mode == 2) 471 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 472 473 ads_blob_write(ads, policies.global_flags, global_flags); 474 ads_blob_write(ads, policies.is_valid, 1); 475 } 476 477 static void fill_engine_enable_masks(struct xe_gt *gt, 478 struct iosys_map *info_map) 479 { 480 struct xe_device *xe = gt_to_xe(gt); 481 482 info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], 483 engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); 484 info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 485 engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); 486 info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 487 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); 488 info_map_write(xe, info_map, 489 engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 490 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); 491 info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], 492 engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); 493 info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], 494 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); 495 } 496 497 static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) 498 { 499 struct xe_device *xe = ads_to_xe(ads); 500 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 501 offsetof(struct __guc_ads_blob, system_info)); 502 u8 guc_class; 503 504 for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { 505 if (!info_map_read(xe, &info_map, 506 engine_enabled_masks[guc_class])) 507 continue; 508 509 ads_blob_write(ads, ads.eng_state_size[guc_class], 510 guc_ads_golden_lrc_size(ads) - 511 xe_lrc_skip_size(xe)); 512 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 513 xe_bo_ggtt_addr(ads->bo) + 514 guc_ads_golden_lrc_offset(ads)); 515 } 516 } 517 518 static void guc_mapping_table_init_invalid(struct xe_gt *gt, 519 struct iosys_map *info_map) 520 { 521 struct xe_device *xe = gt_to_xe(gt); 522 unsigned int i, j; 523 524 /* Table must be set to invalid values for entries not used */ 525 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) 526 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) 527 info_map_write(xe, info_map, mapping_table[i][j], 528 GUC_MAX_INSTANCES_PER_CLASS); 529 } 530 531 static void guc_mapping_table_init(struct xe_gt *gt, 532 struct iosys_map *info_map) 533 { 534 struct xe_device *xe = gt_to_xe(gt); 535 struct xe_hw_engine *hwe; 536 enum xe_hw_engine_id id; 537 538 guc_mapping_table_init_invalid(gt, info_map); 539 540 for_each_hw_engine(hwe, gt, id) { 541 u8 guc_class; 542 543 guc_class = xe_engine_class_to_guc_class(hwe->class); 544 info_map_write(xe, info_map, 545 mapping_table[guc_class][hwe->logical_instance], 546 hwe->instance); 547 } 548 } 549 550 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map, 551 enum guc_capture_list_class_type capture_class) 552 { 553 struct xe_device *xe = gt_to_xe(gt); 554 u32 mask; 555 556 switch (capture_class) { 557 case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE: 558 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]); 559 mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]); 560 break; 561 case GUC_CAPTURE_LIST_CLASS_VIDEO: 562 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]); 563 break; 564 case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE: 565 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]); 566 break; 567 case GUC_CAPTURE_LIST_CLASS_BLITTER: 568 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]); 569 break; 570 case GUC_CAPTURE_LIST_CLASS_GSC_OTHER: 571 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]); 572 break; 573 default: 574 mask = 0; 575 } 576 577 return mask; 578 } 579 580 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt, 581 int owner, int type, int class, u32 *total_size, size_t *size, 582 void **pptr) 583 { 584 *size = 0; 585 586 if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) { 587 if (*total_size + *size > ads->capture_size) 588 xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n", 589 *total_size + *size, ads->capture_size); 590 else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr)) 591 return false; 592 } 593 594 return true; 595 } 596 597 static int guc_capture_prep_lists(struct xe_guc_ads *ads) 598 { 599 struct xe_guc *guc = ads_to_guc(ads); 600 struct xe_gt *gt = ads_to_gt(ads); 601 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0; 602 struct iosys_map info_map; 603 size_t size = 0; 604 void *ptr; 605 int i, j; 606 607 /* 608 * GuC Capture's steered reg-list needs to be allocated and initialized 609 * after the GuC-hwconfig is available which guaranteed from here. 610 */ 611 xe_guc_capture_steered_list_init(ads_to_guc(ads)); 612 613 capture_offset = guc_ads_capture_offset(ads); 614 ads_ggtt = xe_bo_ggtt_addr(ads->bo); 615 info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 616 offsetof(struct __guc_ads_blob, system_info)); 617 618 /* first, set aside the first page for a capture_list with zero descriptors */ 619 total_size = PAGE_SIZE; 620 if (!xe_guc_capture_getnullheader(guc, &ptr, &size)) 621 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size); 622 623 null_ggtt = ads_ggtt + capture_offset; 624 capture_offset += PAGE_SIZE; 625 626 /* 627 * Populate capture list : at this point adps is already allocated and 628 * mapped to worst case size 629 */ 630 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { 631 bool write_empty_list; 632 633 for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) { 634 u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j); 635 /* null list if we dont have said engine or list */ 636 if (!engine_mask) { 637 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 638 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 639 continue; 640 } 641 642 /* engine exists: start with engine-class registers */ 643 write_empty_list = get_capture_list(ads, guc, gt, i, 644 GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS, 645 j, &total_size, &size, &ptr); 646 if (!write_empty_list) { 647 ads_blob_write(ads, ads.capture_class[i][j], 648 ads_ggtt + capture_offset); 649 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 650 ptr, size); 651 total_size += size; 652 capture_offset += size; 653 } else { 654 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt); 655 } 656 657 /* engine exists: next, engine-instance registers */ 658 write_empty_list = get_capture_list(ads, guc, gt, i, 659 GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE, 660 j, &total_size, &size, &ptr); 661 if (!write_empty_list) { 662 ads_blob_write(ads, ads.capture_instance[i][j], 663 ads_ggtt + capture_offset); 664 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, 665 ptr, size); 666 total_size += size; 667 capture_offset += size; 668 } else { 669 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt); 670 } 671 } 672 673 /* global registers is last in our PF/VF loops */ 674 write_empty_list = get_capture_list(ads, guc, gt, i, 675 GUC_STATE_CAPTURE_TYPE_GLOBAL, 676 0, &total_size, &size, &ptr); 677 if (!write_empty_list) { 678 ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset); 679 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, 680 size); 681 total_size += size; 682 capture_offset += size; 683 } else { 684 ads_blob_write(ads, ads.capture_global[i], null_ggtt); 685 } 686 } 687 688 if (ads->capture_size != PAGE_ALIGN(total_size)) 689 xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n", 690 ads->capture_size, PAGE_ALIGN(total_size)); 691 return PAGE_ALIGN(total_size); 692 } 693 694 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, 695 struct iosys_map *regset_map, 696 struct xe_reg reg, 697 unsigned int n_entry) 698 { 699 struct guc_mmio_reg entry = { 700 .offset = reg.addr, 701 .flags = reg.masked ? GUC_REGSET_MASKED : 0, 702 }; 703 704 xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), 705 &entry, sizeof(entry)); 706 } 707 708 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, 709 struct iosys_map *regset_map, 710 struct xe_hw_engine *hwe) 711 { 712 struct xe_device *xe = ads_to_xe(ads); 713 struct xe_hw_engine *hwe_rcs_reset_domain = 714 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); 715 struct xe_reg_sr_entry *entry; 716 unsigned long idx; 717 unsigned int count = 0; 718 const struct { 719 struct xe_reg reg; 720 bool skip; 721 } *e, extra_regs[] = { 722 { .reg = RING_MODE(hwe->mmio_base), }, 723 { .reg = RING_HWS_PGA(hwe->mmio_base), }, 724 { .reg = RING_IMR(hwe->mmio_base), }, 725 { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, 726 { .reg = CCS_MODE, 727 .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) }, 728 }; 729 u32 i; 730 731 BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); 732 733 xa_for_each(&hwe->reg_sr.xa, idx, entry) 734 guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); 735 736 for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { 737 if (e->skip) 738 continue; 739 740 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); 741 } 742 743 /* Wa_1607983814 */ 744 if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { 745 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { 746 guc_mmio_regset_write_one(ads, regset_map, 747 XELP_LNCFCMOCS(i), count++); 748 } 749 } 750 751 return count; 752 } 753 754 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) 755 { 756 size_t regset_offset = guc_ads_regset_offset(ads); 757 struct xe_gt *gt = ads_to_gt(ads); 758 struct xe_hw_engine *hwe; 759 enum xe_hw_engine_id id; 760 u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; 761 struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 762 regset_offset); 763 unsigned int regset_used = 0; 764 765 for_each_hw_engine(hwe, gt, id) { 766 unsigned int count; 767 u8 gc; 768 769 /* 770 * 1. Write all MMIO entries for this exec queue to the table. No 771 * need to worry about fused-off engines and when there are 772 * entries in the regset: the reg_state_list has been zero'ed 773 * by xe_guc_ads_populate() 774 */ 775 count = guc_mmio_regset_write(ads, ®set_map, hwe); 776 if (!count) 777 continue; 778 779 /* 780 * 2. Record in the header (ads.reg_state_list) the address 781 * location and number of entries 782 */ 783 gc = xe_engine_class_to_guc_class(hwe->class); 784 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); 785 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); 786 787 addr += count * sizeof(struct guc_mmio_reg); 788 iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); 789 790 regset_used += count * sizeof(struct guc_mmio_reg); 791 } 792 793 xe_gt_assert(gt, regset_used <= ads->regset_size); 794 } 795 796 static void guc_um_init_params(struct xe_guc_ads *ads) 797 { 798 u32 um_queue_offset = guc_ads_um_queues_offset(ads); 799 u64 base_dpa; 800 u32 base_ggtt; 801 int i; 802 803 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; 804 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; 805 806 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { 807 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, 808 base_dpa + (i * GUC_UM_QUEUE_SIZE)); 809 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, 810 base_ggtt + (i * GUC_UM_QUEUE_SIZE)); 811 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, 812 GUC_UM_QUEUE_SIZE); 813 } 814 815 ads_blob_write(ads, um_init_params.page_response_timeout_in_us, 816 GUC_PAGE_RES_TIMEOUT_US); 817 } 818 819 static void guc_doorbell_init(struct xe_guc_ads *ads) 820 { 821 struct xe_device *xe = ads_to_xe(ads); 822 struct xe_gt *gt = ads_to_gt(ads); 823 824 if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { 825 u32 distdbreg = 826 xe_mmio_read32(>->mmio, DIST_DBS_POPULATED); 827 828 ads_blob_write(ads, 829 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], 830 REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); 831 } 832 } 833 834 /** 835 * xe_guc_ads_populate_minimal - populate minimal ADS 836 * @ads: Additional data structures object 837 * 838 * This function populates a minimal ADS that does not support submissions but 839 * enough so the GuC can load and the hwconfig table can be read. 840 */ 841 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) 842 { 843 struct xe_gt *gt = ads_to_gt(ads); 844 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 845 offsetof(struct __guc_ads_blob, system_info)); 846 u32 base = xe_bo_ggtt_addr(ads->bo); 847 848 xe_gt_assert(gt, ads->bo); 849 850 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 851 guc_policies_init(ads); 852 guc_prep_golden_lrc_null(ads); 853 guc_mapping_table_init_invalid(gt, &info_map); 854 guc_doorbell_init(ads); 855 856 ads_blob_write(ads, ads.scheduler_policies, base + 857 offsetof(struct __guc_ads_blob, policies)); 858 ads_blob_write(ads, ads.gt_system_info, base + 859 offsetof(struct __guc_ads_blob, system_info)); 860 ads_blob_write(ads, ads.private_data, base + 861 guc_ads_private_data_offset(ads)); 862 } 863 864 void xe_guc_ads_populate(struct xe_guc_ads *ads) 865 { 866 struct xe_device *xe = ads_to_xe(ads); 867 struct xe_gt *gt = ads_to_gt(ads); 868 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 869 offsetof(struct __guc_ads_blob, system_info)); 870 u32 base = xe_bo_ggtt_addr(ads->bo); 871 872 xe_gt_assert(gt, ads->bo); 873 874 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); 875 guc_policies_init(ads); 876 fill_engine_enable_masks(gt, &info_map); 877 guc_mmio_reg_state_init(ads); 878 guc_prep_golden_lrc_null(ads); 879 guc_mapping_table_init(gt, &info_map); 880 guc_capture_prep_lists(ads); 881 guc_doorbell_init(ads); 882 guc_waklv_init(ads); 883 884 if (xe->info.has_usm) { 885 guc_um_init_params(ads); 886 ads_blob_write(ads, ads.um_init_data, base + 887 offsetof(struct __guc_ads_blob, um_init_params)); 888 } 889 890 ads_blob_write(ads, ads.scheduler_policies, base + 891 offsetof(struct __guc_ads_blob, policies)); 892 ads_blob_write(ads, ads.gt_system_info, base + 893 offsetof(struct __guc_ads_blob, system_info)); 894 ads_blob_write(ads, ads.private_data, base + 895 guc_ads_private_data_offset(ads)); 896 } 897 898 static void guc_populate_golden_lrc(struct xe_guc_ads *ads) 899 { 900 struct xe_device *xe = ads_to_xe(ads); 901 struct xe_gt *gt = ads_to_gt(ads); 902 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), 903 offsetof(struct __guc_ads_blob, system_info)); 904 size_t total_size = 0, alloc_size, real_size; 905 u32 addr_ggtt, offset; 906 int class; 907 908 offset = guc_ads_golden_lrc_offset(ads); 909 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; 910 911 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { 912 u8 guc_class; 913 914 guc_class = xe_engine_class_to_guc_class(class); 915 916 if (!info_map_read(xe, &info_map, 917 engine_enabled_masks[guc_class])) 918 continue; 919 920 xe_gt_assert(gt, gt->default_lrc[class]); 921 922 real_size = xe_gt_lrc_size(gt, class); 923 alloc_size = PAGE_ALIGN(real_size); 924 total_size += alloc_size; 925 926 /* 927 * This interface is slightly confusing. We need to pass the 928 * base address of the full golden context and the size of just 929 * the engine state, which is the section of the context image 930 * that starts after the execlists LRC registers. This is 931 * required to allow the GuC to restore just the engine state 932 * when a watchdog reset occurs. 933 * We calculate the engine state size by removing the size of 934 * what comes before it in the context image (which is identical 935 * on all engines). 936 */ 937 ads_blob_write(ads, ads.eng_state_size[guc_class], 938 real_size - xe_lrc_skip_size(xe)); 939 ads_blob_write(ads, ads.golden_context_lrca[guc_class], 940 addr_ggtt); 941 942 xe_map_memcpy_to(xe, ads_to_map(ads), offset, 943 gt->default_lrc[class], real_size); 944 945 addr_ggtt += alloc_size; 946 offset += alloc_size; 947 } 948 949 xe_gt_assert(gt, total_size == ads->golden_lrc_size); 950 } 951 952 void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) 953 { 954 guc_populate_golden_lrc(ads); 955 } 956 957 static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) 958 { 959 struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; 960 u32 action[] = { 961 XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, 962 policy_offset 963 }; 964 965 return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 966 } 967 968 /** 969 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy 970 * @ads: Additional data structures object 971 * 972 * This function update the GuC's engine reset policy based on wedged.mode. 973 * 974 * Return: 0 on success, and negative error code otherwise. 975 */ 976 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) 977 { 978 struct xe_device *xe = ads_to_xe(ads); 979 struct xe_gt *gt = ads_to_gt(ads); 980 struct xe_tile *tile = gt_to_tile(gt); 981 struct guc_policies *policies; 982 struct xe_bo *bo; 983 int ret = 0; 984 985 policies = kmalloc(sizeof(*policies), GFP_KERNEL); 986 if (!policies) 987 return -ENOMEM; 988 989 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); 990 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); 991 policies->is_valid = 1; 992 if (xe->wedged.mode == 2) 993 policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; 994 else 995 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; 996 997 bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), 998 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 999 XE_BO_FLAG_GGTT); 1000 if (IS_ERR(bo)) { 1001 ret = PTR_ERR(bo); 1002 goto out; 1003 } 1004 1005 ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); 1006 out: 1007 kfree(policies); 1008 return ret; 1009 } 1010