// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_ads.h"

#include <linux/fault-inject.h>

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/guc_actions_abi.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_buf.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX	8

static struct xe_guc *
ads_to_guc(struct xe_guc_ads *ads)
{
	return container_of(ads, struct xe_guc, ads);
}

static struct xe_gt *
ads_to_gt(struct xe_guc_ads *ads)
{
	return container_of(ads, struct xe_gt, uc.guc.ads);
}

static struct xe_device *
ads_to_xe(struct xe_guc_ads *ads)
{
	return gt_to_xe(ads_to_gt(ads));
}

static struct iosys_map *
ads_to_map(struct xe_guc_ads *ads)
{
	return &ads->bo->vmap;
}

/* UM Queue parameters: */
#define GUC_UM_QUEUE_SIZE	(SZ_64K)
#define GUC_PAGE_RES_TIMEOUT_US	(-1)

/*
 * The Additional Data Struct (ADS) has pointers for different buffers used by
 * the GuC. A single GEM object contains the ADS struct itself (guc_ads) and
 * all the extra buffers indirectly linked via the ADS struct's entries.
 *
 * Layout of the ADS blob allocated for the GuC:
 *
 *      +---------------------------------------+ <== base
 *      | guc_ads                               |
 *      +---------------------------------------+
 *      | guc_policies                          |
 *      +---------------------------------------+
 *      | guc_gt_system_info                    |
 *      +---------------------------------------+
 *      | guc_engine_usage                      |
 *      +---------------------------------------+
 *      | guc_um_init_params                    |
 *      +---------------------------------------+ <== static
 *      | guc_mmio_reg[countA] (engine 0.0)     |
 *      | guc_mmio_reg[countB] (engine 0.1)     |
 *      | guc_mmio_reg[countC] (engine 1.0)     |
 *      |   ...                                 |
 *      +---------------------------------------+ <== dynamic
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | golden contexts                       |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | w/a KLVs                              |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | capture lists                         |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | UM queues                             |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | private data                          |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 */
struct __guc_ads_blob {
	struct guc_ads ads;
	struct guc_policies policies;
	struct guc_gt_system_info system_info;
	struct guc_engine_usage engine_usage;
	struct guc_um_init_params um_init_params;
	/* From here on, location is dynamic! Refer to above diagram. */
	struct guc_mmio_reg regset[];
} __packed;

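/*
 * Accessors for the ADS blob. All reads and writes go through the xe_map
 * helpers since the backing buffer object may be mapped in IO memory.
 */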
#define ads_blob_read(ads_, field_) \
	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
			struct __guc_ads_blob, field_)

#define ads_blob_write(ads_, field_, val_) \
	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
			struct __guc_ads_blob, field_, val_)

#define info_map_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)

#define info_map_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)

static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);

	xe_assert(xe, ads->regset_size);

	return ads->regset_size;
}

static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
{
	return PAGE_ALIGN(ads->golden_lrc_size);
}

static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
{
	return PAGE_ALIGN(ads->ads_waklv_size);
}

static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
{
	return PAGE_ALIGN(ads->capture_size);
}

static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);

	if (!xe->info.has_usm)
		return 0;

	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
}

static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
{
	return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
}

static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
{
	return offsetof(struct __guc_ads_blob, regset);
}

static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
{
	size_t offset;

	offset = guc_ads_regset_offset(ads) +
		 guc_ads_regset_size(ads);

	return PAGE_ALIGN(offset);
}

static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
{
	u32 offset;

	offset = guc_ads_golden_lrc_offset(ads) +
		 guc_ads_golden_lrc_size(ads);

	return PAGE_ALIGN(offset);
}

static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
{
	size_t offset;

	offset = guc_ads_waklv_offset(ads) +
		 guc_ads_waklv_size(ads);

	return PAGE_ALIGN(offset);
}

static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
{
	u32 offset;

	offset = guc_ads_capture_offset(ads) +
		 guc_ads_capture_size(ads);

	return PAGE_ALIGN(offset);
}

static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
{
	size_t offset;

	offset = guc_ads_um_queues_offset(ads) +
		 guc_ads_um_queues_size(ads);

	return PAGE_ALIGN(offset);
}

static size_t guc_ads_size(struct xe_guc_ads *ads)
{
	return guc_ads_private_data_offset(ads) +
	       guc_ads_private_data_size(ads);
}

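/*
 * Worst-case size of the GuC save/restore register list: every entry
 * currently tracked per engine, plus a fixed slack per engine and the
 * extra LNCFCMOCS entries required by WA 1607983814.
 */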
static size_t calculate_regset_size(struct xe_gt *gt)
{
	struct xe_reg_sr_entry *sr_entry;
	unsigned long sr_idx;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	unsigned int count = 0;

	for_each_hw_engine(hwe, gt, id)
		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
			count++;

	count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;

	if (XE_GT_WA(gt, 1607983814))
		count += LNCFCMOCS_REG_COUNT;

	return count * sizeof(struct guc_mmio_reg);
}

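/* Bitmask of present (not fused off) engine instances of the given class */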
static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 mask = 0;

	for_each_hw_engine(hwe, gt, id)
		if (hwe->class == class)
			mask |= BIT(hwe->instance);

	return mask;
}

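/*
 * One page-aligned golden context image is reserved per enabled engine
 * class; the images themselves are copied in later by
 * guc_golden_lrc_populate().
 */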
static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);
	size_t total_size = 0, alloc_size, real_size;
	int class;

	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		if (!engine_enable_mask(gt, class))
			continue;

		real_size = xe_gt_lrc_size(gt, class);
		alloc_size = PAGE_ALIGN(real_size);
		total_size += alloc_size;
	}

	return total_size;
}

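/*
 * Append one workaround KLV at *offset: a 16:16 key/length header dword
 * followed by data_len_dw dwords of payload, advancing *offset and
 * shrinking *remain accordingly.
 */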
static void guc_waklv_enable(struct xe_guc_ads *ads,
			     u32 data[], u32 data_len_dw,
			     u32 *offset, u32 *remain,
			     enum xe_guc_klv_ids klv_id)
{
	size_t size = sizeof(u32) * (1 + data_len_dw);

	if (*remain < size) {
		drm_warn(&ads_to_xe(ads)->drm,
			 "w/a klv buffer too small to add klv id 0x%04X\n", klv_id);
		return;
	}

	/* 16:16 key/length */
	xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32,
		  FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw));
	/* data_len_dw dwords of data */
	xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads),
			 *offset + sizeof(u32), data, data_len_dw * sizeof(u32));

	*offset += size;
	*remain -= size;
}

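/*
 * Write all applicable workaround KLVs into the ADS and record their GGTT
 * address and total size in the ADS header for the GuC to consume.
 */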
static void guc_waklv_init(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);
	u64 addr_ggtt;
	u32 offset, remain, size;

	offset = guc_ads_waklv_offset(ads);
	remain = guc_ads_waklv_size(ads);

	if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
	if (XE_GT_WA(gt, 18024947630))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING);
	if (XE_GT_WA(gt, 16022287689))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE);

	if (XE_GT_WA(gt, 14022866841))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO);

	/*
	 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
	 * the default value for this register is determined to be 0xC40. This could change in the
	 * future, so GuC depends on KMD to send it the correct value.
	 */
	if (XE_GT_WA(gt, 13011645652)) {
		u32 data = 0xC40;

		guc_waklv_enable(ads, &data, 1, &offset, &remain,
				 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE);
	}

	if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET);

	if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH);
	if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) {
		u32 data[] = {
			0x0,
			0xF,
		};
		guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain,
				 GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG);
	}

	if (XE_GT_WA(gt, 14020001231))
		guc_waklv_enable(ads, NULL, 0, &offset, &remain,
				 GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT);

	size = guc_ads_waklv_size(ads) - remain;
	if (!size)
		return;

	offset = guc_ads_waklv_offset(ads);
	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;

	ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
	ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
	ads_blob_write(ads, ads.wa_klv_size, size);
}

static int calculate_waklv_size(struct xe_guc_ads *ads)
{
	/*
	 * A single page is both the minimum allocation possible and
	 * large enough for all current platforms.
	 */
	return SZ_4K;
}

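/*
 * Headroom allocated on top of guc_ads_size(): engine availability is only
 * final once the hwconfig is loaded, so xe_guc_ads_init_post_hwconfig() may
 * grow the golden LRC and regset areas within this slack.
 */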
#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)

int xe_guc_ads_init(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bo *bo;

	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
	ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
	ads->regset_size = calculate_regset_size(gt);
	ads->ads_waklv_size = calculate_waklv_size(ads);

	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
					  XE_BO_FLAG_SYSTEM |
					  XE_BO_FLAG_GGTT |
					  XE_BO_FLAG_GGTT_INVALIDATE |
					  XE_BO_FLAG_PINNED_NORESTORE);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	ads->bo = bo;

	return 0;
}
ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */

/**
 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
 * @ads: Additional data structures object
 *
 * Recalculate golden_lrc_size, capture_size and regset_size as the number of
 * hardware engines may have changed after the hwconfig was loaded. Also verify
 * the new sizes fit in the already allocated ADS buffer object.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);
	u32 prev_regset_size = ads->regset_size;

	xe_gt_assert(gt, ads->bo);

	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
	/* Calculate capture size using the worst-case size */
	ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
	ads->regset_size = calculate_regset_size(gt);

	xe_gt_assert(gt, ads->golden_lrc_size +
		     (ads->regset_size - prev_regset_size) <=
		     MAX_GOLDEN_LRC_SIZE);

	return 0;
}

static void guc_policies_init(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	u32 global_flags = 0;

	ads_blob_write(ads, policies.dpc_promote_time,
		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
	ads_blob_write(ads, policies.max_num_work_items,
		       GLOBAL_POLICY_MAX_NUM_WI);

	if (xe->wedged.mode == 2)
		global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;

	ads_blob_write(ads, policies.global_flags, global_flags);
	ads_blob_write(ads, policies.is_valid, 1);
}

static void fill_engine_enable_masks(struct xe_gt *gt,
				     struct iosys_map *info_map)
{
	struct xe_device *xe = gt_to_xe(gt);

	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
	info_map_write(xe, info_map,
		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
	info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
		       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}

/*
 * Write the offsets corresponding to the golden LRCs. The actual data is
 * populated later by guc_golden_lrc_populate()
 */
static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
			offsetof(struct __guc_ads_blob, system_info));
	size_t alloc_size, real_size;
	u32 addr_ggtt, offset;
	int class;

	offset = guc_ads_golden_lrc_offset(ads);
	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;

	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		u8 guc_class;

		guc_class = xe_engine_class_to_guc_class(class);

		if (!info_map_read(xe, &info_map,
				   engine_enabled_masks[guc_class]))
			continue;

		real_size = xe_gt_lrc_size(gt, class);
		alloc_size = PAGE_ALIGN(real_size);

		/*
		 * This interface is slightly confusing. We need to pass the
		 * base address of the full golden context and the size of just
		 * the engine state, which is the section of the context image
		 * that starts after the execlists LRC registers. This is
		 * required to allow the GuC to restore just the engine state
		 * when a watchdog reset occurs.
		 * We calculate the engine state size by removing the size of
		 * what comes before it in the context image (which is identical
		 * on all engines).
		 */
		ads_blob_write(ads, ads.eng_state_size[guc_class],
			       real_size - xe_lrc_skip_size(xe));
		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
			       addr_ggtt);

		addr_ggtt += alloc_size;
	}
}

static void guc_mapping_table_init_invalid(struct xe_gt *gt,
					   struct iosys_map *info_map)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int i, j;

	/* Table must be set to invalid values for entries not used */
	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
			info_map_write(xe, info_map, mapping_table[i][j],
				       GUC_MAX_INSTANCES_PER_CLASS);
}

static void guc_mapping_table_init(struct xe_gt *gt,
				   struct iosys_map *info_map)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	guc_mapping_table_init_invalid(gt, info_map);

	for_each_hw_engine(hwe, gt, id) {
		u8 guc_class;

		guc_class = xe_engine_class_to_guc_class(hwe->class);
		info_map_write(xe, info_map,
			       mapping_table[guc_class][hwe->logical_instance],
			       hwe->instance);
	}
}

static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
				       enum guc_capture_list_class_type capture_class)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 mask;

	switch (capture_class) {
	case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
		mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
		break;
	case GUC_CAPTURE_LIST_CLASS_VIDEO:
		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
		break;
	case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
		break;
	case GUC_CAPTURE_LIST_CLASS_BLITTER:
		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
		break;
	case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
		break;
	default:
		mask = 0;
	}

	return mask;
}

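/*
 * Fetch the capture list for the given owner/type/class. Returns true when
 * the caller should point the ADS at the empty (null) list instead, either
 * because no list exists or because it would overflow the space reserved
 * for capture lists in the ADS.
 */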
static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
				    int owner, int type, int class, u32 *total_size, size_t *size,
				    void **pptr)
{
	*size = 0;

	if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
		if (*total_size + *size > ads->capture_size)
			xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
				  *total_size + *size, ads->capture_size);
		else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
			return false;
	}

	return true;
}

static int guc_capture_prep_lists(struct xe_guc_ads *ads)
{
	struct xe_guc *guc = ads_to_guc(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
	struct iosys_map info_map;
	size_t size = 0;
	void *ptr;
	int i, j;

	/*
	 * GuC Capture's steered reg-list needs to be allocated and initialized
	 * after the GuC hwconfig is available, which is guaranteed at this
	 * point.
	 */
	xe_guc_capture_steered_list_init(ads_to_guc(ads));

	capture_offset = guc_ads_capture_offset(ads);
	ads_ggtt = xe_bo_ggtt_addr(ads->bo);
	info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
			offsetof(struct __guc_ads_blob, system_info));

	/* first, set aside the first page for a capture_list with zero descriptors */
	total_size = PAGE_SIZE;
	if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
		xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);

	null_ggtt = ads_ggtt + capture_offset;
	capture_offset += PAGE_SIZE;

	/*
	 * Populate the capture lists: at this point the ADS is already
	 * allocated and mapped to its worst-case size.
	 */
	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
		bool write_empty_list;

		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
			u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
			/* null list if we don't have said engine or list */
			if (!engine_mask) {
				ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
				ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
				continue;
			}

			/* engine exists: start with engine-class registers */
			write_empty_list = get_capture_list(ads, guc, gt, i,
							    GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
							    j, &total_size, &size, &ptr);
			if (!write_empty_list) {
				ads_blob_write(ads, ads.capture_class[i][j],
					       ads_ggtt + capture_offset);
				xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
						 ptr, size);
				total_size += size;
				capture_offset += size;
			} else {
				ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
			}

			/* engine exists: next, engine-instance registers */
			write_empty_list = get_capture_list(ads, guc, gt, i,
							    GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
							    j, &total_size, &size, &ptr);
			if (!write_empty_list) {
				ads_blob_write(ads, ads.capture_instance[i][j],
					       ads_ggtt + capture_offset);
				xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
						 ptr, size);
				total_size += size;
				capture_offset += size;
			} else {
				ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
			}
		}

		/* global registers are last in our PF/VF loops */
		write_empty_list = get_capture_list(ads, guc, gt, i,
						    GUC_STATE_CAPTURE_TYPE_GLOBAL,
						    0, &total_size, &size, &ptr);
		if (!write_empty_list) {
			ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
			xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
					 size);
			total_size += size;
			capture_offset += size;
		} else {
			ads_blob_write(ads, ads.capture_global[i], null_ggtt);
		}
	}

	if (ads->capture_size != PAGE_ALIGN(total_size))
		xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n",
			  PAGE_ALIGN(total_size), ads->capture_size);
	return PAGE_ALIGN(total_size);
}

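/*
 * Encode a single register for the GuC save/restore list. MCR registers
 * additionally carry a non-terminated steering target so the GuC accesses
 * a valid instance.
 */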
static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
				      struct iosys_map *regset_map,
				      struct xe_reg reg,
				      unsigned int n_entry)
{
	struct guc_mmio_reg entry = {
		.offset = reg.addr,
		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
	};

	if (reg.mcr) {
		struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
		u8 group, instance;

		bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
								  &group, &instance);

		if (steer) {
			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
			entry.flags |= GUC_REGSET_STEERING_NEEDED;
		}
	}

	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
			 &entry, sizeof(entry));
}

static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
					  struct iosys_map *regset_map,
					  struct xe_hw_engine *hwe)
{
	struct xe_hw_engine *hwe_rcs_reset_domain =
		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
	struct xe_reg_sr_entry *entry;
	unsigned long idx;
	unsigned int count = 0;
	const struct {
		struct xe_reg reg;
		bool skip;
	} *e, extra_regs[] = {
		{ .reg = RING_MODE(hwe->mmio_base), },
		{ .reg = RING_HWS_PGA(hwe->mmio_base), },
		{ .reg = RING_IMR(hwe->mmio_base), },
		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain },
		{ .reg = CCS_MODE,
		  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
	};
	u32 i;

	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);

	xa_for_each(&hwe->reg_sr.xa, idx, entry)
		guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);

	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
		if (e->skip)
			continue;

		guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
	}

	if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
			guc_mmio_regset_write_one(ads, regset_map,
						  XELP_LNCFCMOCS(i), count++);
		}
	}

	return count;
}

static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
{
	size_t regset_offset = guc_ads_regset_offset(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
							    regset_offset);
	unsigned int regset_used = 0;

	for_each_hw_engine(hwe, gt, id) {
		unsigned int count;
		u8 gc;

		/*
		 * 1. Write all MMIO entries for this exec queue to the table. No
		 * need to worry about fused-off engines and when there are
		 * entries in the regset: the reg_state_list has been zero'ed
		 * by xe_guc_ads_populate()
		 */
		count = guc_mmio_regset_write(ads, &regset_map, hwe);
		if (!count)
			continue;

		/*
		 * 2. Record in the header (ads.reg_state_list) the address
		 * location and number of entries
		 */
		gc = xe_engine_class_to_guc_class(hwe->class);
		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);

		addr += count * sizeof(struct guc_mmio_reg);
		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));

		regset_used += count * sizeof(struct guc_mmio_reg);
	}

	xe_gt_assert(gt, regset_used <= ads->regset_size);
}

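/*
 * Set up the queues the GuC uses for page-fault (USM) handling: each
 * hardware queue gets a fixed-size slice of the ADS, addressed by GGTT
 * and, when the main GAMCTRL queues are not used, by DPA as well.
 */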
static void guc_um_init_params(struct xe_guc_ads *ads)
{
	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
	struct xe_guc *guc = ads_to_guc(ads);
	u64 base_dpa;
	u32 base_ggtt;
	bool with_dpa;
	int i;

	with_dpa = !xe_guc_using_main_gamctrl_queues(guc);

	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;

	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
			       with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0);
		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
			       GUC_UM_QUEUE_SIZE);
	}

	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
		       GUC_PAGE_RES_TIMEOUT_US);
}

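/*
 * On integrated platforms with graphics version 12+, doorbells are
 * distributed across SQIDIs: read back how many each SQIDI provides so the
 * GuC can size its doorbell pool accordingly.
 */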
static void guc_doorbell_init(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	struct xe_gt *gt = ads_to_gt(ads);

	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
		u32 distdbreg =
			xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);

		ads_blob_write(ads,
			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
			       REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
	}
}

/**
 * xe_guc_ads_populate_minimal - populate minimal ADS
 * @ads: Additional data structures object
 *
 * This function populates a minimal ADS that does not support submissions but
 * is enough for the GuC to load and the hwconfig table to be read.
 */
void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);
	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
			offsetof(struct __guc_ads_blob, system_info));
	u32 base = xe_bo_ggtt_addr(ads->bo);

	xe_gt_assert(gt, ads->bo);

	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
	guc_policies_init(ads);
	guc_golden_lrc_init(ads);
	guc_mapping_table_init_invalid(gt, &info_map);
	guc_doorbell_init(ads);

	ads_blob_write(ads, ads.scheduler_policies, base +
		       offsetof(struct __guc_ads_blob, policies));
	ads_blob_write(ads, ads.gt_system_info, base +
		       offsetof(struct __guc_ads_blob, system_info));
	ads_blob_write(ads, ads.private_data, base +
		       guc_ads_private_data_offset(ads));
}

void xe_guc_ads_populate(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
			offsetof(struct __guc_ads_blob, system_info));
	u32 base = xe_bo_ggtt_addr(ads->bo);

	xe_gt_assert(gt, ads->bo);

	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
	guc_policies_init(ads);
	fill_engine_enable_masks(gt, &info_map);
	guc_mmio_reg_state_init(ads);
	guc_golden_lrc_init(ads);
	guc_mapping_table_init(gt, &info_map);
	guc_capture_prep_lists(ads);
	guc_doorbell_init(ads);
	guc_waklv_init(ads);

	if (xe->info.has_usm) {
		guc_um_init_params(ads);
		ads_blob_write(ads, ads.um_init_data, base +
			       offsetof(struct __guc_ads_blob, um_init_params));
	}

	ads_blob_write(ads, ads.scheduler_policies, base +
		       offsetof(struct __guc_ads_blob, policies));
	ads_blob_write(ads, ads.gt_system_info, base +
		       offsetof(struct __guc_ads_blob, system_info));
	ads_blob_write(ads, ads.private_data, base +
		       guc_ads_private_data_offset(ads));
}

/*
 * After the golden LRCs are recorded for each engine class by the first
 * submission, copy them to the ADS, as initialized earlier by
 * guc_golden_lrc_init().
 */
static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
	struct xe_device *xe = ads_to_xe(ads);
	struct xe_gt *gt = ads_to_gt(ads);
	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
			offsetof(struct __guc_ads_blob, system_info));
	size_t total_size = 0, alloc_size, real_size;
	u32 offset;
	int class;

	offset = guc_ads_golden_lrc_offset(ads);

	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		u8 guc_class;

		guc_class = xe_engine_class_to_guc_class(class);

		if (!info_map_read(xe, &info_map,
				   engine_enabled_masks[guc_class]))
			continue;

		xe_gt_assert(gt, gt->default_lrc[class]);

		real_size = xe_gt_lrc_size(gt, class);
		alloc_size = PAGE_ALIGN(real_size);
		total_size += alloc_size;

		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
				 gt->default_lrc[class], real_size);

		offset += alloc_size;
	}

	xe_gt_assert(gt, total_size == ads->golden_lrc_size);
}

void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
	guc_golden_lrc_populate(ads);
}

static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
{
	struct xe_guc_ct *ct = &ads_to_guc(ads)->ct;
	u32 action[] = {
		XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
		policy_offset
	};

	return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
}

/**
 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
 * @ads: Additional data structures object
 * @enable_engine_reset: true to enable engine resets, false otherwise
 *
 * This function updates the GuC's engine reset policy.
 *
 * Return: 0 on success, and negative error code otherwise.
 */
int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads,
					     bool enable_engine_reset)
{
	struct guc_policies *policies;
	struct xe_guc *guc = ads_to_guc(ads);
	CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies));

	if (!xe_guc_buf_is_valid(buf))
		return -ENOBUFS;

	policies = xe_guc_buf_cpu_ptr(buf);
	memset(policies, 0, sizeof(*policies));

	policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
	policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
	policies->is_valid = 1;

	if (enable_engine_reset)
		policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
	else
		policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;

	return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf));
}