1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_guc_ads.h"
7
8 #include <linux/fault-inject.h>
9
10 #include <drm/drm_managed.h>
11
12 #include <generated/xe_wa_oob.h>
13
14 #include "abi/guc_actions_abi.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_guc_regs.h"
18 #include "xe_bo.h"
19 #include "xe_gt.h"
20 #include "xe_gt_ccs_mode.h"
21 #include "xe_gt_printk.h"
22 #include "xe_guc.h"
23 #include "xe_guc_capture.h"
24 #include "xe_guc_ct.h"
25 #include "xe_hw_engine.h"
26 #include "xe_lrc.h"
27 #include "xe_map.h"
28 #include "xe_mmio.h"
29 #include "xe_platform_types.h"
30 #include "xe_uc_fw.h"
31 #include "xe_wa.h"
32 #include "xe_gt_mcr.h"
33
/*
 * Slack of a few additional entries per engine: calculate_regset_size()
 * reserves this many extra register entries for every hardware engine slot,
 * and guc_mmio_regset_write() checks at build time that its extra register
 * table does not exceed it.
 */
#define ADS_REGSET_EXTRA_MAX 8
36
37 static struct xe_guc *
ads_to_guc(struct xe_guc_ads * ads)38 ads_to_guc(struct xe_guc_ads *ads)
39 {
40 return container_of(ads, struct xe_guc, ads);
41 }
42
43 static struct xe_gt *
ads_to_gt(struct xe_guc_ads * ads)44 ads_to_gt(struct xe_guc_ads *ads)
45 {
46 return container_of(ads, struct xe_gt, uc.guc.ads);
47 }
48
/* Return the xe_device owning the GT that embeds @ads. */
static struct xe_device *
ads_to_xe(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);

	return gt_to_xe(gt);
}
54
55 static struct iosys_map *
ads_to_map(struct xe_guc_ads * ads)56 ads_to_map(struct xe_guc_ads *ads)
57 {
58 return &ads->bo->vmap;
59 }
60
/* UM Queue parameters: */
/* Size in bytes reserved in the ADS buffer for each UM queue. */
#define GUC_UM_QUEUE_SIZE (SZ_64K)
/* Value written to um_init_params.page_response_timeout_in_us. */
#define GUC_PAGE_RES_TIMEOUT_US (-1)
64
65 /*
66 * The Additional Data Struct (ADS) has pointers for different buffers used by
67 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
68 * all the extra buffers indirectly linked via the ADS struct's entries.
69 *
70 * Layout of the ADS blob allocated for the GuC:
71 *
72 * +---------------------------------------+ <== base
73 * | guc_ads |
74 * +---------------------------------------+
75 * | guc_policies |
76 * +---------------------------------------+
77 * | guc_gt_system_info |
78 * +---------------------------------------+
79 * | guc_engine_usage |
80 * +---------------------------------------+
81 * | guc_um_init_params |
82 * +---------------------------------------+ <== static
83 * | guc_mmio_reg[countA] (engine 0.0) |
84 * | guc_mmio_reg[countB] (engine 0.1) |
85 * | guc_mmio_reg[countC] (engine 1.0) |
86 * | ... |
87 * +---------------------------------------+ <== dynamic
88 * | padding |
89 * +---------------------------------------+ <== 4K aligned
90 * | golden contexts |
91 * +---------------------------------------+
92 * | padding |
93 * +---------------------------------------+ <== 4K aligned
94 * | w/a KLVs |
95 * +---------------------------------------+
96 * | padding |
97 * +---------------------------------------+ <== 4K aligned
98 * | capture lists |
99 * +---------------------------------------+
100 * | padding |
101 * +---------------------------------------+ <== 4K aligned
102 * | UM queues |
103 * +---------------------------------------+
104 * | padding |
105 * +---------------------------------------+ <== 4K aligned
106 * | private data |
107 * +---------------------------------------+
108 * | padding |
109 * +---------------------------------------+ <== 4K aligned
110 */
/*
 * Fixed-layout head of the ADS buffer object. Members up to um_init_params
 * are accessed through ads_blob_read()/ads_blob_write(); the regions from
 * @regset onwards are located with the guc_ads_*_offset() helpers.
 */
struct __guc_ads_blob {
	struct guc_ads ads;
	struct guc_policies policies;
	struct guc_gt_system_info system_info;
	struct guc_engine_usage engine_usage;
	struct guc_um_init_params um_init_params;
	/* From here on, location is dynamic! Refer to above diagram. */
	struct guc_mmio_reg regset[];
} __packed;
120
/* Read @field_ of the struct __guc_ads_blob at offset 0 of the ADS mapping. */
#define ads_blob_read(ads_, field_) \
	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
			struct __guc_ads_blob, field_)

/* Write @val_ to @field_ of the struct __guc_ads_blob at offset 0 of the ADS mapping. */
#define ads_blob_write(ads_, field_, val_) \
	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
			struct __guc_ads_blob, field_, val_)

/* Write @val_ to @field_ of the struct guc_gt_system_info that @map_ points at. */
#define info_map_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)

/* Read @field_ of the struct guc_gt_system_info that @map_ points at. */
#define info_map_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
134
guc_ads_regset_size(struct xe_guc_ads * ads)135 static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
136 {
137 struct xe_device *xe = ads_to_xe(ads);
138
139 xe_assert(xe, ads->regset_size);
140
141 return ads->regset_size;
142 }
143
guc_ads_golden_lrc_size(struct xe_guc_ads * ads)144 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
145 {
146 return PAGE_ALIGN(ads->golden_lrc_size);
147 }
148
guc_ads_waklv_size(struct xe_guc_ads * ads)149 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
150 {
151 return PAGE_ALIGN(ads->ads_waklv_size);
152 }
153
guc_ads_capture_size(struct xe_guc_ads * ads)154 static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
155 {
156 return PAGE_ALIGN(ads->capture_size);
157 }
158
guc_ads_um_queues_size(struct xe_guc_ads * ads)159 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
160 {
161 struct xe_device *xe = ads_to_xe(ads);
162
163 if (!xe->info.has_usm)
164 return 0;
165
166 return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
167 }
168
guc_ads_private_data_size(struct xe_guc_ads * ads)169 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
170 {
171 return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
172 }
173
guc_ads_regset_offset(struct xe_guc_ads * ads)174 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
175 {
176 return offsetof(struct __guc_ads_blob, regset);
177 }
178
guc_ads_golden_lrc_offset(struct xe_guc_ads * ads)179 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
180 {
181 size_t offset;
182
183 offset = guc_ads_regset_offset(ads) +
184 guc_ads_regset_size(ads);
185
186 return PAGE_ALIGN(offset);
187 }
188
guc_ads_waklv_offset(struct xe_guc_ads * ads)189 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
190 {
191 u32 offset;
192
193 offset = guc_ads_golden_lrc_offset(ads) +
194 guc_ads_golden_lrc_size(ads);
195
196 return PAGE_ALIGN(offset);
197 }
198
guc_ads_capture_offset(struct xe_guc_ads * ads)199 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
200 {
201 size_t offset;
202
203 offset = guc_ads_waklv_offset(ads) +
204 guc_ads_waklv_size(ads);
205
206 return PAGE_ALIGN(offset);
207 }
208
guc_ads_um_queues_offset(struct xe_guc_ads * ads)209 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
210 {
211 u32 offset;
212
213 offset = guc_ads_capture_offset(ads) +
214 guc_ads_capture_size(ads);
215
216 return PAGE_ALIGN(offset);
217 }
218
guc_ads_private_data_offset(struct xe_guc_ads * ads)219 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
220 {
221 size_t offset;
222
223 offset = guc_ads_um_queues_offset(ads) +
224 guc_ads_um_queues_size(ads);
225
226 return PAGE_ALIGN(offset);
227 }
228
guc_ads_size(struct xe_guc_ads * ads)229 static size_t guc_ads_size(struct xe_guc_ads *ads)
230 {
231 return guc_ads_private_data_offset(ads) +
232 guc_ads_private_data_size(ads);
233 }
234
calculate_regset_size(struct xe_gt * gt)235 static size_t calculate_regset_size(struct xe_gt *gt)
236 {
237 struct xe_reg_sr_entry *sr_entry;
238 unsigned long sr_idx;
239 struct xe_hw_engine *hwe;
240 enum xe_hw_engine_id id;
241 unsigned int count = 0;
242
243 for_each_hw_engine(hwe, gt, id)
244 xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
245 count++;
246
247 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
248
249 if (XE_WA(gt, 1607983814))
250 count += LNCFCMOCS_REG_COUNT;
251
252 return count * sizeof(struct guc_mmio_reg);
253 }
254
engine_enable_mask(struct xe_gt * gt,enum xe_engine_class class)255 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
256 {
257 struct xe_hw_engine *hwe;
258 enum xe_hw_engine_id id;
259 u32 mask = 0;
260
261 for_each_hw_engine(hwe, gt, id)
262 if (hwe->class == class)
263 mask |= BIT(hwe->instance);
264
265 return mask;
266 }
267
calculate_golden_lrc_size(struct xe_guc_ads * ads)268 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
269 {
270 struct xe_gt *gt = ads_to_gt(ads);
271 size_t total_size = 0, alloc_size, real_size;
272 int class;
273
274 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
275 if (!engine_enable_mask(gt, class))
276 continue;
277
278 real_size = xe_gt_lrc_size(gt, class);
279 alloc_size = PAGE_ALIGN(real_size);
280 total_size += alloc_size;
281 }
282
283 return total_size;
284 }
285
/*
 * Append a workaround KLV carrying one dword of data (@value) at @offset in
 * the ADS buffer. On success @offset and @remain are advanced past the entry;
 * if fewer than the entry's bytes remain, a warning is logged and nothing is
 * written.
 */
static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
				      enum xe_guc_klv_ids klv_id,
				      u32 value,
				      u32 *offset, u32 *remain)
{
	struct xe_device *xe = ads_to_xe(ads);
	u32 klv_entry[] = {
		/* 16:16 key/length */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 1),
		/* 1 dword data */
		value,
	};
	u32 size = sizeof(klv_entry);

	if (*remain < size) {
		drm_warn(&xe->drm,
			 "w/a klv buffer too small to add klv id %d\n", klv_id);
		return;
	}

	xe_map_memcpy_to(xe, ads_to_map(ads), *offset, klv_entry, size);
	*offset += size;
	*remain -= size;
}
312
/*
 * Append a workaround KLV with no data (key only, length 0) at @offset in the
 * ADS buffer. On success @offset and @remain are advanced past the entry; if
 * fewer than the entry's bytes remain, a GT warning is raised and nothing is
 * written.
 */
static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
				    enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
{
	struct xe_gt *gt = ads_to_gt(ads);
	u32 klv_entry[] = {
		/* 16:16 key/length */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 0),
		/* 0 dwords data */
	};
	u32 size = sizeof(klv_entry);

	if (xe_gt_WARN(gt, *remain < size,
		       "w/a klv buffer too small to add klv id %d\n", klv_id))
		return;

	xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
			 klv_entry, size);
	*offset += size;
	*remain -= size;
}
335
guc_waklv_init(struct xe_guc_ads * ads)336 static void guc_waklv_init(struct xe_guc_ads *ads)
337 {
338 struct xe_gt *gt = ads_to_gt(ads);
339 u64 addr_ggtt;
340 u32 offset, remain, size;
341
342 offset = guc_ads_waklv_offset(ads);
343 remain = guc_ads_waklv_size(ads);
344
345 if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562))
346 guc_waklv_enable_simple(ads,
347 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
348 &offset, &remain);
349 if (XE_WA(gt, 18024947630))
350 guc_waklv_enable_simple(ads,
351 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
352 &offset, &remain);
353 if (XE_WA(gt, 16022287689))
354 guc_waklv_enable_simple(ads,
355 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
356 &offset, &remain);
357
358 if (XE_WA(gt, 14022866841))
359 guc_waklv_enable_simple(ads,
360 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
361 &offset, &remain);
362
363 /*
364 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
365 * the default value for this register is determined to be 0xC40. This could change in the
366 * future, so GuC depends on KMD to send it the correct value.
367 */
368 if (XE_WA(gt, 13011645652))
369 guc_waklv_enable_one_word(ads,
370 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
371 0xC40,
372 &offset, &remain);
373
374 if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
375 guc_waklv_enable_simple(ads,
376 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
377 &offset, &remain);
378
379 if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708))
380 guc_waklv_enable_simple(ads,
381 GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH,
382 &offset, &remain);
383
384 size = guc_ads_waklv_size(ads) - remain;
385 if (!size)
386 return;
387
388 offset = guc_ads_waklv_offset(ads);
389 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
390
391 ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
392 ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
393 ads_blob_write(ads, ads.wa_klv_size, size);
394 }
395
calculate_waklv_size(struct xe_guc_ads * ads)396 static int calculate_waklv_size(struct xe_guc_ads *ads)
397 {
398 /*
399 * A single page is both the minimum size possible and
400 * is sufficiently large enough for all current platforms.
401 */
402 return SZ_4K;
403 }
404
/*
 * Extra space added on top of guc_ads_size() when xe_guc_ads_init() allocates
 * the ADS buffer; xe_guc_ads_init_post_hwconfig() asserts that the
 * recalculated golden LRC size plus the regset size change stays within it.
 */
#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64)
406
xe_guc_ads_init(struct xe_guc_ads * ads)407 int xe_guc_ads_init(struct xe_guc_ads *ads)
408 {
409 struct xe_device *xe = ads_to_xe(ads);
410 struct xe_gt *gt = ads_to_gt(ads);
411 struct xe_tile *tile = gt_to_tile(gt);
412 struct xe_bo *bo;
413
414 ads->golden_lrc_size = calculate_golden_lrc_size(ads);
415 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
416 ads->regset_size = calculate_regset_size(gt);
417 ads->ads_waklv_size = calculate_waklv_size(ads);
418
419 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
420 XE_BO_FLAG_SYSTEM |
421 XE_BO_FLAG_GGTT |
422 XE_BO_FLAG_GGTT_INVALIDATE |
423 XE_BO_FLAG_PINNED_NORESTORE);
424 if (IS_ERR(bo))
425 return PTR_ERR(bo);
426
427 ads->bo = bo;
428
429 return 0;
430 }
431 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
432
433 /**
434 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
435 * @ads: Additional data structures object
436 *
437 * Recalculate golden_lrc_size, capture_size and regset_size as the number
438 * hardware engines may have changed after the hwconfig was loaded. Also verify
439 * the new sizes fit in the already allocated ADS buffer object.
440 *
441 * Return: 0 on success, negative error code on error.
442 */
xe_guc_ads_init_post_hwconfig(struct xe_guc_ads * ads)443 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
444 {
445 struct xe_gt *gt = ads_to_gt(ads);
446 u32 prev_regset_size = ads->regset_size;
447
448 xe_gt_assert(gt, ads->bo);
449
450 ads->golden_lrc_size = calculate_golden_lrc_size(ads);
451 /* Calculate Capture size with worst size */
452 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
453 ads->regset_size = calculate_regset_size(gt);
454
455 xe_gt_assert(gt, ads->golden_lrc_size +
456 (ads->regset_size - prev_regset_size) <=
457 MAX_GOLDEN_LRC_SIZE);
458
459 return 0;
460 }
461
guc_policies_init(struct xe_guc_ads * ads)462 static void guc_policies_init(struct xe_guc_ads *ads)
463 {
464 struct xe_device *xe = ads_to_xe(ads);
465 u32 global_flags = 0;
466
467 ads_blob_write(ads, policies.dpc_promote_time,
468 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
469 ads_blob_write(ads, policies.max_num_work_items,
470 GLOBAL_POLICY_MAX_NUM_WI);
471
472 if (xe->wedged.mode == 2)
473 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
474
475 ads_blob_write(ads, policies.global_flags, global_flags);
476 ads_blob_write(ads, policies.is_valid, 1);
477 }
478
fill_engine_enable_masks(struct xe_gt * gt,struct iosys_map * info_map)479 static void fill_engine_enable_masks(struct xe_gt *gt,
480 struct iosys_map *info_map)
481 {
482 struct xe_device *xe = gt_to_xe(gt);
483
484 info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
485 engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
486 info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
487 engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
488 info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
489 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
490 info_map_write(xe, info_map,
491 engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
492 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
493 info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
494 engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
495 info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
496 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
497 }
498
499 /*
500 * Write the offsets corresponding to the golden LRCs. The actual data is
501 * populated later by guc_golden_lrc_populate()
502 */
guc_golden_lrc_init(struct xe_guc_ads * ads)503 static void guc_golden_lrc_init(struct xe_guc_ads *ads)
504 {
505 struct xe_device *xe = ads_to_xe(ads);
506 struct xe_gt *gt = ads_to_gt(ads);
507 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
508 offsetof(struct __guc_ads_blob, system_info));
509 size_t alloc_size, real_size;
510 u32 addr_ggtt, offset;
511 int class;
512
513 offset = guc_ads_golden_lrc_offset(ads);
514 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
515
516 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
517 u8 guc_class;
518
519 guc_class = xe_engine_class_to_guc_class(class);
520
521 if (!info_map_read(xe, &info_map,
522 engine_enabled_masks[guc_class]))
523 continue;
524
525 real_size = xe_gt_lrc_size(gt, class);
526 alloc_size = PAGE_ALIGN(real_size);
527
528 /*
529 * This interface is slightly confusing. We need to pass the
530 * base address of the full golden context and the size of just
531 * the engine state, which is the section of the context image
532 * that starts after the execlists LRC registers. This is
533 * required to allow the GuC to restore just the engine state
534 * when a watchdog reset occurs.
535 * We calculate the engine state size by removing the size of
536 * what comes before it in the context image (which is identical
537 * on all engines).
538 */
539 ads_blob_write(ads, ads.eng_state_size[guc_class],
540 real_size - xe_lrc_skip_size(xe));
541 ads_blob_write(ads, ads.golden_context_lrca[guc_class],
542 addr_ggtt);
543
544 addr_ggtt += alloc_size;
545 }
546 }
547
guc_mapping_table_init_invalid(struct xe_gt * gt,struct iosys_map * info_map)548 static void guc_mapping_table_init_invalid(struct xe_gt *gt,
549 struct iosys_map *info_map)
550 {
551 struct xe_device *xe = gt_to_xe(gt);
552 unsigned int i, j;
553
554 /* Table must be set to invalid values for entries not used */
555 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
556 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
557 info_map_write(xe, info_map, mapping_table[i][j],
558 GUC_MAX_INSTANCES_PER_CLASS);
559 }
560
guc_mapping_table_init(struct xe_gt * gt,struct iosys_map * info_map)561 static void guc_mapping_table_init(struct xe_gt *gt,
562 struct iosys_map *info_map)
563 {
564 struct xe_device *xe = gt_to_xe(gt);
565 struct xe_hw_engine *hwe;
566 enum xe_hw_engine_id id;
567
568 guc_mapping_table_init_invalid(gt, info_map);
569
570 for_each_hw_engine(hwe, gt, id) {
571 u8 guc_class;
572
573 guc_class = xe_engine_class_to_guc_class(hwe->class);
574 info_map_write(xe, info_map,
575 mapping_table[guc_class][hwe->logical_instance],
576 hwe->instance);
577 }
578 }
579
guc_get_capture_engine_mask(struct xe_gt * gt,struct iosys_map * info_map,enum guc_capture_list_class_type capture_class)580 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
581 enum guc_capture_list_class_type capture_class)
582 {
583 struct xe_device *xe = gt_to_xe(gt);
584 u32 mask;
585
586 switch (capture_class) {
587 case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
588 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
589 mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
590 break;
591 case GUC_CAPTURE_LIST_CLASS_VIDEO:
592 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
593 break;
594 case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
595 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
596 break;
597 case GUC_CAPTURE_LIST_CLASS_BLITTER:
598 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
599 break;
600 case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
601 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
602 break;
603 default:
604 mask = 0;
605 }
606
607 return mask;
608 }
609
get_capture_list(struct xe_guc_ads * ads,struct xe_guc * guc,struct xe_gt * gt,int owner,int type,int class,u32 * total_size,size_t * size,void ** pptr)610 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
611 int owner, int type, int class, u32 *total_size, size_t *size,
612 void **pptr)
613 {
614 *size = 0;
615
616 if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
617 if (*total_size + *size > ads->capture_size)
618 xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
619 *total_size + *size, ads->capture_size);
620 else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
621 return false;
622 }
623
624 return true;
625 }
626
guc_capture_prep_lists(struct xe_guc_ads * ads)627 static int guc_capture_prep_lists(struct xe_guc_ads *ads)
628 {
629 struct xe_guc *guc = ads_to_guc(ads);
630 struct xe_gt *gt = ads_to_gt(ads);
631 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
632 struct iosys_map info_map;
633 size_t size = 0;
634 void *ptr;
635 int i, j;
636
637 /*
638 * GuC Capture's steered reg-list needs to be allocated and initialized
639 * after the GuC-hwconfig is available which guaranteed from here.
640 */
641 xe_guc_capture_steered_list_init(ads_to_guc(ads));
642
643 capture_offset = guc_ads_capture_offset(ads);
644 ads_ggtt = xe_bo_ggtt_addr(ads->bo);
645 info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
646 offsetof(struct __guc_ads_blob, system_info));
647
648 /* first, set aside the first page for a capture_list with zero descriptors */
649 total_size = PAGE_SIZE;
650 if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
651 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);
652
653 null_ggtt = ads_ggtt + capture_offset;
654 capture_offset += PAGE_SIZE;
655
656 /*
657 * Populate capture list : at this point adps is already allocated and
658 * mapped to worst case size
659 */
660 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
661 bool write_empty_list;
662
663 for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
664 u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
665 /* null list if we dont have said engine or list */
666 if (!engine_mask) {
667 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
668 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
669 continue;
670 }
671
672 /* engine exists: start with engine-class registers */
673 write_empty_list = get_capture_list(ads, guc, gt, i,
674 GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
675 j, &total_size, &size, &ptr);
676 if (!write_empty_list) {
677 ads_blob_write(ads, ads.capture_class[i][j],
678 ads_ggtt + capture_offset);
679 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
680 ptr, size);
681 total_size += size;
682 capture_offset += size;
683 } else {
684 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
685 }
686
687 /* engine exists: next, engine-instance registers */
688 write_empty_list = get_capture_list(ads, guc, gt, i,
689 GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
690 j, &total_size, &size, &ptr);
691 if (!write_empty_list) {
692 ads_blob_write(ads, ads.capture_instance[i][j],
693 ads_ggtt + capture_offset);
694 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
695 ptr, size);
696 total_size += size;
697 capture_offset += size;
698 } else {
699 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
700 }
701 }
702
703 /* global registers is last in our PF/VF loops */
704 write_empty_list = get_capture_list(ads, guc, gt, i,
705 GUC_STATE_CAPTURE_TYPE_GLOBAL,
706 0, &total_size, &size, &ptr);
707 if (!write_empty_list) {
708 ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
709 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
710 size);
711 total_size += size;
712 capture_offset += size;
713 } else {
714 ads_blob_write(ads, ads.capture_global[i], null_ggtt);
715 }
716 }
717
718 if (ads->capture_size != PAGE_ALIGN(total_size))
719 xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n",
720 PAGE_ALIGN(total_size), ads->capture_size);
721 return PAGE_ALIGN(total_size);
722 }
723
guc_mmio_regset_write_one(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_reg reg,unsigned int n_entry)724 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
725 struct iosys_map *regset_map,
726 struct xe_reg reg,
727 unsigned int n_entry)
728 {
729 struct guc_mmio_reg entry = {
730 .offset = reg.addr,
731 .flags = reg.masked ? GUC_REGSET_MASKED : 0,
732 };
733
734 if (reg.mcr) {
735 struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
736 u8 group, instance;
737
738 bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
739 &group, &instance);
740
741 if (steer) {
742 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
743 entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
744 entry.flags |= GUC_REGSET_STEERING_NEEDED;
745 }
746 }
747
748 xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
749 &entry, sizeof(entry));
750 }
751
guc_mmio_regset_write(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_hw_engine * hwe)752 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
753 struct iosys_map *regset_map,
754 struct xe_hw_engine *hwe)
755 {
756 struct xe_hw_engine *hwe_rcs_reset_domain =
757 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
758 struct xe_reg_sr_entry *entry;
759 unsigned long idx;
760 unsigned int count = 0;
761 const struct {
762 struct xe_reg reg;
763 bool skip;
764 } *e, extra_regs[] = {
765 { .reg = RING_MODE(hwe->mmio_base), },
766 { .reg = RING_HWS_PGA(hwe->mmio_base), },
767 { .reg = RING_IMR(hwe->mmio_base), },
768 { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain },
769 { .reg = CCS_MODE,
770 .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
771 };
772 u32 i;
773
774 BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
775
776 xa_for_each(&hwe->reg_sr.xa, idx, entry)
777 guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
778
779 for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
780 if (e->skip)
781 continue;
782
783 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
784 }
785
786 if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
787 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
788 guc_mmio_regset_write_one(ads, regset_map,
789 XELP_LNCFCMOCS(i), count++);
790 }
791 }
792
793 return count;
794 }
795
guc_mmio_reg_state_init(struct xe_guc_ads * ads)796 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
797 {
798 size_t regset_offset = guc_ads_regset_offset(ads);
799 struct xe_gt *gt = ads_to_gt(ads);
800 struct xe_hw_engine *hwe;
801 enum xe_hw_engine_id id;
802 u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
803 struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
804 regset_offset);
805 unsigned int regset_used = 0;
806
807 for_each_hw_engine(hwe, gt, id) {
808 unsigned int count;
809 u8 gc;
810
811 /*
812 * 1. Write all MMIO entries for this exec queue to the table. No
813 * need to worry about fused-off engines and when there are
814 * entries in the regset: the reg_state_list has been zero'ed
815 * by xe_guc_ads_populate()
816 */
817 count = guc_mmio_regset_write(ads, ®set_map, hwe);
818 if (!count)
819 continue;
820
821 /*
822 * 2. Record in the header (ads.reg_state_list) the address
823 * location and number of entries
824 */
825 gc = xe_engine_class_to_guc_class(hwe->class);
826 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
827 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
828
829 addr += count * sizeof(struct guc_mmio_reg);
830 iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg));
831
832 regset_used += count * sizeof(struct guc_mmio_reg);
833 }
834
835 xe_gt_assert(gt, regset_used <= ads->regset_size);
836 }
837
guc_um_init_params(struct xe_guc_ads * ads)838 static void guc_um_init_params(struct xe_guc_ads *ads)
839 {
840 u32 um_queue_offset = guc_ads_um_queues_offset(ads);
841 u64 base_dpa;
842 u32 base_ggtt;
843 int i;
844
845 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
846 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
847
848 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
849 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
850 base_dpa + (i * GUC_UM_QUEUE_SIZE));
851 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
852 base_ggtt + (i * GUC_UM_QUEUE_SIZE));
853 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
854 GUC_UM_QUEUE_SIZE);
855 }
856
857 ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
858 GUC_PAGE_RES_TIMEOUT_US);
859 }
860
guc_doorbell_init(struct xe_guc_ads * ads)861 static void guc_doorbell_init(struct xe_guc_ads *ads)
862 {
863 struct xe_device *xe = ads_to_xe(ads);
864 struct xe_gt *gt = ads_to_gt(ads);
865
866 if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
867 u32 distdbreg =
868 xe_mmio_read32(>->mmio, DIST_DBS_POPULATED);
869
870 ads_blob_write(ads,
871 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
872 REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
873 }
874 }
875
876 /**
877 * xe_guc_ads_populate_minimal - populate minimal ADS
878 * @ads: Additional data structures object
879 *
880 * This function populates a minimal ADS that does not support submissions but
881 * enough so the GuC can load and the hwconfig table can be read.
882 */
xe_guc_ads_populate_minimal(struct xe_guc_ads * ads)883 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
884 {
885 struct xe_gt *gt = ads_to_gt(ads);
886 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
887 offsetof(struct __guc_ads_blob, system_info));
888 u32 base = xe_bo_ggtt_addr(ads->bo);
889
890 xe_gt_assert(gt, ads->bo);
891
892 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
893 guc_policies_init(ads);
894 guc_golden_lrc_init(ads);
895 guc_mapping_table_init_invalid(gt, &info_map);
896 guc_doorbell_init(ads);
897
898 ads_blob_write(ads, ads.scheduler_policies, base +
899 offsetof(struct __guc_ads_blob, policies));
900 ads_blob_write(ads, ads.gt_system_info, base +
901 offsetof(struct __guc_ads_blob, system_info));
902 ads_blob_write(ads, ads.private_data, base +
903 guc_ads_private_data_offset(ads));
904 }
905
xe_guc_ads_populate(struct xe_guc_ads * ads)906 void xe_guc_ads_populate(struct xe_guc_ads *ads)
907 {
908 struct xe_device *xe = ads_to_xe(ads);
909 struct xe_gt *gt = ads_to_gt(ads);
910 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
911 offsetof(struct __guc_ads_blob, system_info));
912 u32 base = xe_bo_ggtt_addr(ads->bo);
913
914 xe_gt_assert(gt, ads->bo);
915
916 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
917 guc_policies_init(ads);
918 fill_engine_enable_masks(gt, &info_map);
919 guc_mmio_reg_state_init(ads);
920 guc_golden_lrc_init(ads);
921 guc_mapping_table_init(gt, &info_map);
922 guc_capture_prep_lists(ads);
923 guc_doorbell_init(ads);
924 guc_waklv_init(ads);
925
926 if (xe->info.has_usm) {
927 guc_um_init_params(ads);
928 ads_blob_write(ads, ads.um_init_data, base +
929 offsetof(struct __guc_ads_blob, um_init_params));
930 }
931
932 ads_blob_write(ads, ads.scheduler_policies, base +
933 offsetof(struct __guc_ads_blob, policies));
934 ads_blob_write(ads, ads.gt_system_info, base +
935 offsetof(struct __guc_ads_blob, system_info));
936 ads_blob_write(ads, ads.private_data, base +
937 guc_ads_private_data_offset(ads));
938 }
939
940 /*
941 * After the golden LRC's are recorded for each engine class by the first
942 * submission, copy them to the ADS, as initialized earlier by
943 * guc_golden_lrc_init().
944 */
guc_golden_lrc_populate(struct xe_guc_ads * ads)945 static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
946 {
947 struct xe_device *xe = ads_to_xe(ads);
948 struct xe_gt *gt = ads_to_gt(ads);
949 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
950 offsetof(struct __guc_ads_blob, system_info));
951 size_t total_size = 0, alloc_size, real_size;
952 u32 offset;
953 int class;
954
955 offset = guc_ads_golden_lrc_offset(ads);
956
957 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
958 u8 guc_class;
959
960 guc_class = xe_engine_class_to_guc_class(class);
961
962 if (!info_map_read(xe, &info_map,
963 engine_enabled_masks[guc_class]))
964 continue;
965
966 xe_gt_assert(gt, gt->default_lrc[class]);
967
968 real_size = xe_gt_lrc_size(gt, class);
969 alloc_size = PAGE_ALIGN(real_size);
970 total_size += alloc_size;
971
972 xe_map_memcpy_to(xe, ads_to_map(ads), offset,
973 gt->default_lrc[class], real_size);
974
975 offset += alloc_size;
976 }
977
978 xe_gt_assert(gt, total_size == ads->golden_lrc_size);
979 }
980
/**
 * xe_guc_ads_populate_post_load - populate the ADS parts that need a loaded GuC
 * @ads: Additional data structures object
 *
 * Copies the golden LRCs into the ADS.
 */
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
	guc_golden_lrc_populate(ads);
}
985
/*
 * Send XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE with @policy_offset as its
 * argument over the GuC CT channel. Returns the result of xe_guc_ct_send().
 */
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
{
	u32 action[] = {
		XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
		policy_offset
	};

	return xe_guc_ct_send(&ads_to_guc(ads)->ct, action, ARRAY_SIZE(action), 0, 0);
}
996
997 /**
998 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
999 * @ads: Additional data structures object
1000 *
1001 * This function update the GuC's engine reset policy based on wedged.mode.
1002 *
1003 * Return: 0 on success, and negative error code otherwise.
1004 */
xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads * ads)1005 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
1006 {
1007 struct xe_device *xe = ads_to_xe(ads);
1008 struct xe_gt *gt = ads_to_gt(ads);
1009 struct xe_tile *tile = gt_to_tile(gt);
1010 struct guc_policies *policies;
1011 struct xe_bo *bo;
1012 int ret = 0;
1013
1014 policies = kmalloc(sizeof(*policies), GFP_KERNEL);
1015 if (!policies)
1016 return -ENOMEM;
1017
1018 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
1019 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
1020 policies->is_valid = 1;
1021 if (xe->wedged.mode == 2)
1022 policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
1023 else
1024 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
1025
1026 bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
1027 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
1028 XE_BO_FLAG_GGTT);
1029 if (IS_ERR(bo)) {
1030 ret = PTR_ERR(bo);
1031 goto out;
1032 }
1033
1034 ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
1035 out:
1036 kfree(policies);
1037 return ret;
1038 }
1039