1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "xe_guc_ads.h"
7
8 #include <linux/fault-inject.h>
9
10 #include <drm/drm_managed.h>
11
12 #include <generated/xe_wa_oob.h>
13
14 #include "abi/guc_actions_abi.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_guc_regs.h"
18 #include "xe_bo.h"
19 #include "xe_gt.h"
20 #include "xe_gt_ccs_mode.h"
21 #include "xe_gt_printk.h"
22 #include "xe_guc.h"
23 #include "xe_guc_capture.h"
24 #include "xe_guc_ct.h"
25 #include "xe_hw_engine.h"
26 #include "xe_lrc.h"
27 #include "xe_map.h"
28 #include "xe_mmio.h"
29 #include "xe_platform_types.h"
30 #include "xe_uc_fw.h"
31 #include "xe_wa.h"
32
/*
 * Per-engine headroom in the MMIO save/restore list: room for this many
 * entries on top of the ones taken from each engine's reg_sr.
 */
#define ADS_REGSET_EXTRA_MAX	8
35
36 static struct xe_guc *
ads_to_guc(struct xe_guc_ads * ads)37 ads_to_guc(struct xe_guc_ads *ads)
38 {
39 return container_of(ads, struct xe_guc, ads);
40 }
41
42 static struct xe_gt *
ads_to_gt(struct xe_guc_ads * ads)43 ads_to_gt(struct xe_guc_ads *ads)
44 {
45 return container_of(ads, struct xe_gt, uc.guc.ads);
46 }
47
/* Return the xe_device owning the GT that @ads belongs to. */
static struct xe_device *ads_to_xe(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);

	return gt_to_xe(gt);
}
53
54 static struct iosys_map *
ads_to_map(struct xe_guc_ads * ads)55 ads_to_map(struct xe_guc_ads *ads)
56 {
57 return &ads->bo->vmap;
58 }
59
/*
 * UM queue parameters: size in bytes of each UM queue, and the value
 * written to um_init_params.page_response_timeout_in_us.
 */
#define GUC_UM_QUEUE_SIZE       (SZ_64K)
#define GUC_PAGE_RES_TIMEOUT_US (-1)
63
64 /*
65 * The Additional Data Struct (ADS) has pointers for different buffers used by
66 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
67 * all the extra buffers indirectly linked via the ADS struct's entries.
68 *
69 * Layout of the ADS blob allocated for the GuC:
70 *
71 * +---------------------------------------+ <== base
72 * | guc_ads |
73 * +---------------------------------------+
74 * | guc_policies |
75 * +---------------------------------------+
76 * | guc_gt_system_info |
77 * +---------------------------------------+
78 * | guc_engine_usage |
79 * +---------------------------------------+
80 * | guc_um_init_params |
81 * +---------------------------------------+ <== static
82 * | guc_mmio_reg[countA] (engine 0.0) |
83 * | guc_mmio_reg[countB] (engine 0.1) |
84 * | guc_mmio_reg[countC] (engine 1.0) |
85 * | ... |
86 * +---------------------------------------+ <== dynamic
87 * | padding |
88 * +---------------------------------------+ <== 4K aligned
89 * | golden contexts |
90 * +---------------------------------------+
91 * | padding |
92 * +---------------------------------------+ <== 4K aligned
93 * | w/a KLVs |
94 * +---------------------------------------+
95 * | padding |
96 * +---------------------------------------+ <== 4K aligned
97 * | capture lists |
98 * +---------------------------------------+
99 * | padding |
100 * +---------------------------------------+ <== 4K aligned
101 * | UM queues |
102 * +---------------------------------------+
103 * | padding |
104 * +---------------------------------------+ <== 4K aligned
105 * | private data |
106 * +---------------------------------------+
107 * | padding |
108 * +---------------------------------------+ <== 4K aligned
109 */
110 struct __guc_ads_blob {
111 struct guc_ads ads;
112 struct guc_policies policies;
113 struct guc_gt_system_info system_info;
114 struct guc_engine_usage engine_usage;
115 struct guc_um_init_params um_init_params;
116 /* From here on, location is dynamic! Refer to above diagram. */
117 struct guc_mmio_reg regset[];
118 } __packed;
119
/* Read @field_ of struct __guc_ads_blob from the start of the ADS mapping. */
#define ads_blob_read(ads_, field_)					\
	xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0,		\
			struct __guc_ads_blob, field_)

/* Write @val_ to @field_ of struct __guc_ads_blob in the ADS mapping. */
#define ads_blob_write(ads_, field_, val_)				\
	xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0,		\
			struct __guc_ads_blob, field_, val_)

/* Write @val_ to @field_ of a struct guc_gt_system_info located at @map_. */
#define info_map_write(xe_, map_, field_, val_)				\
	xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)

/* Read @field_ of a struct guc_gt_system_info located at @map_. */
#define info_map_read(xe_, map_, field_)				\
	xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
133
guc_ads_regset_size(struct xe_guc_ads * ads)134 static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
135 {
136 struct xe_device *xe = ads_to_xe(ads);
137
138 xe_assert(xe, ads->regset_size);
139
140 return ads->regset_size;
141 }
142
guc_ads_golden_lrc_size(struct xe_guc_ads * ads)143 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
144 {
145 return PAGE_ALIGN(ads->golden_lrc_size);
146 }
147
guc_ads_waklv_size(struct xe_guc_ads * ads)148 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
149 {
150 return PAGE_ALIGN(ads->ads_waklv_size);
151 }
152
guc_ads_capture_size(struct xe_guc_ads * ads)153 static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
154 {
155 return PAGE_ALIGN(ads->capture_size);
156 }
157
guc_ads_um_queues_size(struct xe_guc_ads * ads)158 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
159 {
160 struct xe_device *xe = ads_to_xe(ads);
161
162 if (!xe->info.has_usm)
163 return 0;
164
165 return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
166 }
167
guc_ads_private_data_size(struct xe_guc_ads * ads)168 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
169 {
170 return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
171 }
172
guc_ads_regset_offset(struct xe_guc_ads * ads)173 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
174 {
175 return offsetof(struct __guc_ads_blob, regset);
176 }
177
guc_ads_golden_lrc_offset(struct xe_guc_ads * ads)178 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
179 {
180 size_t offset;
181
182 offset = guc_ads_regset_offset(ads) +
183 guc_ads_regset_size(ads);
184
185 return PAGE_ALIGN(offset);
186 }
187
guc_ads_waklv_offset(struct xe_guc_ads * ads)188 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
189 {
190 u32 offset;
191
192 offset = guc_ads_golden_lrc_offset(ads) +
193 guc_ads_golden_lrc_size(ads);
194
195 return PAGE_ALIGN(offset);
196 }
197
guc_ads_capture_offset(struct xe_guc_ads * ads)198 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
199 {
200 size_t offset;
201
202 offset = guc_ads_waklv_offset(ads) +
203 guc_ads_waklv_size(ads);
204
205 return PAGE_ALIGN(offset);
206 }
207
guc_ads_um_queues_offset(struct xe_guc_ads * ads)208 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
209 {
210 u32 offset;
211
212 offset = guc_ads_capture_offset(ads) +
213 guc_ads_capture_size(ads);
214
215 return PAGE_ALIGN(offset);
216 }
217
guc_ads_private_data_offset(struct xe_guc_ads * ads)218 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
219 {
220 size_t offset;
221
222 offset = guc_ads_um_queues_offset(ads) +
223 guc_ads_um_queues_size(ads);
224
225 return PAGE_ALIGN(offset);
226 }
227
guc_ads_size(struct xe_guc_ads * ads)228 static size_t guc_ads_size(struct xe_guc_ads *ads)
229 {
230 return guc_ads_private_data_offset(ads) +
231 guc_ads_private_data_size(ads);
232 }
233
needs_wa_1607983814(struct xe_device * xe)234 static bool needs_wa_1607983814(struct xe_device *xe)
235 {
236 return GRAPHICS_VERx100(xe) < 1250;
237 }
238
calculate_regset_size(struct xe_gt * gt)239 static size_t calculate_regset_size(struct xe_gt *gt)
240 {
241 struct xe_reg_sr_entry *sr_entry;
242 unsigned long sr_idx;
243 struct xe_hw_engine *hwe;
244 enum xe_hw_engine_id id;
245 unsigned int count = 0;
246
247 for_each_hw_engine(hwe, gt, id)
248 xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
249 count++;
250
251 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
252
253 if (needs_wa_1607983814(gt_to_xe(gt)))
254 count += LNCFCMOCS_REG_COUNT;
255
256 return count * sizeof(struct guc_mmio_reg);
257 }
258
engine_enable_mask(struct xe_gt * gt,enum xe_engine_class class)259 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
260 {
261 struct xe_hw_engine *hwe;
262 enum xe_hw_engine_id id;
263 u32 mask = 0;
264
265 for_each_hw_engine(hwe, gt, id)
266 if (hwe->class == class)
267 mask |= BIT(hwe->instance);
268
269 return mask;
270 }
271
calculate_golden_lrc_size(struct xe_guc_ads * ads)272 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
273 {
274 struct xe_gt *gt = ads_to_gt(ads);
275 size_t total_size = 0, alloc_size, real_size;
276 int class;
277
278 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
279 if (!engine_enable_mask(gt, class))
280 continue;
281
282 real_size = xe_gt_lrc_size(gt, class);
283 alloc_size = PAGE_ALIGN(real_size);
284 total_size += alloc_size;
285 }
286
287 return total_size;
288 }
289
/*
 * Append a workaround KLV carrying one dword of data to the ADS blob.
 *
 * @offset and @remain are the write position in the blob and the space left
 * in the KLV section; both are advanced on success. If the entry does not
 * fit, a warning is logged and nothing is written.
 */
static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
				      enum xe_guc_klv_ids klv_id,
				      u32 value,
				      u32 *offset, u32 *remain)
{
	struct xe_device *xe = ads_to_xe(ads);
	u32 klv[] = {
		/* header dword: 16:16 key/length, length is 1 dword */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 1),
		/* payload dword */
		value,
	};
	u32 bytes = sizeof(klv);

	if (*remain < bytes) {
		drm_warn(&xe->drm,
			 "w/a klv buffer too small to add klv id %d\n", klv_id);
		return;
	}

	xe_map_memcpy_to(xe, ads_to_map(ads), *offset, klv, bytes);
	*offset += bytes;
	*remain -= bytes;
}
316
/*
 * Append a workaround KLV without payload (header dword only) to the ADS
 * blob.
 *
 * @offset and @remain are the write position in the blob and the space left
 * in the KLV section; both are advanced on success. If the entry does not
 * fit, a WARN is raised and nothing is written.
 */
static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
				    enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
{
	u32 klv[] = {
		/* header dword: 16:16 key/length, length is 0 dwords */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 0),
	};
	u32 bytes = sizeof(klv);

	if (xe_gt_WARN(ads_to_gt(ads), *remain < bytes,
		       "w/a klv buffer too small to add klv id %d\n", klv_id))
		return;

	xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, klv, bytes);
	*offset += bytes;
	*remain -= bytes;
}
339
guc_waklv_init(struct xe_guc_ads * ads)340 static void guc_waklv_init(struct xe_guc_ads *ads)
341 {
342 struct xe_gt *gt = ads_to_gt(ads);
343 u64 addr_ggtt;
344 u32 offset, remain, size;
345
346 offset = guc_ads_waklv_offset(ads);
347 remain = guc_ads_waklv_size(ads);
348
349 if (XE_WA(gt, 14019882105))
350 guc_waklv_enable_simple(ads,
351 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
352 &offset, &remain);
353 if (XE_WA(gt, 18024947630))
354 guc_waklv_enable_simple(ads,
355 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
356 &offset, &remain);
357 if (XE_WA(gt, 16022287689))
358 guc_waklv_enable_simple(ads,
359 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
360 &offset, &remain);
361
362 if (XE_WA(gt, 14022866841))
363 guc_waklv_enable_simple(ads,
364 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
365 &offset, &remain);
366
367 /*
368 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
369 * the default value for this register is determined to be 0xC40. This could change in the
370 * future, so GuC depends on KMD to send it the correct value.
371 */
372 if (XE_WA(gt, 13011645652))
373 guc_waklv_enable_one_word(ads,
374 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
375 0xC40,
376 &offset, &remain);
377
378 if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
379 guc_waklv_enable_simple(ads,
380 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
381 &offset, &remain);
382
383 size = guc_ads_waklv_size(ads) - remain;
384 if (!size)
385 return;
386
387 offset = guc_ads_waklv_offset(ads);
388 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
389
390 ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
391 ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
392 ads_blob_write(ads, ads.wa_klv_size, size);
393 }
394
calculate_waklv_size(struct xe_guc_ads * ads)395 static int calculate_waklv_size(struct xe_guc_ads *ads)
396 {
397 /*
398 * A single page is both the minimum size possible and
399 * is sufficiently large enough for all current platforms.
400 */
401 return SZ_4K;
402 }
403
/*
 * Extra room added to the ADS allocation in xe_guc_ads_init(); also the
 * bound asserted in xe_guc_ads_init_post_hwconfig().
 */
#define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
405
xe_guc_ads_init(struct xe_guc_ads * ads)406 int xe_guc_ads_init(struct xe_guc_ads *ads)
407 {
408 struct xe_device *xe = ads_to_xe(ads);
409 struct xe_gt *gt = ads_to_gt(ads);
410 struct xe_tile *tile = gt_to_tile(gt);
411 struct xe_bo *bo;
412
413 ads->golden_lrc_size = calculate_golden_lrc_size(ads);
414 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
415 ads->regset_size = calculate_regset_size(gt);
416 ads->ads_waklv_size = calculate_waklv_size(ads);
417
418 bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
419 XE_BO_FLAG_SYSTEM |
420 XE_BO_FLAG_GGTT |
421 XE_BO_FLAG_GGTT_INVALIDATE);
422 if (IS_ERR(bo))
423 return PTR_ERR(bo);
424
425 ads->bo = bo;
426
427 return 0;
428 }
429 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
430
431 /**
432 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
433 * @ads: Additional data structures object
434 *
435 * Recalculate golden_lrc_size, capture_size and regset_size as the number
436 * hardware engines may have changed after the hwconfig was loaded. Also verify
437 * the new sizes fit in the already allocated ADS buffer object.
438 *
439 * Return: 0 on success, negative error code on error.
440 */
xe_guc_ads_init_post_hwconfig(struct xe_guc_ads * ads)441 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
442 {
443 struct xe_gt *gt = ads_to_gt(ads);
444 u32 prev_regset_size = ads->regset_size;
445
446 xe_gt_assert(gt, ads->bo);
447
448 ads->golden_lrc_size = calculate_golden_lrc_size(ads);
449 /* Calculate Capture size with worst size */
450 ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
451 ads->regset_size = calculate_regset_size(gt);
452
453 xe_gt_assert(gt, ads->golden_lrc_size +
454 (ads->regset_size - prev_regset_size) <=
455 MAX_GOLDEN_LRC_SIZE);
456
457 return 0;
458 }
459
guc_policies_init(struct xe_guc_ads * ads)460 static void guc_policies_init(struct xe_guc_ads *ads)
461 {
462 struct xe_device *xe = ads_to_xe(ads);
463 u32 global_flags = 0;
464
465 ads_blob_write(ads, policies.dpc_promote_time,
466 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
467 ads_blob_write(ads, policies.max_num_work_items,
468 GLOBAL_POLICY_MAX_NUM_WI);
469
470 if (xe->wedged.mode == 2)
471 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
472
473 ads_blob_write(ads, policies.global_flags, global_flags);
474 ads_blob_write(ads, policies.is_valid, 1);
475 }
476
fill_engine_enable_masks(struct xe_gt * gt,struct iosys_map * info_map)477 static void fill_engine_enable_masks(struct xe_gt *gt,
478 struct iosys_map *info_map)
479 {
480 struct xe_device *xe = gt_to_xe(gt);
481
482 info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
483 engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
484 info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
485 engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
486 info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
487 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
488 info_map_write(xe, info_map,
489 engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
490 engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
491 info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
492 engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
493 info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
494 engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
495 }
496
guc_prep_golden_lrc_null(struct xe_guc_ads * ads)497 static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
498 {
499 struct xe_device *xe = ads_to_xe(ads);
500 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
501 offsetof(struct __guc_ads_blob, system_info));
502 u8 guc_class;
503
504 for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
505 if (!info_map_read(xe, &info_map,
506 engine_enabled_masks[guc_class]))
507 continue;
508
509 ads_blob_write(ads, ads.eng_state_size[guc_class],
510 guc_ads_golden_lrc_size(ads) -
511 xe_lrc_skip_size(xe));
512 ads_blob_write(ads, ads.golden_context_lrca[guc_class],
513 xe_bo_ggtt_addr(ads->bo) +
514 guc_ads_golden_lrc_offset(ads));
515 }
516 }
517
guc_mapping_table_init_invalid(struct xe_gt * gt,struct iosys_map * info_map)518 static void guc_mapping_table_init_invalid(struct xe_gt *gt,
519 struct iosys_map *info_map)
520 {
521 struct xe_device *xe = gt_to_xe(gt);
522 unsigned int i, j;
523
524 /* Table must be set to invalid values for entries not used */
525 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
526 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
527 info_map_write(xe, info_map, mapping_table[i][j],
528 GUC_MAX_INSTANCES_PER_CLASS);
529 }
530
guc_mapping_table_init(struct xe_gt * gt,struct iosys_map * info_map)531 static void guc_mapping_table_init(struct xe_gt *gt,
532 struct iosys_map *info_map)
533 {
534 struct xe_device *xe = gt_to_xe(gt);
535 struct xe_hw_engine *hwe;
536 enum xe_hw_engine_id id;
537
538 guc_mapping_table_init_invalid(gt, info_map);
539
540 for_each_hw_engine(hwe, gt, id) {
541 u8 guc_class;
542
543 guc_class = xe_engine_class_to_guc_class(hwe->class);
544 info_map_write(xe, info_map,
545 mapping_table[guc_class][hwe->logical_instance],
546 hwe->instance);
547 }
548 }
549
guc_get_capture_engine_mask(struct xe_gt * gt,struct iosys_map * info_map,enum guc_capture_list_class_type capture_class)550 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
551 enum guc_capture_list_class_type capture_class)
552 {
553 struct xe_device *xe = gt_to_xe(gt);
554 u32 mask;
555
556 switch (capture_class) {
557 case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
558 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
559 mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
560 break;
561 case GUC_CAPTURE_LIST_CLASS_VIDEO:
562 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
563 break;
564 case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
565 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
566 break;
567 case GUC_CAPTURE_LIST_CLASS_BLITTER:
568 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
569 break;
570 case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
571 mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
572 break;
573 default:
574 mask = 0;
575 }
576
577 return mask;
578 }
579
get_capture_list(struct xe_guc_ads * ads,struct xe_guc * guc,struct xe_gt * gt,int owner,int type,int class,u32 * total_size,size_t * size,void ** pptr)580 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
581 int owner, int type, int class, u32 *total_size, size_t *size,
582 void **pptr)
583 {
584 *size = 0;
585
586 if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
587 if (*total_size + *size > ads->capture_size)
588 xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
589 *total_size + *size, ads->capture_size);
590 else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
591 return false;
592 }
593
594 return true;
595 }
596
guc_capture_prep_lists(struct xe_guc_ads * ads)597 static int guc_capture_prep_lists(struct xe_guc_ads *ads)
598 {
599 struct xe_guc *guc = ads_to_guc(ads);
600 struct xe_gt *gt = ads_to_gt(ads);
601 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
602 struct iosys_map info_map;
603 size_t size = 0;
604 void *ptr;
605 int i, j;
606
607 /*
608 * GuC Capture's steered reg-list needs to be allocated and initialized
609 * after the GuC-hwconfig is available which guaranteed from here.
610 */
611 xe_guc_capture_steered_list_init(ads_to_guc(ads));
612
613 capture_offset = guc_ads_capture_offset(ads);
614 ads_ggtt = xe_bo_ggtt_addr(ads->bo);
615 info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
616 offsetof(struct __guc_ads_blob, system_info));
617
618 /* first, set aside the first page for a capture_list with zero descriptors */
619 total_size = PAGE_SIZE;
620 if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
621 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);
622
623 null_ggtt = ads_ggtt + capture_offset;
624 capture_offset += PAGE_SIZE;
625
626 /*
627 * Populate capture list : at this point adps is already allocated and
628 * mapped to worst case size
629 */
630 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
631 bool write_empty_list;
632
633 for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
634 u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
635 /* null list if we dont have said engine or list */
636 if (!engine_mask) {
637 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
638 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
639 continue;
640 }
641
642 /* engine exists: start with engine-class registers */
643 write_empty_list = get_capture_list(ads, guc, gt, i,
644 GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
645 j, &total_size, &size, &ptr);
646 if (!write_empty_list) {
647 ads_blob_write(ads, ads.capture_class[i][j],
648 ads_ggtt + capture_offset);
649 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
650 ptr, size);
651 total_size += size;
652 capture_offset += size;
653 } else {
654 ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
655 }
656
657 /* engine exists: next, engine-instance registers */
658 write_empty_list = get_capture_list(ads, guc, gt, i,
659 GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
660 j, &total_size, &size, &ptr);
661 if (!write_empty_list) {
662 ads_blob_write(ads, ads.capture_instance[i][j],
663 ads_ggtt + capture_offset);
664 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
665 ptr, size);
666 total_size += size;
667 capture_offset += size;
668 } else {
669 ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
670 }
671 }
672
673 /* global registers is last in our PF/VF loops */
674 write_empty_list = get_capture_list(ads, guc, gt, i,
675 GUC_STATE_CAPTURE_TYPE_GLOBAL,
676 0, &total_size, &size, &ptr);
677 if (!write_empty_list) {
678 ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
679 xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
680 size);
681 total_size += size;
682 capture_offset += size;
683 } else {
684 ads_blob_write(ads, ads.capture_global[i], null_ggtt);
685 }
686 }
687
688 if (ads->capture_size != PAGE_ALIGN(total_size))
689 xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
690 ads->capture_size, PAGE_ALIGN(total_size));
691 return PAGE_ALIGN(total_size);
692 }
693
guc_mmio_regset_write_one(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_reg reg,unsigned int n_entry)694 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
695 struct iosys_map *regset_map,
696 struct xe_reg reg,
697 unsigned int n_entry)
698 {
699 struct guc_mmio_reg entry = {
700 .offset = reg.addr,
701 .flags = reg.masked ? GUC_REGSET_MASKED : 0,
702 };
703
704 xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
705 &entry, sizeof(entry));
706 }
707
guc_mmio_regset_write(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_hw_engine * hwe)708 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
709 struct iosys_map *regset_map,
710 struct xe_hw_engine *hwe)
711 {
712 struct xe_device *xe = ads_to_xe(ads);
713 struct xe_hw_engine *hwe_rcs_reset_domain =
714 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
715 struct xe_reg_sr_entry *entry;
716 unsigned long idx;
717 unsigned int count = 0;
718 const struct {
719 struct xe_reg reg;
720 bool skip;
721 } *e, extra_regs[] = {
722 { .reg = RING_MODE(hwe->mmio_base), },
723 { .reg = RING_HWS_PGA(hwe->mmio_base), },
724 { .reg = RING_IMR(hwe->mmio_base), },
725 { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain },
726 { .reg = CCS_MODE,
727 .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
728 };
729 u32 i;
730
731 BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
732
733 xa_for_each(&hwe->reg_sr.xa, idx, entry)
734 guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
735
736 for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
737 if (e->skip)
738 continue;
739
740 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
741 }
742
743 /* Wa_1607983814 */
744 if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) {
745 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
746 guc_mmio_regset_write_one(ads, regset_map,
747 XELP_LNCFCMOCS(i), count++);
748 }
749 }
750
751 return count;
752 }
753
guc_mmio_reg_state_init(struct xe_guc_ads * ads)754 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
755 {
756 size_t regset_offset = guc_ads_regset_offset(ads);
757 struct xe_gt *gt = ads_to_gt(ads);
758 struct xe_hw_engine *hwe;
759 enum xe_hw_engine_id id;
760 u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
761 struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
762 regset_offset);
763 unsigned int regset_used = 0;
764
765 for_each_hw_engine(hwe, gt, id) {
766 unsigned int count;
767 u8 gc;
768
769 /*
770 * 1. Write all MMIO entries for this exec queue to the table. No
771 * need to worry about fused-off engines and when there are
772 * entries in the regset: the reg_state_list has been zero'ed
773 * by xe_guc_ads_populate()
774 */
775 count = guc_mmio_regset_write(ads, ®set_map, hwe);
776 if (!count)
777 continue;
778
779 /*
780 * 2. Record in the header (ads.reg_state_list) the address
781 * location and number of entries
782 */
783 gc = xe_engine_class_to_guc_class(hwe->class);
784 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
785 ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
786
787 addr += count * sizeof(struct guc_mmio_reg);
788 iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg));
789
790 regset_used += count * sizeof(struct guc_mmio_reg);
791 }
792
793 xe_gt_assert(gt, regset_used <= ads->regset_size);
794 }
795
guc_um_init_params(struct xe_guc_ads * ads)796 static void guc_um_init_params(struct xe_guc_ads *ads)
797 {
798 u32 um_queue_offset = guc_ads_um_queues_offset(ads);
799 u64 base_dpa;
800 u32 base_ggtt;
801 int i;
802
803 base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
804 base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
805
806 for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
807 ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
808 base_dpa + (i * GUC_UM_QUEUE_SIZE));
809 ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
810 base_ggtt + (i * GUC_UM_QUEUE_SIZE));
811 ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
812 GUC_UM_QUEUE_SIZE);
813 }
814
815 ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
816 GUC_PAGE_RES_TIMEOUT_US);
817 }
818
guc_doorbell_init(struct xe_guc_ads * ads)819 static void guc_doorbell_init(struct xe_guc_ads *ads)
820 {
821 struct xe_device *xe = ads_to_xe(ads);
822 struct xe_gt *gt = ads_to_gt(ads);
823
824 if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
825 u32 distdbreg =
826 xe_mmio_read32(>->mmio, DIST_DBS_POPULATED);
827
828 ads_blob_write(ads,
829 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
830 REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
831 }
832 }
833
834 /**
835 * xe_guc_ads_populate_minimal - populate minimal ADS
836 * @ads: Additional data structures object
837 *
838 * This function populates a minimal ADS that does not support submissions but
839 * enough so the GuC can load and the hwconfig table can be read.
840 */
xe_guc_ads_populate_minimal(struct xe_guc_ads * ads)841 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
842 {
843 struct xe_gt *gt = ads_to_gt(ads);
844 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
845 offsetof(struct __guc_ads_blob, system_info));
846 u32 base = xe_bo_ggtt_addr(ads->bo);
847
848 xe_gt_assert(gt, ads->bo);
849
850 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
851 guc_policies_init(ads);
852 guc_prep_golden_lrc_null(ads);
853 guc_mapping_table_init_invalid(gt, &info_map);
854 guc_doorbell_init(ads);
855
856 ads_blob_write(ads, ads.scheduler_policies, base +
857 offsetof(struct __guc_ads_blob, policies));
858 ads_blob_write(ads, ads.gt_system_info, base +
859 offsetof(struct __guc_ads_blob, system_info));
860 ads_blob_write(ads, ads.private_data, base +
861 guc_ads_private_data_offset(ads));
862 }
863
xe_guc_ads_populate(struct xe_guc_ads * ads)864 void xe_guc_ads_populate(struct xe_guc_ads *ads)
865 {
866 struct xe_device *xe = ads_to_xe(ads);
867 struct xe_gt *gt = ads_to_gt(ads);
868 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
869 offsetof(struct __guc_ads_blob, system_info));
870 u32 base = xe_bo_ggtt_addr(ads->bo);
871
872 xe_gt_assert(gt, ads->bo);
873
874 xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
875 guc_policies_init(ads);
876 fill_engine_enable_masks(gt, &info_map);
877 guc_mmio_reg_state_init(ads);
878 guc_prep_golden_lrc_null(ads);
879 guc_mapping_table_init(gt, &info_map);
880 guc_capture_prep_lists(ads);
881 guc_doorbell_init(ads);
882 guc_waklv_init(ads);
883
884 if (xe->info.has_usm) {
885 guc_um_init_params(ads);
886 ads_blob_write(ads, ads.um_init_data, base +
887 offsetof(struct __guc_ads_blob, um_init_params));
888 }
889
890 ads_blob_write(ads, ads.scheduler_policies, base +
891 offsetof(struct __guc_ads_blob, policies));
892 ads_blob_write(ads, ads.gt_system_info, base +
893 offsetof(struct __guc_ads_blob, system_info));
894 ads_blob_write(ads, ads.private_data, base +
895 guc_ads_private_data_offset(ads));
896 }
897
guc_populate_golden_lrc(struct xe_guc_ads * ads)898 static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
899 {
900 struct xe_device *xe = ads_to_xe(ads);
901 struct xe_gt *gt = ads_to_gt(ads);
902 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
903 offsetof(struct __guc_ads_blob, system_info));
904 size_t total_size = 0, alloc_size, real_size;
905 u32 addr_ggtt, offset;
906 int class;
907
908 offset = guc_ads_golden_lrc_offset(ads);
909 addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
910
911 for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
912 u8 guc_class;
913
914 guc_class = xe_engine_class_to_guc_class(class);
915
916 if (!info_map_read(xe, &info_map,
917 engine_enabled_masks[guc_class]))
918 continue;
919
920 xe_gt_assert(gt, gt->default_lrc[class]);
921
922 real_size = xe_gt_lrc_size(gt, class);
923 alloc_size = PAGE_ALIGN(real_size);
924 total_size += alloc_size;
925
926 /*
927 * This interface is slightly confusing. We need to pass the
928 * base address of the full golden context and the size of just
929 * the engine state, which is the section of the context image
930 * that starts after the execlists LRC registers. This is
931 * required to allow the GuC to restore just the engine state
932 * when a watchdog reset occurs.
933 * We calculate the engine state size by removing the size of
934 * what comes before it in the context image (which is identical
935 * on all engines).
936 */
937 ads_blob_write(ads, ads.eng_state_size[guc_class],
938 real_size - xe_lrc_skip_size(xe));
939 ads_blob_write(ads, ads.golden_context_lrca[guc_class],
940 addr_ggtt);
941
942 xe_map_memcpy_to(xe, ads_to_map(ads), offset,
943 gt->default_lrc[class], real_size);
944
945 addr_ggtt += alloc_size;
946 offset += alloc_size;
947 }
948
949 xe_gt_assert(gt, total_size == ads->golden_lrc_size);
950 }
951
/**
 * xe_guc_ads_populate_post_load - populate the parts of the ADS filled post load
 * @ads: Additional data structures object
 *
 * Currently this only writes the golden contexts.
 */
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
	guc_populate_golden_lrc(ads);
}
956
/*
 * Send XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE with @policy_offset as its
 * only parameter over the GuC CT channel and return the result of the send.
 */
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
{
	struct xe_guc *guc = ads_to_guc(ads);
	u32 msg[] = {
		XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
		policy_offset
	};

	return xe_guc_ct_send(&guc->ct, msg, ARRAY_SIZE(msg), 0, 0);
}
967
968 /**
969 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
970 * @ads: Additional data structures object
971 *
972 * This function update the GuC's engine reset policy based on wedged.mode.
973 *
974 * Return: 0 on success, and negative error code otherwise.
975 */
xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads * ads)976 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
977 {
978 struct xe_device *xe = ads_to_xe(ads);
979 struct xe_gt *gt = ads_to_gt(ads);
980 struct xe_tile *tile = gt_to_tile(gt);
981 struct guc_policies *policies;
982 struct xe_bo *bo;
983 int ret = 0;
984
985 policies = kmalloc(sizeof(*policies), GFP_KERNEL);
986 if (!policies)
987 return -ENOMEM;
988
989 policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
990 policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
991 policies->is_valid = 1;
992 if (xe->wedged.mode == 2)
993 policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
994 else
995 policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
996
997 bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
998 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
999 XE_BO_FLAG_GGTT);
1000 if (IS_ERR(bo)) {
1001 ret = PTR_ERR(bo);
1002 goto out;
1003 }
1004
1005 ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
1006 out:
1007 kfree(policies);
1008 return ret;
1009 }
1010