// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_ads.h"

#include <linux/fault-inject.h>

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>

#include "abi/guc_actions_abi.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_buf.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_wa.h"

/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX    8

static struct xe_guc *
ads_to_guc(struct xe_guc_ads *ads)
{
        return container_of(ads, struct xe_guc, ads);
}

static struct xe_gt *
ads_to_gt(struct xe_guc_ads *ads)
{
        return container_of(ads, struct xe_gt, uc.guc.ads);
}

static struct xe_device *
ads_to_xe(struct xe_guc_ads *ads)
{
        return gt_to_xe(ads_to_gt(ads));
}

static struct iosys_map *
ads_to_map(struct xe_guc_ads *ads)
{
        return &ads->bo->vmap;
}

/* UM Queue parameters: */
#define GUC_UM_QUEUE_SIZE       (SZ_64K)
#define GUC_PAGE_RES_TIMEOUT_US (-1)
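
/*
 * Note: (-1) is presumably meant as "maximum timeout": stored into the
 * 32-bit um_init_params.page_response_timeout_in_us field below it becomes
 * 0xFFFFFFFF, i.e. the longest representable timeout.
 */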

/*
 * The Additional Data Struct (ADS) has pointers for different buffers used by
 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
 * all the extra buffers indirectly linked via the ADS struct's entries.
 *
 * Layout of the ADS blob allocated for the GuC:
 *
 *      +---------------------------------------+ <== base
 *      | guc_ads                               |
 *      +---------------------------------------+
 *      | guc_policies                          |
 *      +---------------------------------------+
 *      | guc_gt_system_info                    |
 *      +---------------------------------------+
 *      | guc_engine_usage                      |
 *      +---------------------------------------+
 *      | guc_um_init_params                    |
 *      +---------------------------------------+ <== static
 *      | guc_mmio_reg[countA] (engine 0.0)     |
 *      | guc_mmio_reg[countB] (engine 0.1)     |
 *      | guc_mmio_reg[countC] (engine 1.0)     |
 *      |   ...                                 |
 *      +---------------------------------------+ <== dynamic
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | golden contexts                       |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | w/a KLVs                              |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | capture lists                         |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | UM queues                             |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | private data                          |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 */
struct __guc_ads_blob {
        struct guc_ads ads;
        struct guc_policies policies;
        struct guc_gt_system_info system_info;
        struct guc_engine_usage engine_usage;
        struct guc_um_init_params um_init_params;
        /* From here on, location is dynamic! Refer to above diagram. */
        struct guc_mmio_reg regset[];
} __packed;

#define ads_blob_read(ads_, field_) \
        xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
                        struct __guc_ads_blob, field_)

#define ads_blob_write(ads_, field_, val_) \
        xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \
                        struct __guc_ads_blob, field_, val_)

#define info_map_write(xe_, map_, field_, val_) \
        xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_)

#define info_map_read(xe_, map_, field_) \
        xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_)
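
/*
 * The ADS blob lives in a pinned BO whose CPU mapping may be I/O memory,
 * which is why all accesses above go through the xe_map/iosys_map helpers
 * rather than plain pointer dereferences.
 */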

static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);

        xe_assert(xe, ads->regset_size);

        return ads->regset_size;
}

static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
{
        return PAGE_ALIGN(ads->golden_lrc_size);
}

static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
{
        return PAGE_ALIGN(ads->ads_waklv_size);
}

static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
{
        return PAGE_ALIGN(ads->capture_size);
}

static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);

        if (!xe->info.has_usm)
                return 0;

        return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
}

static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
{
        return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
}

static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
{
        return offsetof(struct __guc_ads_blob, regset);
}

static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
{
        size_t offset;

        offset = guc_ads_regset_offset(ads) +
                 guc_ads_regset_size(ads);

        return PAGE_ALIGN(offset);
}

static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
{
        u32 offset;

        offset = guc_ads_golden_lrc_offset(ads) +
                 guc_ads_golden_lrc_size(ads);

        return PAGE_ALIGN(offset);
}

static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
{
        size_t offset;

        offset = guc_ads_waklv_offset(ads) +
                 guc_ads_waklv_size(ads);

        return PAGE_ALIGN(offset);
}

static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
{
        u32 offset;

        offset = guc_ads_capture_offset(ads) +
                 guc_ads_capture_size(ads);

        return PAGE_ALIGN(offset);
}

static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
{
        size_t offset;

        offset = guc_ads_um_queues_offset(ads) +
                 guc_ads_um_queues_size(ads);

        return PAGE_ALIGN(offset);
}

static size_t guc_ads_size(struct xe_guc_ads *ads)
{
        return guc_ads_private_data_offset(ads) +
               guc_ads_private_data_size(ads);
}
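
/*
 * Illustrative offset math (the 1 KiB regset is a hypothetical example):
 * guc_ads_golden_lrc_offset() would return
 * PAGE_ALIGN(offsetof(struct __guc_ads_blob, regset) + SZ_1K), and each
 * later section starts at the page-aligned end of the previous one, so
 * guc_ads_size() is the page-aligned end of the private data section,
 * matching the layout diagram at the top of this file.
 */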

static size_t calculate_regset_size(struct xe_gt *gt)
{
        struct xe_reg_sr_entry *sr_entry;
        unsigned long sr_idx;
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;
        unsigned int count = 0;

        for_each_hw_engine(hwe, gt, id)
                xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
                        count++;

        count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;

        if (XE_GT_WA(gt, 1607983814))
                count += LNCFCMOCS_REG_COUNT;

        return count * sizeof(struct guc_mmio_reg);
}

static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
{
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;
        u32 mask = 0;

        for_each_hw_engine(hwe, gt, id)
                if (hwe->class == class)
                        mask |= BIT(hwe->instance);

        return mask;
}

static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
{
        struct xe_gt *gt = ads_to_gt(ads);
        size_t total_size = 0, alloc_size, real_size;
        int class;

        for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
                if (!engine_enable_mask(gt, class))
                        continue;

                real_size = xe_gt_lrc_size(gt, class);
                alloc_size = PAGE_ALIGN(real_size);
                total_size += alloc_size;
        }

        return total_size;
}

static void guc_waklv_enable(struct xe_guc_ads *ads,
                             u32 data[], u32 data_len_dw,
                             u32 *offset, u32 *remain,
                             enum xe_guc_klv_ids klv_id)
{
        size_t size = sizeof(u32) * (1 + data_len_dw);

        if (*remain < size) {
                drm_warn(&ads_to_xe(ads)->drm,
                         "w/a klv buffer too small to add klv id 0x%04X\n", klv_id);
                return;
        }

        /* 16:16 key/length */
        xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32,
                  FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw));
        /* data_len_dw dwords of data */
        xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads),
                         *offset + sizeof(u32), data, data_len_dw * sizeof(u32));

        *offset += size;
        *remain -= size;
}
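
/*
 * Illustrative KLV layout (the key value here is hypothetical): a KLV with
 * one data dword occupies two consecutive dwords in the buffer,
 *
 *   dw0 = FIELD_PREP(GUC_KLV_0_KEY, 0x1234) | FIELD_PREP(GUC_KLV_0_LEN, 1)
 *   dw1 = <data>
 *
 * while a zero-length KLV (data == NULL, data_len_dw == 0) is just dw0.
 */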

static void guc_waklv_init(struct xe_guc_ads *ads)
{
        struct xe_gt *gt = ads_to_gt(ads);
        u64 addr_ggtt;
        u32 offset, remain, size;

        offset = guc_ads_waklv_offset(ads);
        remain = guc_ads_waklv_size(ads);

        if (XE_GT_WA(gt, 16021333562))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
        if (XE_GT_WA(gt, 18024947630))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING);
        if (XE_GT_WA(gt, 16022287689))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE);

        if (XE_GT_WA(gt, 14022866841))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO);

        /*
         * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
         * the default value for this register is determined to be 0xC40. This could change in the
         * future, so GuC depends on KMD to send it the correct value.
         */
        if (XE_GT_WA(gt, 13011645652)) {
                u32 data = 0xC40;

                guc_waklv_enable(ads, &data, 1, &offset, &remain,
                                 GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE);
        }

        if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET);

        if (GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 44) && XE_GT_WA(gt, 16026508708))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH);
        if (GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 47) && XE_GT_WA(gt, 16026007364)) {
                u32 data[] = {
                        0x0,
                        0xF,
                };
                guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain,
                                 GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG);
        }

        if (XE_GT_WA(gt, 14020001231))
                guc_waklv_enable(ads, NULL, 0, &offset, &remain,
                                 GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT);

        size = guc_ads_waklv_size(ads) - remain;
        if (!size)
                return;

        offset = guc_ads_waklv_offset(ads);
        addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;

        ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
        ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
        ads_blob_write(ads, ads.wa_klv_size, size);
}

static int calculate_waklv_size(struct xe_guc_ads *ads)
{
        /*
         * A single page is both the minimum size possible and
         * sufficiently large for all current platforms.
         */
        return SZ_4K;
}

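/*
 * Headroom on top of guc_ads_size(): the golden LRC and regset sizes are
 * recalculated in xe_guc_ads_init_post_hwconfig() once the hwconfig is
 * available, and the new totals must still fit in the BO allocated at init
 * time (see the assert in that function).
 */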
#define MAX_GOLDEN_LRC_SIZE     (SZ_4K * 64)

int xe_guc_ads_init(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        struct xe_tile *tile = gt_to_tile(gt);
        struct xe_bo *bo;

        ads->golden_lrc_size = calculate_golden_lrc_size(ads);
        ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
        ads->regset_size = calculate_regset_size(gt);
        ads->ads_waklv_size = calculate_waklv_size(ads);

        bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
                                          XE_BO_FLAG_SYSTEM |
                                          XE_BO_FLAG_GGTT |
                                          XE_BO_FLAG_GGTT_INVALIDATE |
                                          XE_BO_FLAG_PINNED_NORESTORE);
        if (IS_ERR(bo))
                return PTR_ERR(bo);

        ads->bo = bo;

        return 0;
}
ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */

/**
 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
 * @ads: Additional data structures object
 *
 * Recalculate golden_lrc_size, capture_size and regset_size as the number of
 * hardware engines may have changed after the hwconfig was loaded. Also verify
 * the new sizes fit in the already allocated ADS buffer object.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
{
        struct xe_gt *gt = ads_to_gt(ads);
        u32 prev_regset_size = ads->regset_size;

        xe_gt_assert(gt, ads->bo);

        ads->golden_lrc_size = calculate_golden_lrc_size(ads);
        /* Recalculate the capture size with the worst-case size */
        ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
        ads->regset_size = calculate_regset_size(gt);

        xe_gt_assert(gt, ads->golden_lrc_size +
                     (ads->regset_size - prev_regset_size) <=
                     MAX_GOLDEN_LRC_SIZE);

        return 0;
}

static void guc_policies_init(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        u32 global_flags = 0;

        ads_blob_write(ads, policies.dpc_promote_time,
                       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
        ads_blob_write(ads, policies.max_num_work_items,
                       GLOBAL_POLICY_MAX_NUM_WI);

        if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
                global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;

        ads_blob_write(ads, policies.global_flags, global_flags);
        ads_blob_write(ads, policies.is_valid, 1);
}

static void fill_engine_enable_masks(struct xe_gt *gt,
                                     struct iosys_map *info_map)
{
        struct xe_device *xe = gt_to_xe(gt);

        info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
        info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
        info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
        info_map_write(xe, info_map,
                       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
        info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
        info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
                       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}

/*
 * Write the offsets corresponding to the golden LRCs. The actual data is
 * populated later by guc_golden_lrc_populate().
 */
static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                        offsetof(struct __guc_ads_blob, system_info));
        size_t alloc_size, real_size;
        u32 addr_ggtt, offset;
        int class;

        offset = guc_ads_golden_lrc_offset(ads);
        addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;

        for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
                u8 guc_class;

                guc_class = xe_engine_class_to_guc_class(class);

                if (!info_map_read(xe, &info_map,
                                   engine_enabled_masks[guc_class]))
                        continue;

                real_size = xe_gt_lrc_size(gt, class);
                alloc_size = PAGE_ALIGN(real_size);

                /*
                 * This interface is slightly confusing. We need to pass the
                 * base address of the full golden context and the size of just
                 * the engine state, which is the section of the context image
                 * that starts after the execlists LRC registers. This is
                 * required to allow the GuC to restore just the engine state
                 * when a watchdog reset occurs.
                 */
                ads_blob_write(ads, ads.eng_state_size[guc_class],
                               xe_lrc_engine_state_size(gt, class));
                ads_blob_write(ads, ads.golden_context_lrca[guc_class],
                               addr_ggtt);

                addr_ggtt += alloc_size;
        }
}

static void guc_mapping_table_init_invalid(struct xe_gt *gt,
                                           struct iosys_map *info_map)
{
        struct xe_device *xe = gt_to_xe(gt);
        unsigned int i, j;

        /* Table must be set to invalid values for entries not used */
        for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
                for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
                        info_map_write(xe, info_map, mapping_table[i][j],
                                       GUC_MAX_INSTANCES_PER_CLASS);
}

static void guc_mapping_table_init(struct xe_gt *gt,
                                   struct iosys_map *info_map)
{
        struct xe_device *xe = gt_to_xe(gt);
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;

        guc_mapping_table_init_invalid(gt, info_map);

        for_each_hw_engine(hwe, gt, id) {
                u8 guc_class;

                guc_class = xe_engine_class_to_guc_class(hwe->class);
                info_map_write(xe, info_map,
                               mapping_table[guc_class][hwe->logical_instance],
                               hwe->instance);
        }
}

static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
                                       enum guc_capture_list_class_type capture_class)
{
        struct xe_device *xe = gt_to_xe(gt);
        u32 mask;

        switch (capture_class) {
        case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
                mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
                mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
                break;
        case GUC_CAPTURE_LIST_CLASS_VIDEO:
                mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
                break;
        case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
                mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
                break;
        case GUC_CAPTURE_LIST_CLASS_BLITTER:
                mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
                break;
        case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
                mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
                break;
        default:
                mask = 0;
        }

        return mask;
}

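/*
 * Returns true when no usable list exists for this owner/type/class
 * combination (either none is defined or it would overflow the space
 * reserved for capture lists); the caller then points the corresponding
 * ADS entry at the shared empty list instead.
 */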
static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
                                    int owner, int type, int class, u32 *total_size, size_t *size,
                                    void **pptr)
{
        *size = 0;

        if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
                if (*total_size + *size > ads->capture_size)
                        xe_gt_dbg(gt, "Capture size overflow: %zu vs %d\n",
                                  *total_size + *size, ads->capture_size);
                else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
                        return false;
        }

        return true;
}

static int guc_capture_prep_lists(struct xe_guc_ads *ads)
{
        struct xe_guc *guc = ads_to_guc(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
        struct iosys_map info_map;
        size_t size = 0;
        void *ptr;
        int i, j;

        /*
         * GuC Capture's steered reg-list needs to be allocated and initialized
         * after the GuC-hwconfig is available, which is guaranteed from here.
         */
        xe_guc_capture_steered_list_init(ads_to_guc(ads));

        capture_offset = guc_ads_capture_offset(ads);
        ads_ggtt = xe_bo_ggtt_addr(ads->bo);
        info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                                         offsetof(struct __guc_ads_blob, system_info));

        /* first, set aside the first page for a capture_list with zero descriptors */
        total_size = PAGE_SIZE;
        if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
                xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);

        null_ggtt = ads_ggtt + capture_offset;
        capture_offset += PAGE_SIZE;

        /*
         * Populate the capture list: at this point the ads is already
         * allocated and mapped to worst-case size.
         */
        for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
                bool write_empty_list;

                for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
                        u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);

                        /* null list if we don't have said engine or list */
                        if (!engine_mask) {
                                ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
                                ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
                                continue;
                        }

                        /* engine exists: start with engine-class registers */
                        write_empty_list = get_capture_list(ads, guc, gt, i,
                                                            GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
                                                            j, &total_size, &size, &ptr);
                        if (!write_empty_list) {
                                ads_blob_write(ads, ads.capture_class[i][j],
                                               ads_ggtt + capture_offset);
                                xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
                                                 ptr, size);
                                total_size += size;
                                capture_offset += size;
                        } else {
                                ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
                        }

                        /* engine exists: next, engine-instance registers */
                        write_empty_list = get_capture_list(ads, guc, gt, i,
                                                            GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
                                                            j, &total_size, &size, &ptr);
                        if (!write_empty_list) {
                                ads_blob_write(ads, ads.capture_instance[i][j],
                                               ads_ggtt + capture_offset);
                                xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
                                                 ptr, size);
                                total_size += size;
                                capture_offset += size;
                        } else {
                                ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
                        }
                }

                /* global registers are last in our PF/VF loops */
                write_empty_list = get_capture_list(ads, guc, gt, i,
                                                    GUC_STATE_CAPTURE_TYPE_GLOBAL,
                                                    0, &total_size, &size, &ptr);
                if (!write_empty_list) {
                        ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
                        xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
                                         size);
                        total_size += size;
                        capture_offset += size;
                } else {
                        ads_blob_write(ads, ads.capture_global[i], null_ggtt);
                }
        }

        if (ads->capture_size != PAGE_ALIGN(total_size))
                xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n",
                          PAGE_ALIGN(total_size), ads->capture_size);
        return PAGE_ALIGN(total_size);
}

static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
                                      struct iosys_map *regset_map,
                                      struct xe_reg reg,
                                      unsigned int n_entry)
{
        struct guc_mmio_reg entry = {
                .offset = reg.addr,
                .flags = reg.masked ? GUC_REGSET_MASKED : 0,
        };

        if (reg.mcr) {
                struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
                u8 group, instance;

                bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
                                                                  &group, &instance);

                if (steer) {
                        entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
                        entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
                        entry.flags |= GUC_REGSET_STEERING_NEEDED;
                }
        }

        xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
                         &entry, sizeof(entry));
}

static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
                                          struct iosys_map *regset_map,
                                          struct xe_hw_engine *hwe)
{
        struct xe_hw_engine *hwe_rcs_reset_domain =
                xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
        struct xe_reg_sr_entry *entry;
        unsigned long idx;
        unsigned int count = 0;
        const struct {
                struct xe_reg reg;
                bool skip;
        } *e, extra_regs[] = {
                { .reg = RING_MODE(hwe->mmio_base), },
                { .reg = RING_HWS_PGA(hwe->mmio_base), },
                { .reg = RING_IMR(hwe->mmio_base), },
                { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain },
                { .reg = CCS_MODE,
                  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
        };
        u32 i;

        BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);

        xa_for_each(&hwe->reg_sr.xa, idx, entry)
                guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);

        for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
                if (e->skip)
                        continue;

                guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
        }

        if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
                for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
                        guc_mmio_regset_write_one(ads, regset_map,
                                                  XELP_LNCFCMOCS(i), count++);
                }
        }

        return count;
}

static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
{
        size_t regset_offset = guc_ads_regset_offset(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        struct xe_hw_engine *hwe;
        enum xe_hw_engine_id id;
        u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
        struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                                                            regset_offset);
        unsigned int regset_used = 0;

        for_each_hw_engine(hwe, gt, id) {
                unsigned int count;
                u8 gc;

                /*
                 * 1. Write all MMIO entries for this engine to the table. No
                 * need to worry about fused-off engines or whether there are
                 * already entries in the regset: the reg_state_list has been
                 * zeroed by xe_guc_ads_populate().
                 */
                count = guc_mmio_regset_write(ads, &regset_map, hwe);
                if (!count)
                        continue;

                /*
                 * 2. Record in the header (ads.reg_state_list) the address
                 * location and number of entries.
                 */
                gc = xe_engine_class_to_guc_class(hwe->class);
                ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
                ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);

                addr += count * sizeof(struct guc_mmio_reg);
                iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));

                regset_used += count * sizeof(struct guc_mmio_reg);
        }

        xe_gt_assert(gt, regset_used <= ads->regset_size);
}

static void guc_um_init_params(struct xe_guc_ads *ads)
{
        u32 um_queue_offset = guc_ads_um_queues_offset(ads);
        struct xe_guc *guc = ads_to_guc(ads);
        struct xe_device *xe = ads_to_xe(ads);
        u64 base_dpa;
        u32 base_ggtt;
        bool with_dpa;
        int i;

        with_dpa = !xe_guc_using_main_gamctrl_queues(guc);

        base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
        base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;

        for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
                /*
                 * Some platforms support USM but not access counters.
                 * Skip ACCESS_COUNTER queue initialization for such
                 * platforms, leaving queue_params[2] zero-initialized
                 * to signal unavailability to the GuC.
                 */
                if (i == GUC_UM_HW_QUEUE_ACCESS_COUNTER &&
                    !xe->info.has_access_counter)
                        continue;

                ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
                               with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0);
                ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
                               base_ggtt + (i * GUC_UM_QUEUE_SIZE));
                ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
                               GUC_UM_QUEUE_SIZE);
        }

        ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
                       GUC_PAGE_RES_TIMEOUT_US);
}

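/*
 * Report how many doorbells each SQIDI unit provides, as read from the
 * DIST_DBS_POPULATED register; this only applies on integrated
 * GRAPHICS_VER >= 12 platforms, where doorbells are distributed.
 */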
static void guc_doorbell_init(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        struct xe_gt *gt = ads_to_gt(ads);

        if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
                u32 distdbreg =
                        xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);

                ads_blob_write(ads,
                               system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
                               REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
        }
}

/**
 * xe_guc_ads_populate_minimal - populate minimal ADS
 * @ads: Additional data structures object
 *
 * This function populates a minimal ADS that does not support submissions but
 * is enough for the GuC to load and the hwconfig table to be read.
 */
void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
{
        struct xe_gt *gt = ads_to_gt(ads);
        struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                        offsetof(struct __guc_ads_blob, system_info));
        u32 base = xe_bo_ggtt_addr(ads->bo);

        xe_gt_assert(gt, ads->bo);

        xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
        guc_policies_init(ads);
        guc_golden_lrc_init(ads);
        guc_mapping_table_init_invalid(gt, &info_map);
        guc_doorbell_init(ads);

        ads_blob_write(ads, ads.scheduler_policies, base +
                       offsetof(struct __guc_ads_blob, policies));
        ads_blob_write(ads, ads.gt_system_info, base +
                       offsetof(struct __guc_ads_blob, system_info));
        ads_blob_write(ads, ads.private_data, base +
                       guc_ads_private_data_offset(ads));
}

void xe_guc_ads_populate(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                        offsetof(struct __guc_ads_blob, system_info));
        u32 base = xe_bo_ggtt_addr(ads->bo);

        xe_gt_assert(gt, ads->bo);

        xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
        guc_policies_init(ads);
        fill_engine_enable_masks(gt, &info_map);
        guc_mmio_reg_state_init(ads);
        guc_golden_lrc_init(ads);
        guc_mapping_table_init(gt, &info_map);
        guc_capture_prep_lists(ads);
        guc_doorbell_init(ads);
        guc_waklv_init(ads);

        if (xe->info.has_usm) {
                guc_um_init_params(ads);
                ads_blob_write(ads, ads.um_init_data, base +
                               offsetof(struct __guc_ads_blob, um_init_params));
        }

        ads_blob_write(ads, ads.scheduler_policies, base +
                       offsetof(struct __guc_ads_blob, policies));
        ads_blob_write(ads, ads.gt_system_info, base +
                       offsetof(struct __guc_ads_blob, system_info));
        ads_blob_write(ads, ads.private_data, base +
                       guc_ads_private_data_offset(ads));
}

/*
 * After the golden LRCs are recorded for each engine class by the first
 * submission, copy them to the ADS, as initialized earlier by
 * guc_golden_lrc_init().
 */
static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
        struct xe_device *xe = ads_to_xe(ads);
        struct xe_gt *gt = ads_to_gt(ads);
        struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
                        offsetof(struct __guc_ads_blob, system_info));
        size_t total_size = 0, alloc_size, real_size;
        u32 offset;
        int class;

        offset = guc_ads_golden_lrc_offset(ads);

        for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
                u8 guc_class;

                guc_class = xe_engine_class_to_guc_class(class);

                if (!info_map_read(xe, &info_map,
                                   engine_enabled_masks[guc_class]))
                        continue;

                xe_gt_assert(gt, gt->default_lrc[class]);

                real_size = xe_gt_lrc_size(gt, class);
                alloc_size = PAGE_ALIGN(real_size);
                total_size += alloc_size;

                xe_map_memcpy_to(xe, ads_to_map(ads), offset,
                                 gt->default_lrc[class], real_size);

                offset += alloc_size;
        }

        xe_gt_assert(gt, total_size == ads->golden_lrc_size);
}

void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
        guc_golden_lrc_populate(ads);
}

static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
{
        struct xe_guc_ct *ct = &ads_to_guc(ads)->ct;
        u32 action[] = {
                XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
                policy_offset
        };

        return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
}

/**
 * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
 * @ads: Additional data structures object
 * @enable_engine_reset: true to enable engine resets, false otherwise
 *
 * This function updates the GuC's engine reset policy.
 *
 * Return: 0 on success, and negative error code otherwise.
 */
int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads,
                                             bool enable_engine_reset)
{
        struct guc_policies *policies;
        struct xe_guc *guc = ads_to_guc(ads);
        CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies));

        if (!xe_guc_buf_is_valid(buf))
                return -ENOBUFS;

        policies = xe_guc_buf_cpu_ptr(buf);
        memset(policies, 0, sizeof(*policies));

        policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
        policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
        policies->is_valid = 1;

        if (enable_engine_reset)
                policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
        else
                policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;

        return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf));
}