1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2014-2019 Intel Corporation
4 */
5
6 #include <linux/bsearch.h>
7
8 #include "gem/i915_gem_lmem.h"
9 #include "gt/intel_engine_regs.h"
10 #include "gt/intel_gt.h"
11 #include "gt/intel_gt_mcr.h"
12 #include "gt/intel_gt_regs.h"
13 #include "gt/intel_lrc.h"
14 #include "gt/shmem_utils.h"
15 #include "intel_guc_ads.h"
16 #include "intel_guc_capture.h"
17 #include "intel_guc_fwif.h"
18 #include "intel_guc_print.h"
19 #include "intel_uc.h"
20 #include "i915_drv.h"
21
/*
 * The Additional Data Struct (ADS) has pointers for different buffers used by
 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
 * all the extra buffers indirectly linked via the ADS struct's entries.
 *
 * Layout of the ADS blob allocated for the GuC:
 *
 *      +---------------------------------------+ <== base
 *      | guc_ads                               |
 *      +---------------------------------------+
 *      | guc_policies                          |
 *      +---------------------------------------+
 *      | guc_gt_system_info                    |
 *      +---------------------------------------+
 *      | guc_engine_usage                      |
 *      +---------------------------------------+ <== static
 *      | guc_mmio_reg[countA] (engine 0.0)     |
 *      | guc_mmio_reg[countB] (engine 0.1)     |
 *      | guc_mmio_reg[countC] (engine 1.0)     |
 *      |   ...                                 |
 *      +---------------------------------------+ <== dynamic
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | golden contexts                       |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | w/a KLVs                              |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | capture lists                         |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | private data                          |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 */
62 struct __guc_ads_blob {
63 struct guc_ads ads;
64 struct guc_policies policies;
65 struct guc_gt_system_info system_info;
66 struct guc_engine_usage engine_usage;
67 /* From here on, location is dynamic! Refer to above diagram. */
68 struct guc_mmio_reg regset[];
69 } __packed;
70
/* Read member @fld_ of struct __guc_ads_blob through the GuC's ads_map. */
#define ads_blob_read(g_, fld_)						\
	iosys_map_rd_field(&(g_)->ads_map, 0, struct __guc_ads_blob, fld_)

/* Write @v_ to member @fld_ of struct __guc_ads_blob through the GuC's ads_map. */
#define ads_blob_write(g_, fld_, v_)					\
	iosys_map_wr_field(&(g_)->ads_map, 0, struct __guc_ads_blob,	\
			   fld_, v_)

/* Write @v_ to member @fld_ of the struct guc_gt_system_info behind @m_. */
#define info_map_write(m_, fld_, v_)					\
	iosys_map_wr_field(m_, 0, struct guc_gt_system_info, fld_, v_)

/* Read member @fld_ of the struct guc_gt_system_info behind @m_. */
#define info_map_read(m_, fld_)						\
	iosys_map_rd_field(m_, 0, struct guc_gt_system_info, fld_)
83
guc_ads_regset_size(struct intel_guc * guc)84 static u32 guc_ads_regset_size(struct intel_guc *guc)
85 {
86 GEM_BUG_ON(!guc->ads_regset_size);
87 return guc->ads_regset_size;
88 }
89
guc_ads_golden_ctxt_size(struct intel_guc * guc)90 static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
91 {
92 return PAGE_ALIGN(guc->ads_golden_ctxt_size);
93 }
94
guc_ads_waklv_size(struct intel_guc * guc)95 static u32 guc_ads_waklv_size(struct intel_guc *guc)
96 {
97 return PAGE_ALIGN(guc->ads_waklv_size);
98 }
99
guc_ads_capture_size(struct intel_guc * guc)100 static u32 guc_ads_capture_size(struct intel_guc *guc)
101 {
102 return PAGE_ALIGN(guc->ads_capture_size);
103 }
104
guc_ads_private_data_size(struct intel_guc * guc)105 static u32 guc_ads_private_data_size(struct intel_guc *guc)
106 {
107 return PAGE_ALIGN(guc->fw.private_data_size);
108 }
109
guc_ads_regset_offset(struct intel_guc * guc)110 static u32 guc_ads_regset_offset(struct intel_guc *guc)
111 {
112 return offsetof(struct __guc_ads_blob, regset);
113 }
114
guc_ads_golden_ctxt_offset(struct intel_guc * guc)115 static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
116 {
117 u32 offset;
118
119 offset = guc_ads_regset_offset(guc) +
120 guc_ads_regset_size(guc);
121
122 return PAGE_ALIGN(offset);
123 }
124
guc_ads_waklv_offset(struct intel_guc * guc)125 static u32 guc_ads_waklv_offset(struct intel_guc *guc)
126 {
127 u32 offset;
128
129 offset = guc_ads_golden_ctxt_offset(guc) +
130 guc_ads_golden_ctxt_size(guc);
131
132 return PAGE_ALIGN(offset);
133 }
134
guc_ads_capture_offset(struct intel_guc * guc)135 static u32 guc_ads_capture_offset(struct intel_guc *guc)
136 {
137 u32 offset;
138
139 offset = guc_ads_waklv_offset(guc) +
140 guc_ads_waklv_size(guc);
141
142 return PAGE_ALIGN(offset);
143 }
144
guc_ads_private_data_offset(struct intel_guc * guc)145 static u32 guc_ads_private_data_offset(struct intel_guc *guc)
146 {
147 u32 offset;
148
149 offset = guc_ads_capture_offset(guc) +
150 guc_ads_capture_size(guc);
151
152 return PAGE_ALIGN(offset);
153 }
154
guc_ads_blob_size(struct intel_guc * guc)155 static u32 guc_ads_blob_size(struct intel_guc *guc)
156 {
157 return guc_ads_private_data_offset(guc) +
158 guc_ads_private_data_size(guc);
159 }
160
guc_policies_init(struct intel_guc * guc)161 static void guc_policies_init(struct intel_guc *guc)
162 {
163 struct intel_gt *gt = guc_to_gt(guc);
164 struct drm_i915_private *i915 = gt->i915;
165 u32 global_flags = 0;
166
167 ads_blob_write(guc, policies.dpc_promote_time,
168 GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
169 ads_blob_write(guc, policies.max_num_work_items,
170 GLOBAL_POLICY_MAX_NUM_WI);
171
172 if (i915->params.reset < 2)
173 global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
174
175 ads_blob_write(guc, policies.global_flags, global_flags);
176 ads_blob_write(guc, policies.is_valid, 1);
177 }
178
intel_guc_ads_print_policy_info(struct intel_guc * guc,struct drm_printer * dp)179 void intel_guc_ads_print_policy_info(struct intel_guc *guc,
180 struct drm_printer *dp)
181 {
182 if (unlikely(iosys_map_is_null(&guc->ads_map)))
183 return;
184
185 drm_printf(dp, "Global scheduling policies:\n");
186 drm_printf(dp, " DPC promote time = %u\n",
187 ads_blob_read(guc, policies.dpc_promote_time));
188 drm_printf(dp, " Max num work items = %u\n",
189 ads_blob_read(guc, policies.max_num_work_items));
190 drm_printf(dp, " Flags = %u\n",
191 ads_blob_read(guc, policies.global_flags));
192 }
193
/*
 * Send the GLOBAL_SCHED_POLICY_CHANGE action to the GuC, pointing it at the
 * policies located at @policy_offset. Returns the send result.
 */
static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
{
	u32 request[2];

	request[0] = INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE;
	request[1] = policy_offset;

	return intel_guc_send_busy_loop(guc, request, ARRAY_SIZE(request), 0, true);
}
203
intel_guc_global_policies_update(struct intel_guc * guc)204 int intel_guc_global_policies_update(struct intel_guc *guc)
205 {
206 struct intel_gt *gt = guc_to_gt(guc);
207 u32 scheduler_policies;
208 intel_wakeref_t wakeref;
209 int ret;
210
211 if (iosys_map_is_null(&guc->ads_map))
212 return -EOPNOTSUPP;
213
214 scheduler_policies = ads_blob_read(guc, ads.scheduler_policies);
215 GEM_BUG_ON(!scheduler_policies);
216
217 guc_policies_init(guc);
218
219 if (!intel_guc_is_ready(guc))
220 return 0;
221
222 with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
223 ret = guc_action_policies_update(guc, scheduler_policies);
224
225 return ret;
226 }
227
guc_mapping_table_init(struct intel_gt * gt,struct iosys_map * info_map)228 static void guc_mapping_table_init(struct intel_gt *gt,
229 struct iosys_map *info_map)
230 {
231 unsigned int i, j;
232 struct intel_engine_cs *engine;
233 enum intel_engine_id id;
234
235 /* Table must be set to invalid values for entries not used */
236 for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
237 for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
238 info_map_write(info_map, mapping_table[i][j],
239 GUC_MAX_INSTANCES_PER_CLASS);
240
241 for_each_engine(engine, gt, id) {
242 u8 guc_class = engine_class_to_guc_class(engine->class);
243
244 info_map_write(info_map, mapping_table[guc_class][ilog2(engine->logical_mask)],
245 engine->instance);
246 }
247 }
248
249 /*
250 * The save/restore register list must be pre-calculated to a temporary
251 * buffer before it can be copied inside the ADS.
252 */
253 struct temp_regset {
254 /*
255 * ptr to the section of the storage for the engine currently being
256 * worked on
257 */
258 struct guc_mmio_reg *registers;
259 /* ptr to the base of the allocated storage for all engines */
260 struct guc_mmio_reg *storage;
261 u32 storage_used;
262 u32 storage_max;
263 };
264
guc_mmio_reg_cmp(const void * a,const void * b)265 static int guc_mmio_reg_cmp(const void *a, const void *b)
266 {
267 const struct guc_mmio_reg *ra = a;
268 const struct guc_mmio_reg *rb = b;
269
270 return (int)ra->offset - (int)rb->offset;
271 }
272
273 static struct guc_mmio_reg * __must_check
__mmio_reg_add(struct temp_regset * regset,struct guc_mmio_reg * reg)274 __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
275 {
276 u32 pos = regset->storage_used;
277 struct guc_mmio_reg *slot;
278
279 if (pos >= regset->storage_max) {
280 size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE);
281 struct guc_mmio_reg *r = krealloc(regset->storage,
282 size, GFP_KERNEL);
283 if (!r) {
284 WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n",
285 -ENOMEM);
286 return ERR_PTR(-ENOMEM);
287 }
288
289 regset->registers = r + (regset->registers - regset->storage);
290 regset->storage = r;
291 regset->storage_max = size / sizeof(*slot);
292 }
293
294 slot = ®set->storage[pos];
295 regset->storage_used++;
296 *slot = *reg;
297
298 return slot;
299 }
300
guc_mmio_reg_add(struct intel_gt * gt,struct temp_regset * regset,u32 offset,u32 flags)301 static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
302 struct temp_regset *regset,
303 u32 offset, u32 flags)
304 {
305 u32 count = regset->storage_used - (regset->registers - regset->storage);
306 struct guc_mmio_reg entry = {
307 .offset = offset,
308 .flags = flags,
309 };
310 struct guc_mmio_reg *slot;
311
312 /*
313 * The mmio list is built using separate lists within the driver.
314 * It's possible that at some point we may attempt to add the same
315 * register more than once. Do not consider this an error; silently
316 * move on if the register is already in the list.
317 */
318 if (bsearch(&entry, regset->registers, count,
319 sizeof(entry), guc_mmio_reg_cmp))
320 return 0;
321
322 slot = __mmio_reg_add(regset, &entry);
323 if (IS_ERR(slot))
324 return PTR_ERR(slot);
325
326 while (slot-- > regset->registers) {
327 GEM_BUG_ON(slot[0].offset == slot[1].offset);
328 if (slot[1].offset > slot[0].offset)
329 break;
330
331 swap(slot[1], slot[0]);
332 }
333
334 return 0;
335 }
336
/* Add the plain register @reg_ to @regset_, flagged as masked if @masked_. */
#define GUC_MMIO_REG_ADD(gt_, regset_, reg_, masked_)	\
	guc_mmio_reg_add(gt_,				\
			 regset_,			\
			 i915_mmio_reg_offset(reg_),	\
			 (masked_) ? GUC_REGSET_MASKED : 0)

/* Build the regset flags that steer an access to @grp_/@inst_. */
#define GUC_REGSET_STEERING(grp_, inst_) (			\
	FIELD_PREP(GUC_REGSET_STEERING_GROUP, (grp_)) |		\
	FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (inst_)) |	\
	GUC_REGSET_NEEDS_STEERING				\
)
348
guc_mcr_reg_add(struct intel_gt * gt,struct temp_regset * regset,i915_mcr_reg_t reg,u32 flags)349 static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
350 struct temp_regset *regset,
351 i915_mcr_reg_t reg, u32 flags)
352 {
353 u8 group, inst;
354
355 /*
356 * The GuC doesn't have a default steering, so we need to explicitly
357 * steer all registers that need steering. However, we do not keep track
358 * of all the steering ranges, only of those that have a chance of using
359 * a non-default steering from the i915 pov. Instead of adding such
360 * tracking, it is easier to just program the default steering for all
361 * regs that don't need a non-default one.
362 */
363 intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
364 flags |= GUC_REGSET_STEERING(group, inst);
365
366 return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
367 }
368
/* Add the steered register @reg_ to @regset_, flagged as masked if @masked_. */
#define GUC_MCR_REG_ADD(gt_, regset_, reg_, masked_)	\
	guc_mcr_reg_add(gt_,				\
			regset_,			\
			(reg_),				\
			(masked_) ? GUC_REGSET_MASKED : 0)
374
guc_mmio_regset_init(struct temp_regset * regset,struct intel_engine_cs * engine)375 static int guc_mmio_regset_init(struct temp_regset *regset,
376 struct intel_engine_cs *engine)
377 {
378 struct intel_gt *gt = engine->gt;
379 const u32 base = engine->mmio_base;
380 struct i915_wa_list *wal = &engine->wa_list;
381 struct i915_wa *wa;
382 unsigned int i;
383 int ret = 0;
384
385 /*
386 * Each engine's registers point to a new start relative to
387 * storage
388 */
389 regset->registers = regset->storage + regset->storage_used;
390
391 ret |= GUC_MMIO_REG_ADD(gt, regset, RING_MODE_GEN7(base), true);
392 ret |= GUC_MMIO_REG_ADD(gt, regset, RING_HWS_PGA(base), false);
393 ret |= GUC_MMIO_REG_ADD(gt, regset, RING_IMR(base), false);
394
395 if ((engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) &&
396 CCS_MASK(engine->gt))
397 ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
398
399 /*
400 * some of the WA registers are MCR registers. As it is safe to
401 * use MCR form for non-MCR registers, for code simplicity, all
402 * WA registers are added with MCR form.
403 */
404 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
405 ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
406
407 /* Be extra paranoid and include all whitelist registers. */
408 for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
409 ret |= GUC_MMIO_REG_ADD(gt, regset,
410 RING_FORCE_TO_NONPRIV(base, i),
411 false);
412
413 /* add in local MOCS registers */
414 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
415 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
416 ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
417 else
418 ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
419
420 if (GRAPHICS_VER(engine->i915) >= 12) {
421 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
422 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
423 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
424 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
425 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
426 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
427 ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
428 }
429
430 return ret ? -1 : 0;
431 }
432
guc_mmio_reg_state_create(struct intel_guc * guc)433 static long guc_mmio_reg_state_create(struct intel_guc *guc)
434 {
435 struct intel_gt *gt = guc_to_gt(guc);
436 struct intel_engine_cs *engine;
437 enum intel_engine_id id;
438 struct temp_regset temp_set = {};
439 long total = 0;
440 long ret;
441
442 for_each_engine(engine, gt, id) {
443 u32 used = temp_set.storage_used;
444
445 ret = guc_mmio_regset_init(&temp_set, engine);
446 if (ret < 0)
447 goto fail_regset_init;
448
449 guc->ads_regset_count[id] = temp_set.storage_used - used;
450 total += guc->ads_regset_count[id];
451 }
452
453 guc->ads_regset = temp_set.storage;
454
455 guc_dbg(guc, "Used %zu KB for temporary ADS regset\n",
456 (temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10);
457
458 return total * sizeof(struct guc_mmio_reg);
459
460 fail_regset_init:
461 kfree(temp_set.storage);
462 return ret;
463 }
464
guc_mmio_reg_state_init(struct intel_guc * guc)465 static void guc_mmio_reg_state_init(struct intel_guc *guc)
466 {
467 struct intel_gt *gt = guc_to_gt(guc);
468 struct intel_engine_cs *engine;
469 enum intel_engine_id id;
470 u32 addr_ggtt, offset;
471
472 offset = guc_ads_regset_offset(guc);
473 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
474
475 iosys_map_memcpy_to(&guc->ads_map, offset, guc->ads_regset,
476 guc->ads_regset_size);
477
478 for_each_engine(engine, gt, id) {
479 u32 count = guc->ads_regset_count[id];
480 u8 guc_class;
481
482 /* Class index is checked in class converter */
483 GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
484
485 guc_class = engine_class_to_guc_class(engine->class);
486
487 if (!count) {
488 ads_blob_write(guc,
489 ads.reg_state_list[guc_class][engine->instance].address,
490 0);
491 ads_blob_write(guc,
492 ads.reg_state_list[guc_class][engine->instance].count,
493 0);
494 continue;
495 }
496
497 ads_blob_write(guc,
498 ads.reg_state_list[guc_class][engine->instance].address,
499 addr_ggtt);
500 ads_blob_write(guc,
501 ads.reg_state_list[guc_class][engine->instance].count,
502 count);
503
504 addr_ggtt += count * sizeof(struct guc_mmio_reg);
505 }
506 }
507
fill_engine_enable_masks(struct intel_gt * gt,struct iosys_map * info_map)508 static void fill_engine_enable_masks(struct intel_gt *gt,
509 struct iosys_map *info_map)
510 {
511 info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt));
512 info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
513 info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], BCS_MASK(gt));
514 info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
515 info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
516
517 /* The GSC engine is an instance (6) of OTHER_CLASS */
518 if (gt->engine[GSC0])
519 info_map_write(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
520 BIT(gt->engine[GSC0]->instance));
521 }
522
/* Size of the LR hardware context, in bytes, before and from IP version 12.55. */
#define LR_HW_CONTEXT_SIZE	(80 * sizeof(u32))
#define XEHP_LR_HW_CONTEXT_SIZE	(96 * sizeof(u32))
#define LR_HW_CONTEXT_SZ(i915_)	(GRAPHICS_VER_FULL(i915_) >= IP_VER(12, 55) ? \
				 XEHP_LR_HW_CONTEXT_SIZE : \
				 LR_HW_CONTEXT_SIZE)
/* Bytes of the context image that precede the engine state. */
#define LRC_SKIP_SIZE(i915_)	(LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915_))
guc_prep_golden_context(struct intel_guc * guc)529 static int guc_prep_golden_context(struct intel_guc *guc)
530 {
531 struct intel_gt *gt = guc_to_gt(guc);
532 u32 addr_ggtt, offset;
533 u32 total_size = 0, alloc_size, real_size;
534 u8 engine_class, guc_class;
535 struct guc_gt_system_info local_info;
536 struct iosys_map info_map;
537
538 /*
539 * Reserve the memory for the golden contexts and point GuC at it but
540 * leave it empty for now. The context data will be filled in later
541 * once there is something available to put there.
542 *
543 * Note that the HWSP and ring context are not included.
544 *
545 * Note also that the storage must be pinned in the GGTT, so that the
546 * address won't change after GuC has been told where to find it. The
547 * GuC will also validate that the LRC base + size fall within the
548 * allowed GGTT range.
549 */
550 if (!iosys_map_is_null(&guc->ads_map)) {
551 offset = guc_ads_golden_ctxt_offset(guc);
552 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
553 info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
554 offsetof(struct __guc_ads_blob, system_info));
555 } else {
556 memset(&local_info, 0, sizeof(local_info));
557 iosys_map_set_vaddr(&info_map, &local_info);
558 fill_engine_enable_masks(gt, &info_map);
559 }
560
561 for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
562 guc_class = engine_class_to_guc_class(engine_class);
563
564 if (!info_map_read(&info_map, engine_enabled_masks[guc_class]))
565 continue;
566
567 real_size = intel_engine_context_size(gt, engine_class);
568 alloc_size = PAGE_ALIGN(real_size);
569 total_size += alloc_size;
570
571 if (iosys_map_is_null(&guc->ads_map))
572 continue;
573
574 /*
575 * This interface is slightly confusing. We need to pass the
576 * base address of the full golden context and the size of just
577 * the engine state, which is the section of the context image
578 * that starts after the execlists context. This is required to
579 * allow the GuC to restore just the engine state when a
580 * watchdog reset occurs.
581 * We calculate the engine state size by removing the size of
582 * what comes before it in the context image (which is identical
583 * on all engines).
584 */
585 ads_blob_write(guc, ads.eng_state_size[guc_class],
586 real_size - LRC_SKIP_SIZE(gt->i915));
587 ads_blob_write(guc, ads.golden_context_lrca[guc_class],
588 addr_ggtt);
589
590 addr_ggtt += alloc_size;
591 }
592
593 /* Make sure current size matches what we calculated previously */
594 if (guc->ads_golden_ctxt_size)
595 GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
596
597 return total_size;
598 }
599
/*
 * Return the first engine of @engine_class on @gt that has a default state
 * recorded, or NULL if there is none.
 */
static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		if (engine->class == engine_class && engine->default_state)
			return engine;
	}

	return NULL;
}
617
guc_init_golden_context(struct intel_guc * guc)618 static void guc_init_golden_context(struct intel_guc *guc)
619 {
620 struct intel_engine_cs *engine;
621 struct intel_gt *gt = guc_to_gt(guc);
622 unsigned long offset;
623 u32 addr_ggtt, total_size = 0, alloc_size, real_size;
624 u8 engine_class, guc_class;
625
626 if (!intel_uc_uses_guc_submission(>->uc))
627 return;
628
629 GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
630
631 /*
632 * Go back and fill in the golden context data now that it is
633 * available.
634 */
635 offset = guc_ads_golden_ctxt_offset(guc);
636 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
637
638 for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
639 guc_class = engine_class_to_guc_class(engine_class);
640 if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class]))
641 continue;
642
643 real_size = intel_engine_context_size(gt, engine_class);
644 alloc_size = PAGE_ALIGN(real_size);
645 total_size += alloc_size;
646
647 engine = find_engine_state(gt, engine_class);
648 if (!engine) {
649 guc_err(guc, "No engine state recorded for class %d!\n",
650 engine_class);
651 ads_blob_write(guc, ads.eng_state_size[guc_class], 0);
652 ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0);
653 continue;
654 }
655
656 GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
657 real_size - LRC_SKIP_SIZE(gt->i915));
658 GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
659
660 addr_ggtt += alloc_size;
661
662 shmem_read_to_iosys_map(engine->default_state, 0, &guc->ads_map,
663 offset, real_size);
664 offset += alloc_size;
665 }
666
667 GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
668 }
669
/*
 * Return the enabled-engine mask that corresponds to @capture_class, read
 * from the system info behind @info_map. Render and compute share one capture
 * class, so their masks are combined. Unknown classes yield 0.
 */
static u32 guc_get_capture_engine_mask(struct iosys_map *info_map, u32 capture_class)
{
	switch (capture_class) {
	case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
		return info_map_read(info_map, engine_enabled_masks[GUC_RENDER_CLASS]) |
		       info_map_read(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
	case GUC_CAPTURE_LIST_CLASS_VIDEO:
		return info_map_read(info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
	case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
		return info_map_read(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
	case GUC_CAPTURE_LIST_CLASS_BLITTER:
		return info_map_read(info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
	case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
		return info_map_read(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
	default:
		return 0;
	}
}
702
703 static int
guc_capture_prep_lists(struct intel_guc * guc)704 guc_capture_prep_lists(struct intel_guc *guc)
705 {
706 struct intel_gt *gt = guc_to_gt(guc);
707 u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
708 struct guc_gt_system_info local_info;
709 struct iosys_map info_map;
710 bool ads_is_mapped;
711 size_t size = 0;
712 void *ptr;
713 int i, j;
714
715 ads_is_mapped = !iosys_map_is_null(&guc->ads_map);
716 if (ads_is_mapped) {
717 capture_offset = guc_ads_capture_offset(guc);
718 ads_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma);
719 info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
720 offsetof(struct __guc_ads_blob, system_info));
721 } else {
722 memset(&local_info, 0, sizeof(local_info));
723 iosys_map_set_vaddr(&info_map, &local_info);
724 fill_engine_enable_masks(gt, &info_map);
725 }
726
727 /* first, set aside the first page for a capture_list with zero descriptors */
728 total_size = PAGE_SIZE;
729 if (ads_is_mapped) {
730 if (!intel_guc_capture_getnullheader(guc, &ptr, &size))
731 iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
732 null_ggtt = ads_ggtt + capture_offset;
733 capture_offset += PAGE_SIZE;
734 }
735
736 for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
737 for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
738 u32 engine_mask = guc_get_capture_engine_mask(&info_map, j);
739
740 /* null list if we dont have said engine or list */
741 if (!engine_mask) {
742 if (ads_is_mapped) {
743 ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
744 ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
745 }
746 continue;
747 }
748 if (intel_guc_capture_getlistsize(guc, i,
749 GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
750 j, &size)) {
751 if (ads_is_mapped)
752 ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
753 goto engine_instance_list;
754 }
755 total_size += size;
756 if (ads_is_mapped) {
757 if (total_size > guc->ads_capture_size ||
758 intel_guc_capture_getlist(guc, i,
759 GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
760 j, &ptr)) {
761 ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
762 continue;
763 }
764 ads_blob_write(guc, ads.capture_class[i][j], ads_ggtt +
765 capture_offset);
766 iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
767 capture_offset += size;
768 }
769 engine_instance_list:
770 if (intel_guc_capture_getlistsize(guc, i,
771 GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
772 j, &size)) {
773 if (ads_is_mapped)
774 ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
775 continue;
776 }
777 total_size += size;
778 if (ads_is_mapped) {
779 if (total_size > guc->ads_capture_size ||
780 intel_guc_capture_getlist(guc, i,
781 GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
782 j, &ptr)) {
783 ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
784 continue;
785 }
786 ads_blob_write(guc, ads.capture_instance[i][j], ads_ggtt +
787 capture_offset);
788 iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
789 capture_offset += size;
790 }
791 }
792 if (intel_guc_capture_getlistsize(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &size)) {
793 if (ads_is_mapped)
794 ads_blob_write(guc, ads.capture_global[i], null_ggtt);
795 continue;
796 }
797 total_size += size;
798 if (ads_is_mapped) {
799 if (total_size > guc->ads_capture_size ||
800 intel_guc_capture_getlist(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0,
801 &ptr)) {
802 ads_blob_write(guc, ads.capture_global[i], null_ggtt);
803 continue;
804 }
805 ads_blob_write(guc, ads.capture_global[i], ads_ggtt + capture_offset);
806 iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
807 capture_offset += size;
808 }
809 }
810
811 if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size))
812 guc_warn(guc, "ADS capture alloc size changed from %d to %d\n",
813 guc->ads_capture_size, PAGE_ALIGN(total_size));
814
815 return PAGE_ALIGN(total_size);
816 }
817
/*
 * Append a data-less KLV (key @klv_id, length 0) to the workaround KLV area
 * at *@offset, then advance *@offset and shrink *@remain by the entry size.
 */
static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id)
{
	/* 16:16 key/length header, followed by 0 dwords of data */
	const u32 klv_header = FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
			       FIELD_PREP(GUC_KLV_0_LEN, 0);
	const u32 size = sizeof(klv_header);

	GEM_BUG_ON(*remain < size);

	iosys_map_memcpy_to(&guc->ads_map, *offset, &klv_header, size);
	*offset += size;
	*remain -= size;
}
835
guc_waklv_init(struct intel_guc * guc)836 static void guc_waklv_init(struct intel_guc *guc)
837 {
838 struct intel_gt *gt = guc_to_gt(guc);
839 u32 offset, addr_ggtt, remain, size;
840
841 if (!intel_uc_uses_guc_submission(>->uc))
842 return;
843
844 if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
845 return;
846
847 GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
848 offset = guc_ads_waklv_offset(guc);
849 remain = guc_ads_waklv_size(guc);
850
851 /* Wa_14019159160 */
852 if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
853 guc_waklv_enable_simple(guc, &offset, &remain,
854 GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
855 guc_waklv_enable_simple(guc, &offset, &remain,
856 GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
857 }
858
859 /* Wa_16021333562 */
860 if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
861 (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
862 IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
863 IS_DG2(gt->i915)))
864 guc_waklv_enable_simple(guc, &offset, &remain,
865 GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
866
867 size = guc_ads_waklv_size(guc) - remain;
868 if (!size)
869 return;
870
871 offset = guc_ads_waklv_offset(guc);
872 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
873
874 ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
875 ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
876 ads_blob_write(guc, ads.wa_klv_size, size);
877 }
878
guc_prep_waklv(struct intel_guc * guc)879 static int guc_prep_waklv(struct intel_guc *guc)
880 {
881 /* Fudge something chunky for now: */
882 return PAGE_SIZE;
883 }
884
__guc_ads_init(struct intel_guc * guc)885 static void __guc_ads_init(struct intel_guc *guc)
886 {
887 struct intel_gt *gt = guc_to_gt(guc);
888 struct drm_i915_private *i915 = gt->i915;
889 struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
890 offsetof(struct __guc_ads_blob, system_info));
891 u32 base;
892
893 /* GuC scheduling policies */
894 guc_policies_init(guc);
895
896 /* System info */
897 fill_engine_enable_masks(gt, &info_map);
898
899 ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
900 hweight8(gt->info.sseu.slice_mask));
901 ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
902 gt->info.vdbox_sfc_access);
903
904 if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) {
905 u32 distdbreg = intel_uncore_read(gt->uncore,
906 GEN12_DIST_DBS_POPULATED);
907 ads_blob_write(guc,
908 system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
909 ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
910 & GEN12_DOORBELLS_PER_SQIDI) + 1);
911 }
912
913 /* Golden contexts for re-initialising after a watchdog reset */
914 guc_prep_golden_context(guc);
915
916 guc_mapping_table_init(guc_to_gt(guc), &info_map);
917
918 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
919
920 /* Lists for error capture debug */
921 guc_capture_prep_lists(guc);
922
923 /* ADS */
924 ads_blob_write(guc, ads.scheduler_policies, base +
925 offsetof(struct __guc_ads_blob, policies));
926 ads_blob_write(guc, ads.gt_system_info, base +
927 offsetof(struct __guc_ads_blob, system_info));
928
929 /* MMIO save/restore list */
930 guc_mmio_reg_state_init(guc);
931
932 /* Workaround KLV list */
933 guc_waklv_init(guc);
934
935 /* Private Data */
936 ads_blob_write(guc, ads.private_data, base +
937 guc_ads_private_data_offset(guc));
938
939 i915_gem_object_flush_map(guc->ads_vma->obj);
940 }
941
942 /**
943 * intel_guc_ads_create() - allocates and initializes GuC ADS.
944 * @guc: intel_guc struct
945 *
946 * GuC needs memory block (Additional Data Struct), where it will store
947 * some data. Allocate and initialize such memory block for GuC use.
948 */
intel_guc_ads_create(struct intel_guc * guc)949 int intel_guc_ads_create(struct intel_guc *guc)
950 {
951 void *ads_blob;
952 u32 size;
953 int ret;
954
955 GEM_BUG_ON(guc->ads_vma);
956
957 /*
958 * Create reg state size dynamically on system memory to be copied to
959 * the final ads blob on gt init/reset
960 */
961 ret = guc_mmio_reg_state_create(guc);
962 if (ret < 0)
963 return ret;
964 guc->ads_regset_size = ret;
965
966 /* Likewise the golden contexts: */
967 ret = guc_prep_golden_context(guc);
968 if (ret < 0)
969 return ret;
970 guc->ads_golden_ctxt_size = ret;
971
972 /* Likewise the capture lists: */
973 ret = guc_capture_prep_lists(guc);
974 if (ret < 0)
975 return ret;
976 guc->ads_capture_size = ret;
977
978 /* And don't forget the workaround KLVs: */
979 ret = guc_prep_waklv(guc);
980 if (ret < 0)
981 return ret;
982 guc->ads_waklv_size = ret;
983
984 /* Now the total size can be determined: */
985 size = guc_ads_blob_size(guc);
986
987 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
988 &ads_blob);
989 if (ret)
990 return ret;
991
992 if (i915_gem_object_is_lmem(guc->ads_vma->obj))
993 iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)ads_blob);
994 else
995 iosys_map_set_vaddr(&guc->ads_map, ads_blob);
996
997 __guc_ads_init(guc);
998
999 return 0;
1000 }
1001
/*
 * Late ADS initialisation: fill in the golden contexts.
 *
 * The golden contexts are built from the engine state saved by
 * __engines_record_defaults(), which needs operational engines and therefore
 * an already configured ADS. Since the golden context state is not needed
 * until a hang occurs, it can be filled in at this later point.
 */
void intel_guc_ads_init_late(struct intel_guc *guc)
{
	guc_init_golden_context(guc);
}
1013
intel_guc_ads_destroy(struct intel_guc * guc)1014 void intel_guc_ads_destroy(struct intel_guc *guc)
1015 {
1016 i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
1017 iosys_map_clear(&guc->ads_map);
1018 kfree(guc->ads_regset);
1019 }
1020
guc_ads_private_data_reset(struct intel_guc * guc)1021 static void guc_ads_private_data_reset(struct intel_guc *guc)
1022 {
1023 u32 size;
1024
1025 size = guc_ads_private_data_size(guc);
1026 if (!size)
1027 return;
1028
1029 iosys_map_memset(&guc->ads_map, guc_ads_private_data_offset(guc),
1030 0, size);
1031 }
1032
1033 /**
1034 * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse
1035 * @guc: intel_guc struct
1036 *
1037 * GuC stores some data in ADS, which might be stale after a reset.
1038 * Reinitialize whole ADS in case any part of it was corrupted during
1039 * previous GuC run.
1040 */
intel_guc_ads_reset(struct intel_guc * guc)1041 void intel_guc_ads_reset(struct intel_guc *guc)
1042 {
1043 if (!guc->ads_vma)
1044 return;
1045
1046 __guc_ads_init(guc);
1047
1048 guc_ads_private_data_reset(guc);
1049 }
1050
intel_guc_engine_usage_offset(struct intel_guc * guc)1051 u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
1052 {
1053 return intel_guc_ggtt_offset(guc, guc->ads_vma) +
1054 offsetof(struct __guc_ads_blob, engine_usage);
1055 }
1056
intel_guc_engine_usage_record_map(struct intel_engine_cs * engine)1057 struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
1058 {
1059 struct intel_guc *guc = gt_to_guc(engine->gt);
1060 u8 guc_class = engine_class_to_guc_class(engine->class);
1061 size_t offset = offsetof(struct __guc_ads_blob,
1062 engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
1063
1064 return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset);
1065 }
1066