/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022-2023 Intel Corporation
 */

#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_

#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_gt_stats.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"

struct xe_exec_queue_ops;
struct xe_migrate;
struct xe_ring_ops;

enum xe_gt_type {
	XE_GT_TYPE_UNINITIALIZED,
	XE_GT_TYPE_MAIN,
	XE_GT_TYPE_MEDIA,
};

enum xe_gt_eu_type {
	XE_GT_EU_TYPE_SIMD8,
	XE_GT_EU_TYPE_SIMD16,
};

#define XE_MAX_DSS_FUSE_REGS		3
#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS		1
#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
#define XE_MAX_L3_BANK_MASK_BITS	64

typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];
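
/*
 * Illustrative sketch only (not part of the driver): the mask types above
 * are plain bitmaps, so the generic bitmap helpers apply. The block below
 * assumes <linux/bitmap.h> is available; the helper name is made up for
 * illustration.
 */
#if 0
static inline unsigned int xe_example_dss_count(const xe_dss_mask_t mask)
{
	/* number of DSS marked present in the fuse-derived mask */
	return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS);
}
#endif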

struct xe_mmio_range {
	u32 start;
	u32 end;
};
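
/*
 * Illustrative sketch only: steering code typically describes multicast
 * register ranges as tables of [start, end] pairs like the one below. The
 * offsets and the table name here are made up for illustration.
 */
#if 0
static const struct xe_mmio_range example_mcr_ranges[] = {
	{ .start = 0xb000, .end = 0xb0ff },	/* hypothetical range */
	{ .start = 0xd800, .end = 0xd8ff },	/* hypothetical range */
	{},					/* empty entry terminates the table */
};
#endif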

/*
 * The hardware has multiple kinds of multicast register ranges that need
 * special register steering (and future platforms are expected to add
 * additional types).
 *
 * During driver startup, we initialize the steering control register to
 * direct reads to a slice/subslice that is valid for the 'subslice' class
 * of multicast registers.  If another type of steering does not have any
 * overlap in valid steering targets with 'subslice' style registers, we will
 * need to explicitly re-steer reads of registers of the other type.
 *
 * Only the replication types that may need additional non-default steering
 * are listed here.
 */
enum xe_steering_type {
	L3BANK,
	MSLICE,
	LNCF,
	DSS,
	OADDRM,
	SQIDI_PSMI,

	/*
	 * On some platforms there are multiple types of MCR registers that
	 * will always return a non-terminated value at instance (0, 0).  We'll
	 * lump those all into a single category to keep things simple.
	 */
	INSTANCE0,

	/*
	 * Register ranges that don't need special steering for each register:
	 * it's sufficient to keep the HW-default for the selector, or only
	 * change it once, on GT initialization. This needs to be the last
	 * steering type.
	 */
	IMPLICIT_STEERING,
	NUM_STEERING_TYPES
};
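
/*
 * Illustrative sketch only: each steering type above is used as an index
 * into the per-GT steering table (&xe_gt.steering, defined later in this
 * header). A hypothetical helper reading the group target chosen for L3
 * bank registers could look like this.
 */
#if 0
static inline u16 xe_example_l3bank_group(const struct xe_gt *gt)
{
	/* steering[] has NUM_STEERING_TYPES entries, indexed by type */
	return gt->steering[L3BANK].group_target;
}
#endif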

#define gt_to_tile(gt__)							\
	_Generic(gt__,								\
		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
		 struct xe_gt * : (gt__)->tile)

#define gt_to_xe(gt__)										\
	_Generic(gt__,										\
		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
		 struct xe_gt * : gt_to_tile(gt__)->xe)
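
/*
 * Illustrative sketch only: the _Generic selection above preserves
 * const-ness, so a const GT pointer yields const tile/device pointers. A
 * hypothetical caller (the function name is made up):
 */
#if 0
static inline bool xe_example_same_device(const struct xe_gt *a,
					  const struct xe_gt *b)
{
	/* gt_to_xe() on a const pointer returns a const struct xe_device * */
	return gt_to_xe(a) == gt_to_xe(b);
}
#endif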

/**
 * struct xe_gt - A "Graphics Technology" unit of the GPU
 *
 * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
 * for implementing the graphics, compute, and/or media IP.  It encapsulates
 * the hardware engines, programmable execution units, and GuC.  Each GT has
 * its own handling of power management (RC6+forcewake) and multicast register
 * steering.
 *
 * A GPU/tile may have a single GT that supplies all graphics, compute, and
 * media functionality, or the graphics/compute and media may be split into
 * separate GTs within a tile.
 */
struct xe_gt {
	/** @tile: Backpointer to GT's tile */
	struct xe_tile *tile;

	/** @info: GT info */
	struct {
		/** @info.type: type of GT */
		enum xe_gt_type type;
		/** @info.reference_clock: clock frequency */
		u32 reference_clock;
		/**
		 * @info.engine_mask: mask of engines present on GT. Some of
		 * them may be reserved at runtime and not available to
		 * userspace. See @user_engines.mask
		 */
		u64 engine_mask;
		/** @info.gmdid: raw GMD_ID value from hardware */
		u32 gmdid;
		/** @info.id: Unique ID of this GT within the PCI Device */
		u8 id;
		/** @info.has_indirect_ring_state: GT has indirect ring state support */
		u8 has_indirect_ring_state:1;
	} info;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	/** @stats: GT stats */
	struct {
		/** @stats.counters: counters for various GT stats */
		atomic_t counters[__XE_GT_STATS_NUM_IDS];
	} stats;
#endif

	/**
	 * @mmio: mmio info for GT.  All GTs within a tile share the same
	 * register space, but have their own copy of GSI registers at a
	 * specific offset, as well as their own forcewake handling.
	 */
	struct {
		/** @mmio.fw: force wake for GT */
		struct xe_force_wake fw;
		/**
		 * @mmio.adj_limit: adjust MMIO address if address is below this
		 * value
		 */
		u32 adj_limit;
		/** @mmio.adj_offset: offset to add to MMIO address when adjusting */
		u32 adj_offset;
	} mmio;

	/** @sriov: virtualization data related to GT */
	union {
		/** @sriov.pf: PF data. Valid only if driver is running as PF */
		struct xe_gt_sriov_pf pf;
		/** @sriov.vf: VF data. Valid only if driver is running as VF */
		struct xe_gt_sriov_vf vf;
	} sriov;

	/**
	 * @reg_sr: table with registers to be restored on GT init/resume/reset
	 */
	struct xe_reg_sr reg_sr;

	/** @reset: state for GT resets */
	struct {
		/**
		 * @reset.worker: work so GT resets can be done asynchronously,
		 * allowing the reset code to safely flush all code paths
		 */
		struct work_struct worker;
	} reset;

	/** @tlb_invalidation: TLB invalidation state */
	struct {
		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX	0x100000
		int seqno;
		/**
		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
		 * protected by CT lock
		 */
		int seqno_recv;
		/**
		 * @tlb_invalidation.pending_fences: list of pending fences waiting on TLB
		 * invalidations, protected by CT lock
		 */
		struct list_head pending_fences;
		/**
		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
		 * and updating @tlb_invalidation.seqno_recv.
		 */
		spinlock_t pending_lock;
		/**
		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
		 */
		struct delayed_work fence_tdr;
		/** @tlb_invalidation.lock: protects TLB invalidation fences */
		spinlock_t lock;
	} tlb_invalidation;

	/**
	 * @ccs_mode: Number of compute engines enabled.
	 * Allows fixed mapping of available compute slices to compute engines.
	 * By default only the first available compute engine is enabled and all
	 * available compute slices are allocated to it.
	 */
	u32 ccs_mode;

	/** @usm: unified shared memory state */
	struct {
		/**
		 * @usm.bb_pool: Pool from which batchbuffers, for USM operations
		 * (e.g. migrations, fixing page tables), are allocated.
		 * A dedicated pool is needed so USM operations do not get
		 * blocked behind any user operations which may have resulted
		 * in a fault.
		 */
		struct xe_sa_manager *bb_pool;
		/**
		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
		 * operations (e.g. migrations, fixing page tables)
		 */
		u16 reserved_bcs_instance;
		/** @usm.pf_wq: page fault work queue, unbound, high priority */
		struct workqueue_struct *pf_wq;
		/** @usm.acc_wq: access counter work queue, unbound, high priority */
		struct workqueue_struct *acc_wq;
		/**
		 * @usm.pf_queue: Page fault queue used to sync faults so that
		 * faults can be processed outside the GuC CT lock. The queue is
		 * sized so it can sync all possible faults (1 per physical
		 * engine). Multiple queues exist so page faults from different
		 * VMs can be processed in parallel. (An illustrative sketch of
		 * the head/tail accounting follows this structure definition.)
		 */
		struct pf_queue {
			/** @usm.pf_queue.gt: back pointer to GT */
			struct xe_gt *gt;
			/** @usm.pf_queue.data: data in the page fault queue */
			u32 *data;
			/**
			 * @usm.pf_queue.num_dw: number of DWORDS in the page
			 * fault queue. Dynamically calculated based on the number
			 * of compute resources available.
			 */
			u32 num_dw;
			/**
			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
			 * moved by worker which processes faults (consumer).
			 */
			u16 tail;
			/**
			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.pf_queue.lock: protects page fault queue */
			spinlock_t lock;
			/** @usm.pf_queue.worker: to process page faults */
			struct work_struct worker;
#define NUM_PF_QUEUE	4
		} pf_queue[NUM_PF_QUEUE];
		/**
		 * @usm.acc_queue: Same as the page fault queue; access counters
		 * cannot be processed under the CT lock either.
		 */
		struct acc_queue {
			/** @usm.acc_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW	128
			/** @usm.acc_queue.data: data in the access counter queue */
			u32 data[ACC_QUEUE_NUM_DW];
			/**
			 * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
			 * moved by worker which processes counters
			 * (consumer).
			 */
			u16 tail;
			/**
			 * @usm.acc_queue.head: head pointer in DWs for access counter queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.acc_queue.lock: protects access counter queue */
			spinlock_t lock;
			/** @usm.acc_queue.worker: to process access counters */
			struct work_struct worker;
#define NUM_ACC_QUEUE	4
		} acc_queue[NUM_ACC_QUEUE];
	} usm;

	/** @ordered_wq: used to serialize GT resets and TDRs */
	struct workqueue_struct *ordered_wq;

	/** @uc: micro controllers on the GT */
	struct xe_uc uc;

	/** @gtidle: idle properties of GT */
	struct xe_gt_idle gtidle;

	/** @exec_queue_ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *exec_queue_ops;

	/**
	 * @ring_ops: ring operations for this hw engine (1 per engine class)
	 */
	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];

	/** @fence_irq: fence IRQs (1 per engine class) */
	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];

	/** @default_lrc: default LRC state */
	void *default_lrc[XE_ENGINE_CLASS_MAX];

	/** @hw_engines: hardware engines on the GT */
	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];

	/** @eclass: per hardware engine class interface on the GT */
	struct xe_hw_engine_class_intf  eclass[XE_ENGINE_CLASS_MAX];

	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
	struct kobject *sysfs;

	/** @freq: Main GT freq sysfs control */
	struct kobject *freq;

	/** @mocs: MOCS (memory object control state) info */
	struct {
		/** @mocs.uc_index: UC index */
		u8 uc_index;
		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
		u8 wb_index;
	} mocs;

	/** @fuse_topo: GT topology reported by fuse registers */
	struct {
		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
		xe_dss_mask_t g_dss_mask;

		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
		xe_dss_mask_t c_dss_mask;

		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
		xe_eu_mask_t eu_mask_per_dss;

		/** @fuse_topo.l3_bank_mask: L3 bank mask */
		xe_l3_bank_mask_t l3_bank_mask;

		/**
		 * @fuse_topo.eu_type: type/width of EU stored in
		 * fuse_topo.eu_mask_per_dss
		 */
		enum xe_gt_eu_type eu_type;
	} fuse_topo;

	/** @steering: register steering for individual HW units */
	struct {
		/** @steering.ranges: register ranges used for this steering type */
		const struct xe_mmio_range *ranges;

		/** @steering.group_target: target to steer accesses to */
		u16 group_target;
		/** @steering.instance_target: instance to steer accesses to */
		u16 instance_target;
	} steering[NUM_STEERING_TYPES];

	/**
	 * @steering_dss_per_grp: number of DSS per steering group (gslice,
	 *    cslice, etc.).
	 */
	unsigned int steering_dss_per_grp;

	/**
	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
	 *    of a steered operation
	 */
	spinlock_t mcr_lock;

	/**
	 * @global_invl_lock: protects the register for the duration
	 *    of a global invalidation of l2 cache
	 */
	spinlock_t global_invl_lock;

	/** @wa_active: keep track of active workarounds */
	struct {
		/** @wa_active.gt: bitmap with active GT workarounds */
		unsigned long *gt;
		/** @wa_active.engine: bitmap with active engine workarounds */
		unsigned long *engine;
		/** @wa_active.lrc: bitmap with active LRC workarounds */
		unsigned long *lrc;
		/** @wa_active.oob: bitmap with active OOB workarounds */
		unsigned long *oob;
		/**
		 * @wa_active.oob_initialized: mark OOB workarounds as initialized
		 * to help detect misuse of XE_WA() - it can only be called
		 * during initialization, after OOB WAs have been processed
		 */
		bool oob_initialized;
	} wa_active;

	/** @user_engines: engines present in GT and available to userspace */
	struct {
		/**
		 * @user_engines.mask: like @info->engine_mask, but taking into
		 * account only the engines available to userspace
		 */
		u64 mask;

		/**
		 * @user_engines.instances_per_class: per-class count of the
		 * engines available to userspace
		 */
		u8 instances_per_class[XE_ENGINE_CLASS_MAX];
	} user_engines;

	/** @oa: per-GT info for the OA (observation) subsystem */
	struct xe_oa_gt oa;
};
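
/*
 * Illustrative sketch only (not the driver's implementation): the pf_queue
 * above is a ring of DWORDs in which the G2H handler advances @head
 * (producer) and the worker advances @tail (consumer). Under that
 * assumption, the occupancy could be derived as below; the helper name and
 * wrap handling are hypothetical.
 */
#if 0
static inline u32 xe_example_pf_queue_used_dw(const struct pf_queue *pf_queue)
{
	/* distance from consumer to producer, wrapping at num_dw */
	return (pf_queue->head - pf_queue->tail + pf_queue->num_dw) %
		pf_queue->num_dw;
}
#endif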

#endif