xref: /linux/drivers/gpu/drm/xe/xe_device_types.h (revision c332fba805d659eca1f8e3a41d259c03421e81f1)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright © 2022-2023 Intel Corporation
4  */
5 
6 #ifndef _XE_DEVICE_TYPES_H_
7 #define _XE_DEVICE_TYPES_H_
8 
9 #include <linux/pci.h>
10 
11 #include <drm/drm_device.h>
12 #include <drm/drm_file.h>
13 #include <drm/ttm/ttm_device.h>
14 
15 #include "xe_devcoredump_types.h"
16 #include "xe_heci_gsc.h"
17 #include "xe_late_bind_fw_types.h"
18 #include "xe_lmtt_types.h"
19 #include "xe_memirq_types.h"
20 #include "xe_mert.h"
21 #include "xe_oa_types.h"
22 #include "xe_pagefault_types.h"
23 #include "xe_platform_types.h"
24 #include "xe_pmu_types.h"
25 #include "xe_pt_types.h"
26 #include "xe_sriov_pf_types.h"
27 #include "xe_sriov_types.h"
28 #include "xe_sriov_vf_types.h"
29 #include "xe_sriov_vf_ccs_types.h"
30 #include "xe_step_types.h"
31 #include "xe_survivability_mode_types.h"
32 #include "xe_tile_sriov_vf_types.h"
33 #include "xe_validation.h"
34 
35 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
36 #define TEST_VM_OPS_ERROR
37 #endif
38 
39 struct dram_info;
40 struct drm_pagemap_shrinker;
41 struct intel_display;
42 struct intel_dg_nvm_dev;
43 struct xe_ggtt;
44 struct xe_i2c;
45 struct xe_pat_ops;
46 struct xe_pxp;
47 struct xe_vram_region;
48 
49 /**
50  * enum xe_wedged_mode - possible wedged modes
51  * @XE_WEDGED_MODE_NEVER: Device will never be declared wedged.
52  * @XE_WEDGED_MODE_UPON_CRITICAL_ERROR: Device will be declared wedged only
53  *	when a critical error occurs, such as a GT reset failure or a
54  *	firmware failure. This is the default mode.
55  * @XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET: Device will be declared wedged on
56  *	any hang. In this mode, engine resets are disabled to avoid automatic
57  *	recovery attempts. This mode is primarily intended for debugging hangs.
58  */
59 enum xe_wedged_mode {
60 	XE_WEDGED_MODE_NEVER = 0,
61 	XE_WEDGED_MODE_UPON_CRITICAL_ERROR = 1,
62 	XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET = 2,
63 };
64 
65 #define XE_WEDGED_MODE_DEFAULT		XE_WEDGED_MODE_UPON_CRITICAL_ERROR
66 #define XE_WEDGED_MODE_DEFAULT_STR	"upon-critical-error"
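
/*
 * Illustrative sketch (the helper below is hypothetical, not part of this
 * header): reset handling code can consult the chosen mode through the
 * @wedged.mode field of struct xe_device, e.g. to skip automatic recovery
 * when any hang should wedge the device.
 *
 *	static bool hang_should_wedge(struct xe_device *xe)
 *	{
 *		return xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET;
 *	}
 */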
67 
68 #define XE_BO_INVALID_OFFSET	LONG_MAX
69 
70 #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100)
71 #define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100)
72 #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
73 #define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
74 #define IS_DGFX(xe) ((xe)->info.is_dgfx)
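
/*
 * Illustrative usage (the version cutoff and callee are hypothetical): gating
 * a code path on the graphics IP version and on discrete vs. integrated with
 * the helpers above.
 *
 *	if (GRAPHICS_VERx100(xe) >= 1270 && IS_DGFX(xe))
 *		enable_dgfx_only_feature(xe);
 */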
75 
76 #define XE_VRAM_FLAGS_NEED64K		BIT(0)
77 
78 #define XE_GT0		0
79 #define XE_GT1		1
80 #define XE_MAX_TILES_PER_DEVICE	(XE_GT1 + 1)
81 
82 #define XE_MAX_ASID	(BIT(20))
83 
84 #define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step)	\
85 	((_xe)->info.platform == (_platform) &&			\
86 	 (_xe)->info.step.graphics >= (min_step) &&		\
87 	 (_xe)->info.step.graphics < (max_step))
88 #define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step)	\
89 	((_xe)->info.platform == (_platform) &&				\
90 	 (_xe)->info.subplatform == (sub) &&				\
91 	 (_xe)->info.step.graphics >= (min_step) &&			\
92 	 (_xe)->info.step.graphics < (max_step))
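
/*
 * Illustrative usage (the platform/step identifiers are assumed to come from
 * xe_platform_types.h and xe_step_types.h, and the callee is hypothetical):
 * the step range is inclusive of min_step and exclusive of max_step, so this
 * matches DG2 parts with graphics stepping A0 up to, but not including, B0.
 *
 *	if (IS_PLATFORM_STEP(xe, XE_PLATFORM_DG2, STEP_A0, STEP_B0))
 *		apply_a0_only_workaround(xe);
 */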
93 
94 #define tile_to_xe(tile__)								\
95 	_Generic(tile__,								\
96 		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
97 		 struct xe_tile * : (tile__)->xe)
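
/*
 * Illustrative usage: the _Generic() selection above preserves the constness
 * of the tile pointer, so both of the following compile without casts
 * (variable names are for illustration only).
 *
 *	struct xe_tile *tile = ...;
 *	const struct xe_tile *ctile = tile;
 *
 *	struct xe_device *xe = tile_to_xe(tile);
 *	const struct xe_device *cxe = tile_to_xe(ctile);
 */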
98 
99 /**
100  * struct xe_mmio - register mmio structure
101  *
102  * Represents an MMIO region that the CPU may use to access registers.  A
103  * region may share its IO map with other regions (e.g., all GTs within a
104  * tile share the same map with their parent tile, but represent different
105  * subregions of the overall IO space).
106  */
107 struct xe_mmio {
108 	/** @tile: Backpointer to tile, used for tracing */
109 	struct xe_tile *tile;
110 
111 	/** @regs: Map used to access registers. */
112 	void __iomem *regs;
113 
114 	/**
115 	 * @sriov_vf_gt: Backpointer to GT.
116 	 *
117 	 * This pointer is only set for GT MMIO regions and only when running
118 	 * as an SR-IOV VF.
119 	 */
120 	struct xe_gt *sriov_vf_gt;
121 
122 	/**
123 	 * @regs_size: Length of the register region within the map.
124 	 *
125 	 * The size of the iomap set in *regs is generally larger than the
126 	 * register mmio space since it includes unused regions and/or
127 	 * non-register regions such as the GGTT PTEs.
128 	 */
129 	size_t regs_size;
130 
131 	/** @adj_limit: adjust MMIO address if address is below this value */
132 	u32 adj_limit;
133 
134 	/** @adj_offset: offset to add to MMIO address when adjusting */
135 	u32 adj_offset;
136 };
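
/*
 * Illustrative sketch (not the driver's actual helper): how @adj_limit and
 * @adj_offset above are meant to be combined when forming a register offset.
 *
 *	static u32 mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr)
 *	{
 *		if (addr < mmio->adj_limit)
 *			addr += mmio->adj_offset;
 *		return addr;
 *	}
 */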
137 
138 /**
139  * struct xe_tile - hardware tile structure
140  *
141  * From a driver perspective, a "tile" is effectively a complete GPU, containing
142  * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM.
143  *
144  * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI
145  * device and designate one "root" tile as being responsible for external PCI
146  * communication.  PCI BAR0 exposes the GGTT and MMIO register space for each
147  * tile in a stacked layout, and PCI BAR2 exposes the local memory associated
148  * with each tile similarly.  Device-wide interrupts can be enabled/disabled
149  * at the root tile, and the MSTR_TILE_INTR register will report which tiles
150  * have interrupts that need servicing.
151  */
152 struct xe_tile {
153 	/** @xe: Backpointer to tile's PCI device */
154 	struct xe_device *xe;
155 
156 	/** @id: ID of the tile */
157 	u8 id;
158 
159 	/**
160 	 * @primary_gt: Primary GT
161 	 */
162 	struct xe_gt *primary_gt;
163 
164 	/**
165 	 * @media_gt: Media GT
166 	 *
167 	 * Only present on devices with media version >= 13.
168 	 */
169 	struct xe_gt *media_gt;
170 
171 	/**
172 	 * @mmio: MMIO info for a tile.
173 	 *
174 	 * Each tile has its own 16MB space in BAR0, laid out as:
175 	 * * 0-4MB: registers
176 	 * * 4MB-8MB: reserved
177 	 * * 8MB-16MB: global GTT
178 	 */
179 	struct xe_mmio mmio;
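
	/*
	 * Illustrative arithmetic derived from the layout above: with a 16MB
	 * per-tile stride in BAR0, tile 1's register range would start at
	 * 1 * 16MB = 0x1000000 and its global GTT at 0x1000000 + 8MB =
	 * 0x1800000.
	 */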
180 
181 	/** @mem: memory management info for tile */
182 	struct {
183 		/**
184 		 * @mem.kernel_vram: kernel-dedicated VRAM info for tile.
185 		 *
186 		 * Although VRAM is associated with a specific tile, it can
187 		 * still be accessed by all tiles' GTs.
188 		 */
189 		struct xe_vram_region *kernel_vram;
190 
191 		/**
192 		 * @mem.vram: general purpose VRAM info for tile.
193 		 *
194 		 * Although VRAM is associated with a specific tile, it can
195 		 * still be accessed by all tiles' GTs.
196 		 */
197 		struct xe_vram_region *vram;
198 
199 		/** @mem.ggtt: Global graphics translation table */
200 		struct xe_ggtt *ggtt;
201 
202 		/**
203 		 * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated.
204 		 *
205 		 * Media GT shares a pool with its primary GT.
206 		 */
207 		struct xe_sa_manager *kernel_bb_pool;
208 
209 		/**
210 		 * @mem.reclaim_pool: Pool from which PRLs (page reclaim lists) are allocated.
211 		 *
212 		 * Only the primary GT has page reclaim list allocations.
213 		 */
214 		struct xe_sa_manager *reclaim_pool;
215 	} mem;
216 
217 	/** @sriov: tile level virtualization data */
218 	union {
219 		struct {
220 			/** @sriov.pf.lmtt: Local Memory Translation Table. */
221 			struct xe_lmtt lmtt;
222 		} pf;
223 		struct {
224 			/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
225 			struct xe_ggtt_node *ggtt_balloon[2];
226 			/** @sriov.vf.self_config: VF configuration data */
227 			struct xe_tile_sriov_vf_selfconfig self_config;
228 		} vf;
229 	} sriov;
230 
231 	/** @memirq: Memory Based Interrupts. */
232 	struct xe_memirq memirq;
233 
234 	/** @csc_hw_error_work: worker to report CSC HW errors */
235 	struct work_struct csc_hw_error_work;
236 
237 	/** @pcode: tile's PCODE */
238 	struct {
239 		/** @pcode.lock: protecting tile's PCODE mailbox data */
240 		struct mutex lock;
241 	} pcode;
242 
243 	/** @migrate: Migration helper for vram blits and clearing */
244 	struct xe_migrate *migrate;
245 
246 	/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
247 	struct kobject *sysfs;
248 
249 	/** @debugfs: debugfs directory associated with this tile */
250 	struct dentry *debugfs;
251 
252 	/** @mert: MERT-related data */
253 	struct xe_mert mert;
254 };
255 
256 /**
257  * struct xe_device - Top level struct of Xe device
258  */
259 struct xe_device {
260 	/** @drm: drm device */
261 	struct drm_device drm;
262 
263 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
264 	/** @display: display device data, must be placed after drm device member */
265 	struct intel_display *display;
266 #endif
267 
268 	/** @devcoredump: device coredump */
269 	struct xe_devcoredump devcoredump;
270 
271 	/** @info: device info */
272 	struct intel_device_info {
273 		/** @info.platform_name: platform name */
274 		const char *platform_name;
275 		/** @info.graphics_name: graphics IP name */
276 		const char *graphics_name;
277 		/** @info.media_name: media IP name */
278 		const char *media_name;
279 		/** @info.graphics_verx100: graphics IP version */
280 		u32 graphics_verx100;
281 		/** @info.media_verx100: media IP version */
282 		u32 media_verx100;
283 		/** @info.mem_region_mask: mask of valid memory regions */
284 		u32 mem_region_mask;
285 		/** @info.platform: Xe platform enum */
286 		enum xe_platform platform;
287 		/** @info.subplatform: Xe subplatform enum */
288 		enum xe_subplatform subplatform;
289 		/** @info.devid: device ID */
290 		u16 devid;
291 		/** @info.revid: device revision */
292 		u8 revid;
293 		/** @info.step: stepping information for each IP */
294 		struct xe_step_info step;
295 		/** @info.dma_mask_size: DMA address bits */
296 		u8 dma_mask_size;
297 		/** @info.vram_flags: VRAM flags */
298 		u8 vram_flags;
299 		/** @info.tile_count: Number of tiles */
300 		u8 tile_count;
301 		/** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */
302 		u8 max_gt_per_tile;
303 		/** @info.gt_count: Total number of GTs for entire device */
304 		u8 gt_count;
305 		/** @info.vm_max_level: Max VM level */
306 		u8 vm_max_level;
307 		/** @info.va_bits: Maximum bits of a virtual address */
308 		u8 va_bits;
309 
310 		/*
311 		 * Keep all flags below alphabetically sorted
312 		 */
313 
314 		/** @info.force_execlist: Forced execlist submission */
315 		u8 force_execlist:1;
316 		/** @info.has_asid: Has address space ID */
317 		u8 has_asid:1;
318 		/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
319 		u8 has_atomic_enable_pte_bit:1;
320 		/** @info.has_cached_pt: Supports caching pagetable */
321 		u8 has_cached_pt:1;
322 		/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
323 		u8 has_device_atomics_on_smem:1;
324 		/** @info.has_fan_control: Device supports fan control */
325 		u8 has_fan_control:1;
326 		/** @info.has_flat_ccs: Whether flat CCS metadata is used */
327 		u8 has_flat_ccs:1;
328 		/** @info.has_gsc_nvm: Device has gsc non-volatile memory */
329 		u8 has_gsc_nvm:1;
330 		/** @info.has_heci_cscfi: device has heci cscfi */
331 		u8 has_heci_cscfi:1;
332 		/** @info.has_heci_gscfi: device has heci gscfi */
333 		u8 has_heci_gscfi:1;
334 		/** @info.has_i2c: Device has I2C controller */
335 		u8 has_i2c:1;
336 		/** @info.has_late_bind: Device has firmware late binding support */
337 		u8 has_late_bind:1;
338 		/** @info.has_llc: Device has a shared CPU+GPU last level cache */
339 		u8 has_llc:1;
340 		/** @info.has_mbx_power_limits: Device has support to manage power limits using
341 		 * pcode mailbox commands.
342 		 */
343 		u8 has_mbx_power_limits:1;
344 		/** @info.has_mbx_thermal_info: Device supports thermal mailbox commands */
345 		u8 has_mbx_thermal_info:1;
346 		/** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
347 		u8 has_mem_copy_instr:1;
348 		/** @info.has_mert: Device has standalone MERT */
349 		u8 has_mert:1;
350 		/** @info.has_page_reclaim_hw_assist: Device supports page reclamation feature */
351 		u8 has_page_reclaim_hw_assist:1;
352 		/** @info.has_pre_prod_wa: Pre-production workarounds still present in driver */
353 		u8 has_pre_prod_wa:1;
354 		/** @info.has_pxp: Device has PXP support */
355 		u8 has_pxp:1;
356 		/** @info.has_range_tlb_inval: Has range based TLB invalidations */
357 		u8 has_range_tlb_inval:1;
358 		/** @info.has_soc_remapper_sysctrl: Has SoC remapper system controller */
359 		u8 has_soc_remapper_sysctrl:1;
360 		/** @info.has_soc_remapper_telem: Has SoC remapper telemetry support */
361 		u8 has_soc_remapper_telem:1;
362 		/** @info.has_sriov: Supports SR-IOV */
363 		u8 has_sriov:1;
364 		/** @info.has_usm: Device has unified shared memory support */
365 		u8 has_usm:1;
366 		/** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
367 		u8 has_64bit_timestamp:1;
368 		/** @info.is_dgfx: is discrete device */
369 		u8 is_dgfx:1;
370 		/** @info.needs_scratch: needs scratch page for oob prefetch to work */
371 		u8 needs_scratch:1;
372 		/**
373 		 * @info.probe_display: Probe display hardware.  If set to
374 		 * false, the driver will behave as if there is no display
375 		 * hardware present and will not try to read/write to it in any
376 		 * way.  The display hardware, if it exists, will not be
377 		 * exposed to userspace and will be left untouched in whatever
378 		 * state the firmware or bootloader left it in.
379 		 */
380 		u8 probe_display:1;
381 		/** @info.skip_guc_pc: Skip GuC based PM feature init */
382 		u8 skip_guc_pc:1;
383 		/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
384 		u8 skip_mtcfg:1;
385 		/** @info.skip_pcode: skip access to PCODE uC */
386 		u8 skip_pcode:1;
387 		/** @info.needs_shared_vf_gt_wq: needs shared GT WQ on VF */
388 		u8 needs_shared_vf_gt_wq:1;
389 	} info;
390 
391 	/** @wa_active: keep track of active workarounds */
392 	struct {
393 		/** @wa_active.oob: bitmap with active OOB workarounds */
394 		unsigned long *oob;
395 
396 		/**
397 		 * @wa_active.oob_initialized: Mark oob as initialized to help detect misuse
398 		 * of XE_DEVICE_WA() - it can only be called on initialization after
399 		 * Device OOB WAs have been processed.
400 		 */
401 		bool oob_initialized;
402 	} wa_active;
403 
404 	/** @survivability: survivability information for device */
405 	struct xe_survivability survivability;
406 
407 	/** @irq: device interrupt state */
408 	struct {
409 		/** @irq.lock: lock for processing IRQs on this device */
410 		spinlock_t lock;
411 
412 		/** @irq.enabled: interrupts enabled on this device */
413 		atomic_t enabled;
414 
415 		/** @irq.msix: irq info for platforms that support MSI-X */
416 		struct {
417 			/** @irq.msix.nvec: number of MSI-X interrupts */
418 			u16 nvec;
419 			/** @irq.msix.indexes: used to allocate MSI-X indexes */
420 			struct xarray indexes;
421 		} msix;
422 	} irq;
423 
424 	/** @ttm: ttm device */
425 	struct ttm_device ttm;
426 
427 	/** @mmio: mmio info for device */
428 	struct {
429 		/** @mmio.size: size of MMIO space for device */
430 		size_t size;
431 		/** @mmio.regs: pointer to MMIO space for device */
432 		void __iomem *regs;
433 	} mmio;
434 
435 	/** @mem: memory info for device */
436 	struct {
437 		/** @mem.vram: VRAM info for device */
438 		struct xe_vram_region *vram;
439 		/** @mem.sys_mgr: system TTM manager */
440 		struct ttm_resource_manager sys_mgr;
441 		/** @mem.shrinker: system memory shrinker. */
442 		struct xe_shrinker *shrinker;
443 	} mem;
444 
445 	/** @sriov: device level virtualization data */
446 	struct {
447 		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
448 		enum xe_sriov_mode __mode;
449 
450 		union {
451 			/** @sriov.pf: PF specific data */
452 			struct xe_device_pf pf;
453 			/** @sriov.vf: VF specific data */
454 			struct xe_device_vf vf;
455 		};
456 
457 		/** @sriov.wq: workqueue used by the virtualization workers */
458 		struct workqueue_struct *wq;
459 	} sriov;
460 
461 	/** @usm: unified memory state */
462 	struct {
463 		/** @usm.asid_to_vm: map an ASID to its VM */
464 		struct xarray asid_to_vm;
465 		/** @usm.next_asid: next ASID, used for cyclic ASID allocation */
466 		u32 next_asid;
467 		/** @usm.lock: protects USM state */
468 		struct rw_semaphore lock;
469 		/** @usm.pf_wq: page fault work queue, unbound, high priority */
470 		struct workqueue_struct *pf_wq;
471 		/*
472 		 * We pick 4 here because, in the current implementation, it
473 		 * yields the best bandwidth utilization of the kernel paging
474 		 * engine.
475 		 */
476 #define XE_PAGEFAULT_QUEUE_COUNT	4
477 		/** @usm.pf_queue: Page fault queues */
478 		struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
479 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
480 		/** @usm.pagemap_shrinker: Shrinker for unused pagemaps */
481 		struct drm_pagemap_shrinker *dpagemap_shrinker;
482 #endif
483 	} usm;
484 
485 	/** @pinned: pinned BO state */
486 	struct {
487 		/** @pinned.lock: protects pinned BO list state */
488 		spinlock_t lock;
489 		/** @pinned.early: early pinned lists */
490 		struct {
491 			/** @pinned.early.kernel_bo_present: pinned kernel BOs that are present */
492 			struct list_head kernel_bo_present;
493 			/** @pinned.early.evicted: pinned BOs that have been evicted */
494 			struct list_head evicted;
495 		} early;
496 		/** @pinned.late: late pinned lists */
497 		struct {
498 			/** @pinned.late.kernel_bo_present: pinned kernel BOs that are present */
499 			struct list_head kernel_bo_present;
500 			/** @pinned.late.evicted: pinned BOs that have been evicted */
501 			struct list_head evicted;
502 			/** @pinned.late.external: pinned external and dma-buf BOs */
503 			struct list_head external;
504 		} late;
505 	} pinned;
506 
507 	/** @ufence_wq: user fence wait queue */
508 	wait_queue_head_t ufence_wq;
509 
510 	/** @preempt_fence_wq: used to serialize preempt fences */
511 	struct workqueue_struct *preempt_fence_wq;
512 
513 	/** @ordered_wq: used to serialize compute mode resume */
514 	struct workqueue_struct *ordered_wq;
515 
516 	/** @unordered_wq: used to serialize unordered work */
517 	struct workqueue_struct *unordered_wq;
518 
519 	/** @destroy_wq: used to serialize user destroy work, e.g. exec queue cleanup */
520 	struct workqueue_struct *destroy_wq;
521 
522 	/** @tiles: device tiles */
523 	struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
524 
525 	/**
526 	 * @mem_access: keep track of memory access in the device, possibly
527 	 * triggering additional actions when they occur.
528 	 */
529 	struct {
530 		/**
531 		 * @mem_access.vram_userfault: Encapsulate vram_userfault
532 		 * related stuff
533 		 */
534 		struct {
535 			/**
536 			 * @mem_access.vram_userfault.lock: Protects access to
537 			 * @mem_access.vram_userfault.list. A mutex is used instead of a
538 			 * spinlock as the lock is held across entire list operations,
539 			 * which may sleep.
540 			 */
541 			struct mutex lock;
542 
543 			/**
544 			 * @mem_access.vram_userfault.list: List of userfaulted VRAM
545 			 * BOs whose mmap mappings need to be released in the runtime
546 			 * suspend path
547 			 */
548 			struct list_head list;
549 		} vram_userfault;
550 	} mem_access;
551 
552 	/**
553 	 * @pat: Encapsulate PAT related stuff
554 	 */
555 	struct {
556 		/** @pat.ops: Internal operations to abstract platforms */
557 		const struct xe_pat_ops *ops;
558 		/** @pat.table: PAT table to program in the HW */
559 		const struct xe_pat_table_entry *table;
560 		/** @pat.n_entries: Number of PAT entries */
561 		int n_entries;
562 		/** @pat.pat_ats: PAT entry for PCIe ATS responses */
563 		const struct xe_pat_table_entry *pat_ats;
564 		/** @pat.pat_pta: PAT entry for page table accesses */
565 		const struct xe_pat_table_entry *pat_pta;
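		/** @pat.idx: PAT index to use for each cache level */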
566 		u32 idx[__XE_CACHE_LEVEL_COUNT];
567 	} pat;
568 
569 	/** @d3cold: Encapsulate d3cold related stuff */
570 	struct {
571 		/** @d3cold.capable: Indicates if root port is d3cold capable */
572 		bool capable;
573 
574 		/** @d3cold.allowed: Indicates if d3cold is a valid device state */
575 		bool allowed;
576 
577 		/**
578 		 * @d3cold.vram_threshold:
579 		 *
580 		 * This is the permissible threshold (in megabytes) for VRAM
581 		 * save/restore. d3cold is disallowed when VRAM usage is at or
582 		 * above this threshold, in order to avoid the VRAM save/restore
583 		 * latency.
584 		 * The default threshold is 300 MB.
585 		 */
586 		u32 vram_threshold;
587 		/** @d3cold.lock: protect vram_threshold */
588 		struct mutex lock;
589 	} d3cold;
590 
591 	/** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
592 	struct notifier_block pm_notifier;
593 	/** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
594 	struct completion pm_block;
595 	/** @rebind_resume_list: List of wq items to kick on resume. */
596 	struct list_head rebind_resume_list;
597 	/** @rebind_resume_lock: Lock to protect the rebind_resume_list */
598 	struct mutex rebind_resume_lock;
599 
600 	/** @pmt: Support the PMT driver callback interface */
601 	struct {
602 		/** @pmt.lock: protect access for telemetry data */
603 		struct mutex lock;
604 	} pmt;
605 
606 	/** @soc_remapper: SoC remapper object */
607 	struct {
608 		/** @soc_remapper.lock: Serialize access to SoC Remapper's index registers */
609 		spinlock_t lock;
610 
611 		/** @soc_remapper.set_telem_region: Set telemetry index */
612 		void (*set_telem_region)(struct xe_device *xe, u32 index);
613 
614 		/** @soc_remapper.set_sysctrl_region: Set system controller index */
615 		void (*set_sysctrl_region)(struct xe_device *xe, u32 index);
616 	} soc_remapper;
617 
618 	/**
619 	 * @pm_callback_task: Track the active task that is running in either
620 	 * the runtime_suspend or runtime_resume callbacks.
621 	 */
622 	struct task_struct *pm_callback_task;
623 
624 	/** @hwmon: hwmon subsystem integration */
625 	struct xe_hwmon *hwmon;
626 
627 	/** @heci_gsc: graphics security controller */
628 	struct xe_heci_gsc heci_gsc;
629 
630 	/** @nvm: discrete graphics non-volatile memory */
631 	struct intel_dg_nvm_dev *nvm;
632 
633 	/** @late_bind: xe mei late bind interface */
634 	struct xe_late_bind late_bind;
635 
636 	/** @oa: oa observation subsystem */
637 	struct xe_oa oa;
638 
639 	/** @pxp: Encapsulate Protected Xe Path support */
640 	struct xe_pxp *pxp;
641 
642 	/** @needs_flr_on_fini: requests function-reset on fini */
643 	bool needs_flr_on_fini;
644 
645 	/** @wedged: Struct to control Wedged States and mode */
646 	struct {
647 		/** @wedged.flag: Xe device faced a critical error and is now blocked. */
648 		atomic_t flag;
649 		/** @wedged.mode: Mode controlled by kernel parameter and debugfs */
650 		enum xe_wedged_mode mode;
651 		/** @wedged.method: Recovery method to be sent in the drm device wedged uevent */
652 		unsigned long method;
653 		/** @wedged.inconsistent_reset: Inconsistent reset policy state between GTs */
654 		bool inconsistent_reset;
655 	} wedged;
656 
657 	/** @bo_device: Struct to control async free of BOs */
658 	struct xe_bo_dev {
659 		/** @bo_device.async_free: Free worker */
660 		struct work_struct async_free;
661 		/** @bo_device.async_list: List of BOs to be freed */
662 		struct llist_head async_list;
663 	} bo_device;
664 
665 	/** @pmu: performance monitoring unit */
666 	struct xe_pmu pmu;
667 
668 	/** @i2c: I2C host controller */
669 	struct xe_i2c *i2c;
670 
671 	/** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */
672 	u32 atomic_svm_timeslice_ms;
673 
674 	/** @min_run_period_lr_ms: LR VM (preempt fence mode) timeslice */
675 	u32 min_run_period_lr_ms;
676 
677 	/** @min_run_period_pf_ms: LR VM (page fault mode) timeslice */
678 	u32 min_run_period_pf_ms;
679 
680 #ifdef TEST_VM_OPS_ERROR
681 	/**
682 	 * @vm_inject_error_position: inject errors at different places in VM
683 	 * bind IOCTL based on this value
684 	 */
685 	u8 vm_inject_error_position;
686 #endif
687 
688 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
689 	/**
690 	 * @global_total_pages: global GPU page usage tracked for gpu_mem
691 	 * tracepoints
692 	 */
693 	atomic64_t global_total_pages;
694 #endif
695 	/** @val: The domain for exhaustive eviction, which is currently per device. */
696 	struct xe_validation_device val;
697 
698 	/** @psmi: GPU debugging via additional validation HW */
699 	struct {
700 		/** @psmi.capture_obj: PSMI buffer for VRAM */
701 		struct xe_bo *capture_obj[XE_MAX_TILES_PER_DEVICE + 1];
702 		/** @psmi.region_mask: Mask of valid memory regions */
703 		u8 region_mask;
704 	} psmi;
705 
706 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
707 	/** @g2g_test_array: for testing G2G communications */
708 	u32 *g2g_test_array;
709 	/** @g2g_test_count: for testing G2G communications */
710 	atomic_t g2g_test_count;
711 #endif
712 
713 	/* private: */
714 
715 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
716 	/*
717 	 * Any fields below this point are the ones used by display.
718 	 * They are temporarily added here so xe_device can be disguised as
719 	 * drm_i915_private during build. After cleanup these should go away,
720 	 * migrating to the right sub-structs
721 	 */
722 
723 	struct intel_uncore {
724 		spinlock_t lock;
725 	} uncore;
726 #endif
727 };
728 
729 /**
730  * struct xe_file - file handle for Xe driver
731  */
732 struct xe_file {
733 	/** @xe: xe device */
734 	struct xe_device *xe;
735 
736 	/** @drm: base DRM file */
737 	struct drm_file *drm;
738 
739 	/** @vm: VM state for file */
740 	struct {
741 		/** @vm.xa: xarray to store VMs */
742 		struct xarray xa;
743 		/**
744 		 * @vm.lock: Protects VM lookup + reference and removal from
745 		 * file xarray. Not intended to be an outer lock which does
746 		 * things while being held.
747 		 */
748 		struct mutex lock;
749 	} vm;
750 
751 	/** @exec_queue: Submission exec queue state for file */
752 	struct {
753 		/** @exec_queue.xa: xarray to store exec queues */
754 		struct xarray xa;
755 		/**
756 		 * @exec_queue.lock: Protects exec queue lookup + reference and
757 		 * removal from file xarray. Not intended to be an outer lock
758 		 * which does things while being held.
759 		 */
760 		struct mutex lock;
761 		/**
762 		 * @exec_queue.pending_removal: items pending to be removed to
763 		 * synchronize GPU state update with ongoing query.
764 		 */
765 		atomic_t pending_removal;
766 	} exec_queue;
767 
768 	/** @run_ticks: hw engine class run time in ticks for this drm client */
769 	u64 run_ticks[XE_ENGINE_CLASS_MAX];
770 
771 	/** @client: drm client */
772 	struct xe_drm_client *client;
773 
774 	/**
775 	 * @process_name: process name for file handle, used to safely output
776 	 * during error situations where xe file can outlive process
777 	 */
778 	char *process_name;
779 
780 	/**
781 	 * @pid: pid for file handle, used to safely output during error
782 	 * situations where xe file can outlive process
783 	 */
784 	pid_t pid;
785 
786 	/** @refcount: ref count of this xe file */
787 	struct kref refcount;
788 };
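
/*
 * Illustrative lifetime sketch (the release callback name is assumed, not
 * part of this header): an xe_file is reference counted through @refcount,
 * so a lookup that must outlive the current ioctl takes its own reference:
 *
 *	kref_get(&xef->refcount);
 *	...
 *	kref_put(&xef->refcount, xe_file_destroy);
 */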
789 
790 #endif
791