/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022-2023 Intel Corporation
 */

#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_

#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_gt_stats.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"

struct xe_exec_queue_ops;
struct xe_migrate;
struct xe_ring_ops;

enum xe_gt_type {
	XE_GT_TYPE_UNINITIALIZED,
	XE_GT_TYPE_MAIN,
	XE_GT_TYPE_MEDIA,
};

enum xe_gt_eu_type {
	XE_GT_EU_TYPE_SIMD8,
	XE_GT_EU_TYPE_SIMD16,
};

#define XE_MAX_DSS_FUSE_REGS 3
#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS 1
#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
#define XE_MAX_L3_BANK_MASK_BITS 64

typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];

struct xe_mmio_range {
	u32 start;
	u32 end;
};

/*
 * The hardware has multiple kinds of multicast register ranges that need
 * special register steering (and future platforms are expected to add
 * additional types).
 *
 * During driver startup, we initialize the steering control register to
 * direct reads to a slice/subslice that is valid for the 'subslice' class
 * of multicast registers. If another type of steering does not have any
 * overlap in valid steering targets with 'subslice' style registers, we will
 * need to explicitly re-steer reads of registers of the other type.
 *
 * Only the replication types that may need additional non-default steering
 * are listed here.
 */
enum xe_steering_type {
	L3BANK,
	MSLICE,
	LNCF,
	DSS,
	OADDRM,
	SQIDI_PSMI,

	/*
	 * On some platforms there are multiple types of MCR registers that
	 * will always return a non-terminated value at instance (0, 0). We'll
	 * lump those all into a single category to keep things simple.
	 */
	INSTANCE0,

	/*
	 * Register ranges that don't need special steering for each register:
	 * it's sufficient to keep the HW-default for the selector, or only
	 * change it once, on GT initialization. This needs to be the last
	 * steering type.
	 */
	IMPLICIT_STEERING,
	NUM_STEERING_TYPES
};

#define gt_to_tile(gt__) \
	_Generic(gt__, \
		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile), \
		 struct xe_gt * : (gt__)->tile)

#define gt_to_xe(gt__) \
	_Generic(gt__, \
		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe), \
		 struct xe_gt * : gt_to_tile(gt__)->xe)
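/*
 * Note: gt_to_tile() and gt_to_xe() above use C11 _Generic selection so that
 * the const-ness of the GT pointer is propagated to the result. A minimal,
 * illustrative sketch (example_check_gt() is hypothetical and not part of the
 * driver):
 *
 *	static void example_check_gt(const struct xe_gt *gt)
 *	{
 *		// The 'const struct xe_gt *' branches are selected, so both
 *		// results are pointers-to-const as well.
 *		const struct xe_tile *tile = gt_to_tile(gt);
 *		const struct xe_device *xe = gt_to_xe(gt);
 *		...
 *	}
 *
 * Passing a non-const 'struct xe_gt *' instead selects the non-const branches
 * and yields plain 'struct xe_tile *' / 'struct xe_device *' pointers.
 */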
/**
 * struct xe_gt - A "Graphics Technology" unit of the GPU
 *
 * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
 * for implementing the graphics, compute, and/or media IP. It encapsulates
 * the hardware engines, programmable execution units, and GuC. Each GT has
 * its own handling of power management (RC6+forcewake) and multicast register
 * steering.
 *
 * A GPU/tile may have a single GT that supplies all graphics, compute, and
 * media functionality, or the graphics/compute and media may be split into
 * separate GTs within a tile.
 */
struct xe_gt {
	/** @tile: Backpointer to GT's tile */
	struct xe_tile *tile;

	/** @info: GT info */
	struct {
		/** @info.type: type of GT */
		enum xe_gt_type type;
		/** @info.reference_clock: clock frequency */
		u32 reference_clock;
		/**
		 * @info.engine_mask: mask of engines present on GT. Some of
		 * them may be reserved at runtime and not available to the
		 * user. See @user_engines.mask
		 */
		u64 engine_mask;
		/** @info.gmdid: raw GMD_ID value from hardware */
		u32 gmdid;
		/** @info.id: Unique ID of this GT within the PCI Device */
		u8 id;
		/** @info.has_indirect_ring_state: GT has indirect ring state support */
		u8 has_indirect_ring_state:1;
	} info;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	/** @stats: GT stats */
	struct {
		/** @stats.counters: counters for various GT stats */
		atomic_t counters[__XE_GT_STATS_NUM_IDS];
	} stats;
#endif

	/**
	 * @mmio: mmio info for GT. All GTs within a tile share the same
	 * register space, but have their own copy of GSI registers at a
	 * specific offset, as well as their own forcewake handling.
	 */
	struct {
		/** @mmio.fw: force wake for GT */
		struct xe_force_wake fw;
		/**
		 * @mmio.adj_limit: adjust MMIO address if address is below this
		 * value
		 */
		u32 adj_limit;
		/** @mmio.adj_offset: offset to add to MMIO address when adjusting */
		u32 adj_offset;
	} mmio;

	/** @sriov: virtualization data related to GT */
	union {
		/** @sriov.pf: PF data. Valid only if driver is running as PF */
		struct xe_gt_sriov_pf pf;
		/** @sriov.vf: VF data. Valid only if driver is running as VF */
		struct xe_gt_sriov_vf vf;
	} sriov;

	/**
	 * @reg_sr: table with registers to be restored on GT init/resume/reset
	 */
	struct xe_reg_sr reg_sr;

	/** @reset: state for GT resets */
	struct {
		/**
		 * @reset.worker: work so GT resets can be done asynchronously,
		 * allowing the reset code to safely flush all code paths
		 */
		struct work_struct worker;
	} reset;

	/** @tlb_invalidation: TLB invalidation state */
	struct {
		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX 0x100000
		int seqno;
		/**
		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
		 * protected by CT lock
		 */
		int seqno_recv;
		/**
		 * @tlb_invalidation.pending_fences: list of pending fences waiting on
		 * TLB invalidations, protected by CT lock
		 */
		struct list_head pending_fences;
		/**
		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
		 * and updating @tlb_invalidation.seqno_recv.
		 */
		spinlock_t pending_lock;
		/**
		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
		 */
		struct delayed_work fence_tdr;
		/** @tlb_invalidation.lock: protects TLB invalidation fences */
		spinlock_t lock;
	} tlb_invalidation;
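	/*
	 * The seqno/seqno_recv pair above implements a wrapping sequence
	 * number scheme (seqnos wrap at TLB_INVALIDATION_SEQNO_MAX). A
	 * minimal sketch of a wrapping "has this seqno been received yet?"
	 * check, assuming the window is a power of two; illustrative only,
	 * not necessarily the driver's exact logic:
	 *
	 *	static bool example_seqno_received(int seqno_recv, int seqno)
	 *	{
	 *		u32 dist = (seqno_recv - seqno) &
	 *			   (TLB_INVALIDATION_SEQNO_MAX - 1);
	 *
	 *		return dist < TLB_INVALIDATION_SEQNO_MAX / 2;
	 *	}
	 */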
	/**
	 * @ccs_mode: Number of compute engines enabled.
	 * Allows fixed mapping of available compute slices to compute engines.
	 * By default only the first available compute engine is enabled and all
	 * available compute slices are allocated to it.
	 */
	u32 ccs_mode;

	/** @usm: unified shared memory state */
	struct {
		/**
		 * @usm.bb_pool: Pool from which batchbuffers for USM operations
		 * (e.g. migrations, fixing page tables) are allocated. A
		 * dedicated pool is needed so USM operations do not get blocked
		 * behind any user operations which may have resulted in a
		 * fault.
		 */
		struct xe_sa_manager *bb_pool;
		/**
		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
		 * operations (e.g. migrations, fixing page tables)
		 */
		u16 reserved_bcs_instance;
		/** @usm.pf_wq: page fault work queue, unbound, high priority */
		struct workqueue_struct *pf_wq;
		/** @usm.acc_wq: access counter work queue, unbound, high priority */
		struct workqueue_struct *acc_wq;
		/**
		 * @usm.pf_queue: Page fault queue used to buffer faults so they
		 * can be processed outside of the GuC CT lock. The queue is
		 * sized so it can hold all possible faults (1 per physical
		 * engine). Multiple queues exist so that page faults from
		 * different VMs can be processed in parallel.
		 */
		struct pf_queue {
			/** @usm.pf_queue.gt: back pointer to GT */
			struct xe_gt *gt;
			/** @usm.pf_queue.data: data in the page fault queue */
			u32 *data;
			/**
			 * @usm.pf_queue.num_dw: number of DWORDS in the page
			 * fault queue. Dynamically calculated based on the number
			 * of compute resources available.
			 */
			u32 num_dw;
			/**
			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
			 * moved by worker which processes faults (consumer).
			 */
			u16 tail;
			/**
			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.pf_queue.lock: protects page fault queue */
			spinlock_t lock;
			/** @usm.pf_queue.worker: to process page faults */
			struct work_struct worker;
#define NUM_PF_QUEUE 4
		} pf_queue[NUM_PF_QUEUE];
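		/*
		 * Both pf_queue and acc_queue are simple producer/consumer
		 * rings: the G2H handler advances 'head' and the worker
		 * advances 'tail', both counted in DWs. A minimal sketch of
		 * how the number of queued DWs could be computed (illustrative
		 * only; the driver's actual arithmetic may differ, e.g. it may
		 * require the queue size to be a power of two):
		 *
		 *	static u32 example_pf_queue_count(struct pf_queue *q)
		 *	{
		 *		return (q->head - q->tail + q->num_dw) % q->num_dw;
		 *	}
		 */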
		/**
		 * @usm.acc_queue: Same as the page fault queue; access counters
		 * cannot be processed under the CT lock either.
		 */
		struct acc_queue {
			/** @usm.acc_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW 128
			/** @usm.acc_queue.data: data in the access counter queue */
			u32 data[ACC_QUEUE_NUM_DW];
			/**
			 * @usm.acc_queue.tail: tail pointer in DWs for access counter
			 * queue, moved by worker which processes counters (consumer).
			 */
			u16 tail;
			/**
			 * @usm.acc_queue.head: head pointer in DWs for access counter
			 * queue, moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.acc_queue.lock: protects access counter queue */
			spinlock_t lock;
			/** @usm.acc_queue.worker: to process access counters */
			struct work_struct worker;
#define NUM_ACC_QUEUE 4
		} acc_queue[NUM_ACC_QUEUE];
	} usm;

	/** @ordered_wq: used to serialize GT resets and TDRs */
	struct workqueue_struct *ordered_wq;

	/** @uc: micro controllers on the GT */
	struct xe_uc uc;

	/** @gtidle: idle properties of GT */
	struct xe_gt_idle gtidle;

	/** @exec_queue_ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *exec_queue_ops;

	/**
	 * @ring_ops: ring operations for this hw engine (1 per engine class)
	 */
	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];

	/** @fence_irq: fence IRQs (1 per engine class) */
	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];

	/** @default_lrc: default LRC state */
	void *default_lrc[XE_ENGINE_CLASS_MAX];

	/** @hw_engines: hardware engines on the GT */
	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];

	/** @eclass: per hardware engine class interface on the GT */
	struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX];

	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
	struct kobject *sysfs;

	/** @freq: Main GT freq sysfs control */
	struct kobject *freq;

	/** @mocs: MOCS info */
	struct {
		/** @mocs.uc_index: UC index */
		u8 uc_index;
		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
		u8 wb_index;
	} mocs;

	/** @fuse_topo: GT topology reported by fuse registers */
	struct {
		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
		xe_dss_mask_t g_dss_mask;

		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
		xe_dss_mask_t c_dss_mask;

		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
		xe_eu_mask_t eu_mask_per_dss;

		/** @fuse_topo.l3_bank_mask: L3 bank mask */
		xe_l3_bank_mask_t l3_bank_mask;

		/**
		 * @fuse_topo.eu_type: type/width of EU stored in
		 * fuse_topo.eu_mask_per_dss
		 */
		enum xe_gt_eu_type eu_type;
	} fuse_topo;

	/** @steering: register steering for individual HW units */
	struct {
		/** @steering.ranges: register ranges used for this steering type */
		const struct xe_mmio_range *ranges;

		/** @steering.group_target: target to steer accesses to */
		u16 group_target;
		/** @steering.instance_target: instance to steer accesses to */
		u16 instance_target;
	} steering[NUM_STEERING_TYPES];
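	/*
	 * Each entry in the steering table above pairs a range list with the
	 * group/instance values that reads in those ranges should be steered
	 * to. A minimal sketch of matching a register offset to its steering
	 * type, assuming (as an illustration only) that the range tables are
	 * terminated by an all-zero entry:
	 *
	 *	static int example_reg_steering_type(struct xe_gt *gt, u32 reg)
	 *	{
	 *		int type;
	 *
	 *		for (type = 0; type < NUM_STEERING_TYPES; type++) {
	 *			const struct xe_mmio_range *r = gt->steering[type].ranges;
	 *
	 *			for (; r && r->end; r++)
	 *				if (reg >= r->start && reg <= r->end)
	 *					return type;
	 *		}
	 *
	 *		return -1;
	 *	}
	 */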
	/**
	 * @steering_dss_per_grp: number of DSS per steering group (gslice,
	 * cslice, etc.).
	 */
	unsigned int steering_dss_per_grp;

	/**
	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
	 * of a steered operation
	 */
	spinlock_t mcr_lock;

	/**
	 * @global_invl_lock: protects the invalidation register for the
	 * duration of a global L2 cache invalidation
	 */
	spinlock_t global_invl_lock;

	/** @wa_active: keep track of active workarounds */
	struct {
		/** @wa_active.gt: bitmap with active GT workarounds */
		unsigned long *gt;
		/** @wa_active.engine: bitmap with active engine workarounds */
		unsigned long *engine;
		/** @wa_active.lrc: bitmap with active LRC workarounds */
		unsigned long *lrc;
		/** @wa_active.oob: bitmap with active OOB workarounds */
		unsigned long *oob;
		/**
		 * @wa_active.oob_initialized: mark oob as initialized to help
		 * detect misuse of XE_WA() - it can only be called on
		 * initialization after OOB WAs have been processed
		 */
		bool oob_initialized;
	} wa_active;

	/** @user_engines: engines present in GT and available to userspace */
	struct {
		/**
		 * @user_engines.mask: like @info.engine_mask, but taking into
		 * consideration only engines available to userspace
		 */
		u64 mask;

		/**
		 * @user_engines.instances_per_class: aggregate per class the
		 * number of engines available to userspace
		 */
		u8 instances_per_class[XE_ENGINE_CLASS_MAX];
	} user_engines;

	/** @oa: OA observation subsystem per-GT info */
	struct xe_oa_gt oa;
};

#endif