/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022-2023 Intel Corporation
 */

#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_

#include "xe_device_types.h"
#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_gt_sriov_pf_types.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_gt_stats.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"

struct xe_exec_queue_ops;
struct xe_migrate;
struct xe_ring_ops;

enum xe_gt_type {
	XE_GT_TYPE_UNINITIALIZED,
	XE_GT_TYPE_MAIN,
	XE_GT_TYPE_MEDIA,
};

enum xe_gt_eu_type {
	XE_GT_EU_TYPE_SIMD8,
	XE_GT_EU_TYPE_SIMD16,
};

#define XE_MAX_DSS_FUSE_REGS		3
#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS		1
#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
#define XE_MAX_L3_BANK_MASK_BITS	64

typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];

struct xe_mmio_range {
	u32 start;
	u32 end;
};

/*
 * The hardware has multiple kinds of multicast register ranges that need
 * special register steering (and future platforms are expected to add
 * additional types).
 *
 * During driver startup, we initialize the steering control register to
 * direct reads to a slice/subslice that are valid for the 'subslice' class
 * of multicast registers. If another type of steering does not have any
 * overlap in valid steering targets with 'subslice' style registers, we will
 * need to explicitly re-steer reads of registers of the other type.
 *
 * Only the replication types that may need additional non-default steering
 * are listed here.
 */
enum xe_steering_type {
	L3BANK,
	MSLICE,
	LNCF,
	DSS,
	OADDRM,
	SQIDI_PSMI,

	/*
	 * On some platforms there are multiple types of MCR registers that
	 * will always return a non-terminated value at instance (0, 0). We'll
	 * lump those all into a single category to keep things simple.
	 */
	INSTANCE0,

	/*
	 * Register ranges that don't need special steering for each register:
	 * it's sufficient to keep the HW-default for the selector, or only
	 * change it once, on GT initialization. This needs to be the last
	 * steering type.
	 */
	IMPLICIT_STEERING,
	NUM_STEERING_TYPES
};

#define gt_to_tile(gt__)							\
	_Generic(gt__,								\
		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
		 struct xe_gt * : (gt__)->tile)

#define gt_to_xe(gt__)								\
	_Generic(gt__,								\
		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
		 struct xe_gt * : gt_to_tile(gt__)->xe)
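
/*
 * Illustrative only, not part of the original header: the conversion macros
 * above resolve a GT back to its owning tile and device. A hypothetical
 * helper might use them along these lines:
 *
 *	static void example_log_gt(struct xe_gt *gt)
 *	{
 *		struct xe_device *xe = gt_to_xe(gt);
 *
 *		drm_dbg(&xe->drm, "GT%u on tile %u\n",
 *			gt->info.id, gt_to_tile(gt)->id);
 *	}
 *
 * The _Generic() selection preserves constness: passing a
 * const struct xe_gt * yields const tile/device pointers.
 */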

/**
 * struct xe_gt - A "Graphics Technology" unit of the GPU
 *
 * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
 * for implementing the graphics, compute, and/or media IP. It encapsulates
 * the hardware engines, programmable execution units, and GuC. Each GT has
 * its own handling of power management (RC6+forcewake) and multicast register
 * steering.
 *
 * A GPU/tile may have a single GT that supplies all graphics, compute, and
 * media functionality, or the graphics/compute and media may be split into
 * separate GTs within a tile.
 */
struct xe_gt {
	/** @tile: Backpointer to GT's tile */
	struct xe_tile *tile;

	/** @info: GT info */
	struct {
		/** @info.type: type of GT */
		enum xe_gt_type type;
		/** @info.reference_clock: clock frequency */
		u32 reference_clock;
		/**
		 * @info.engine_mask: mask of engines present on GT. Some of
		 * them may be reserved at runtime and not available to the
		 * user. See @user_engines.mask
		 */
		u64 engine_mask;
		/** @info.gmdid: raw GMD_ID value from hardware */
		u32 gmdid;
		/** @info.id: Unique ID of this GT within the PCI Device */
		u8 id;
		/** @info.has_indirect_ring_state: GT has indirect ring state support */
		u8 has_indirect_ring_state:1;
	} info;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	/** @stats: GT stats */
	struct {
		/** @stats.counters: counters for various GT stats */
		atomic_t counters[__XE_GT_STATS_NUM_IDS];
	} stats;
#endif

	/**
	 * @mmio: mmio info for GT. All GTs within a tile share the same
	 * register space, but have their own copy of GSI registers at a
	 * specific offset.
	 */
	struct xe_mmio mmio;

	/**
	 * @pm: power management info for GT. The driver uses the GT's
	 * "force wake" interface to wake up specific parts of the GT hardware
	 * from C6 sleep states and ensure the hardware remains awake while it
	 * is being actively used.
	 */
	struct {
		/** @pm.fw: force wake for GT */
		struct xe_force_wake fw;
	} pm;

	/** @sriov: virtualization data related to GT */
	union {
		/** @sriov.pf: PF data. Valid only if driver is running as PF */
		struct xe_gt_sriov_pf pf;
		/** @sriov.vf: VF data. Valid only if driver is running as VF */
		struct xe_gt_sriov_vf vf;
	} sriov;

	/**
	 * @reg_sr: table with registers to be restored on GT init/resume/reset
	 */
	struct xe_reg_sr reg_sr;

	/** @reset: state for GT resets */
	struct {
		/**
		 * @reset.worker: work so GT resets can be done async, allowing
		 * the reset code to safely flush all code paths
		 */
		struct work_struct worker;
	} reset;

	/** @tlb_invalidation: TLB invalidation state */
	struct {
		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX	0x100000
		int seqno;
		/**
		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
		 * protected by CT lock
		 */
		int seqno_recv;
		/**
		 * @tlb_invalidation.pending_fences: list of pending fences waiting on
		 * TLB invalidations, protected by CT lock
		 */
		struct list_head pending_fences;
		/**
		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
		 * and updates to @tlb_invalidation.seqno_recv.
		 */
		spinlock_t pending_lock;
		/**
		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
		 */
		struct delayed_work fence_tdr;
		/** @tlb_invalidation.lock: protects TLB invalidation fences */
		spinlock_t lock;
	} tlb_invalidation;
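
	/*
	 * Illustrative sketch, not part of the original header: seqnos are
	 * assumed to be handed out under the CT lock and to wrap at
	 * TLB_INVALIDATION_SEQNO_MAX, roughly along these lines:
	 *
	 *	seqno = gt->tlb_invalidation.seqno;
	 *	gt->tlb_invalidation.seqno = (seqno + 1) %
	 *		TLB_INVALIDATION_SEQNO_MAX;
	 *	if (!gt->tlb_invalidation.seqno)
	 *		gt->tlb_invalidation.seqno = 1;
	 *
	 * A fence on @tlb_invalidation.pending_fences can then be considered
	 * complete once @tlb_invalidation.seqno_recv has caught up with the
	 * seqno the fence was issued with.
	 */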

	/**
	 * @ccs_mode: Number of compute engines enabled.
	 * Allows fixed mapping of available compute slices to compute engines.
	 * By default only the first available compute engine is enabled and all
	 * available compute slices are allocated to it.
	 */
	u32 ccs_mode;

	/** @usm: unified shared memory state */
	struct {
		/**
		 * @usm.bb_pool: Pool from which batchbuffers, for USM operations
		 * (e.g. migrations, fixing page tables), are allocated. A
		 * dedicated pool is needed so USM operations do not get blocked
		 * behind any user operations which may have resulted in a
		 * fault.
		 */
		struct xe_sa_manager *bb_pool;
		/**
		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
		 * operations (e.g. migrations, fixing page tables)
		 */
		u16 reserved_bcs_instance;
		/** @usm.pf_wq: page fault work queue, unbound, high priority */
		struct workqueue_struct *pf_wq;
		/** @usm.acc_wq: access counter work queue, unbound, high priority */
		struct workqueue_struct *acc_wq;
		/**
		 * @usm.pf_queue: Page fault queue used to sync faults so faults can
		 * be processed outside the GuC CT lock. The queue is sized so
		 * it can sync all possible faults (1 per physical engine).
		 * Multiple queues exist so page faults from different VMs can
		 * be processed in parallel.
		 */
		struct pf_queue {
			/** @usm.pf_queue.gt: back pointer to GT */
			struct xe_gt *gt;
			/** @usm.pf_queue.data: data in the page fault queue */
			u32 *data;
			/**
			 * @usm.pf_queue.num_dw: number of DWORDS in the page
			 * fault queue. Dynamically calculated based on the number
			 * of compute resources available.
			 */
			u32 num_dw;
			/**
			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
			 * moved by worker which processes faults (consumer).
			 */
			u16 tail;
			/**
			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.pf_queue.lock: protects page fault queue */
			spinlock_t lock;
			/** @usm.pf_queue.worker: to process page faults */
			struct work_struct worker;
#define NUM_PF_QUEUE	4
		} pf_queue[NUM_PF_QUEUE];
		/**
		 * @usm.acc_queue: Same as the page fault queue; access counters
		 * cannot be processed under the CT lock.
		 */
		struct acc_queue {
			/** @usm.acc_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW	128
			/** @usm.acc_queue.data: data in the access counter queue */
			u32 data[ACC_QUEUE_NUM_DW];
			/**
			 * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
			 * moved by worker which processes counters (consumer).
			 */
			u16 tail;
			/**
			 * @usm.acc_queue.head: head pointer in DWs for access counter queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.acc_queue.lock: protects access counter queue */
			spinlock_t lock;
			/** @usm.acc_queue.worker: to process access counters */
			struct work_struct worker;
#define NUM_ACC_QUEUE	4
		} acc_queue[NUM_ACC_QUEUE];
	} usm;
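
	/*
	 * Illustrative sketch, not part of the original header: both
	 * @usm.pf_queue and @usm.acc_queue are circular buffers indexed in
	 * dwords, with the G2H handler producing at head and the worker
	 * consuming from tail. Assuming the queue size is a power of two,
	 * the remaining space could be computed with the standard helper
	 * from <linux/circ_buf.h>:
	 *
	 *	u32 free_dw = CIRC_SPACE(pf_queue->head, pf_queue->tail,
	 *				 pf_queue->num_dw);
	 */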

	/** @ordered_wq: used to serialize GT resets and TDRs */
	struct workqueue_struct *ordered_wq;

	/** @uc: micro controllers on the GT */
	struct xe_uc uc;

	/** @gtidle: idle properties of GT */
	struct xe_gt_idle gtidle;

	/** @exec_queue_ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *exec_queue_ops;

	/**
	 * @ring_ops: ring operations for this hw engine (1 per engine class)
	 */
	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];

	/** @fence_irq: fence IRQs (1 per engine class) */
	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];

	/** @default_lrc: default LRC state */
	void *default_lrc[XE_ENGINE_CLASS_MAX];

	/** @hw_engines: hardware engines on the GT */
	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];

	/** @eclass: per hardware engine class interface on the GT */
	struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX];

	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
	struct kobject *sysfs;

	/** @freq: Main GT freq sysfs control */
	struct kobject *freq;

	/** @mocs: MOCS (memory object control state) info */
	struct {
		/** @mocs.uc_index: UC index */
		u8 uc_index;
		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
		u8 wb_index;
	} mocs;

	/** @fuse_topo: GT topology reported by fuse registers */
	struct {
		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
		xe_dss_mask_t g_dss_mask;

		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
		xe_dss_mask_t c_dss_mask;

		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
		xe_eu_mask_t eu_mask_per_dss;

		/** @fuse_topo.l3_bank_mask: L3 bank mask */
		xe_l3_bank_mask_t l3_bank_mask;

		/**
		 * @fuse_topo.eu_type: type/width of EU stored in
		 * fuse_topo.eu_mask_per_dss
		 */
		enum xe_gt_eu_type eu_type;
	} fuse_topo;
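
	/*
	 * Illustrative sketch, not part of the original header: the
	 * @fuse_topo masks are plain bitmaps, so e.g. the number of
	 * geometry-capable DSS could be derived with the usual bitmap
	 * helpers from <linux/bitmap.h>:
	 *
	 *	unsigned int num_geom_dss =
	 *		bitmap_weight(gt->fuse_topo.g_dss_mask,
	 *			      XE_MAX_DSS_FUSE_BITS);
	 */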

	/** @steering: register steering for individual HW units */
	struct {
		/** @steering.ranges: register ranges used for this steering type */
		const struct xe_mmio_range *ranges;

		/** @steering.group_target: target to steer accesses to */
		u16 group_target;
		/** @steering.instance_target: instance to steer accesses to */
		u16 instance_target;
	} steering[NUM_STEERING_TYPES];

	/**
	 * @steering_dss_per_grp: number of DSS per steering group (gslice,
	 * cslice, etc.).
	 */
	unsigned int steering_dss_per_grp;

	/**
	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
	 * of a steered operation
	 */
	spinlock_t mcr_lock;

	/**
	 * @global_invl_lock: protects the register for the duration
	 * of a global invalidation of the L2 cache
	 */
	spinlock_t global_invl_lock;

	/** @wa_active: keep track of active workarounds */
	struct {
		/** @wa_active.gt: bitmap with active GT workarounds */
		unsigned long *gt;
		/** @wa_active.engine: bitmap with active engine workarounds */
		unsigned long *engine;
		/** @wa_active.lrc: bitmap with active LRC workarounds */
		unsigned long *lrc;
		/** @wa_active.oob: bitmap with active OOB workarounds */
		unsigned long *oob;
		/**
		 * @wa_active.oob_initialized: mark OOB workarounds as initialized
		 * to help detect misuse of XE_WA() - it can only be called during
		 * initialization, after OOB WAs have been processed
		 */
		bool oob_initialized;
	} wa_active;

	/** @user_engines: engines present in GT and available to userspace */
	struct {
		/**
		 * @user_engines.mask: like @info.engine_mask, but taking into
		 * consideration only the engines available to userspace
		 */
		u64 mask;

		/**
		 * @user_engines.instances_per_class: aggregate per class the
		 * number of engines available to userspace
		 */
		u8 instances_per_class[XE_ENGINE_CLASS_MAX];
	} user_engines;

	/** @oa: OA observation subsystem per-GT info */
	struct xe_oa_gt oa;
};

#endif