/* SPDX-License-Identifier: GPL-2.0 or MIT */
/* Copyright 2023 Collabora ltd. */

#ifndef __PANTHOR_MCU_H__
#define __PANTHOR_MCU_H__

#include <linux/types.h>

struct panthor_device;
struct panthor_kernel_bo;

/* Maximum number of command stream group slots, and streams per group. */
#define MAX_CSGS		31
#define MAX_CS_PER_CSG		32

/*
 * The panthor_fw_*_iface structs below describe memory shared with the
 * firmware (MCU). Their layout mirrors the FW-side CSF interface definition,
 * so fields must not be reordered or resized. The "input" sections are
 * host-writable, the "control" and "output" sections are FW-owned and
 * read-only from the host's perspective.
 */

/* Ring buffer input interface: host-updated insert/extract pointers. */
struct panthor_fw_ringbuf_input_iface {
	u64 insert;
	u64 extract;
};

/* Ring buffer output interface: FW-updated extract pointer and active flag. */
struct panthor_fw_ringbuf_output_iface {
	u64 extract;
	u32 active;
};

/* Command stream slot control interface: read-only CS slot properties. */
struct panthor_fw_cs_control_iface {
/* Accessors/flags for the features field below. */
#define CS_FEATURES_WORK_REGS(x)	(((x) & GENMASK(7, 0)) + 1)
#define CS_FEATURES_SCOREBOARDS(x)	(((x) & GENMASK(15, 8)) >> 8)
#define CS_FEATURES_COMPUTE		BIT(16)
#define CS_FEATURES_FRAGMENT		BIT(17)
#define CS_FEATURES_TILER		BIT(18)
	u32 features;
	/* NOTE(review): presumably FW VAs of the CS input/output sections
	 * (per field names) — confirm against the FW interface spec.
	 */
	u32 input_va;
	u32 output_va;
};

/* Command stream slot input interface: host-written requests/config. */
struct panthor_fw_cs_input_iface {
/* Bit layout of the req field below. */
#define CS_STATE_MASK			GENMASK(2, 0)
#define CS_STATE_STOP			0
#define CS_STATE_START			1
#define CS_EXTRACT_EVENT		BIT(4)
#define CS_IDLE_SYNC_WAIT		BIT(8)
#define CS_IDLE_PROTM_PENDING		BIT(9)
#define CS_IDLE_EMPTY			BIT(10)
#define CS_IDLE_RESOURCE_REQ		BIT(11)
#define CS_TILER_OOM			BIT(26)
#define CS_PROTM_PENDING		BIT(27)
#define CS_FATAL			BIT(30)
#define CS_FAULT			BIT(31)
/* Host-initiated request bits. */
#define CS_REQ_MASK			(CS_STATE_MASK | \
					 CS_EXTRACT_EVENT | \
					 CS_IDLE_SYNC_WAIT | \
					 CS_IDLE_PROTM_PENDING | \
					 CS_IDLE_EMPTY | \
					 CS_IDLE_RESOURCE_REQ)
/* FW-event bits the host toggles to acknowledge. */
#define CS_EVT_MASK			(CS_TILER_OOM | \
					 CS_PROTM_PENDING | \
					 CS_FATAL | \
					 CS_FAULT)
	u32 req;

/* Bit layout of the config field below. */
#define CS_CONFIG_PRIORITY(x)		((x) & GENMASK(3, 0))
#define CS_CONFIG_DOORBELL(x)		(((x) << 8) & GENMASK(15, 8))
	u32 config;
	u32 reserved1;
	u32 ack_irq_mask;
	u64 ringbuf_base;
	u32 ringbuf_size;
	u32 reserved2;
	u64 heap_start;
	u64 heap_end;
	u64 ringbuf_input;
	u64 ringbuf_output;
	u32 instr_config;
	u32 instrbuf_size;
	u64 instrbuf_base;
	u64 instrbuf_offset_ptr;
};

/* Command stream slot output interface: FW-written acks and status. */
struct panthor_fw_cs_output_iface {
	u32 ack;
	u32 reserved1[15];
	u64 status_cmd_ptr;

/* Bit layout of the status_wait field below. */
#define CS_STATUS_WAIT_SB_MASK		GENMASK(15, 0)
#define CS_STATUS_WAIT_SB_SRC_MASK	GENMASK(19, 16)
#define CS_STATUS_WAIT_SB_SRC_NONE	(0 << 16)
#define CS_STATUS_WAIT_SB_SRC_WAIT	(8 << 16)
#define CS_STATUS_WAIT_SYNC_COND_LE	(0 << 24)
#define CS_STATUS_WAIT_SYNC_COND_GT	(1 << 24)
#define CS_STATUS_WAIT_SYNC_COND_MASK	GENMASK(27, 24)
#define CS_STATUS_WAIT_PROGRESS		BIT(28)
#define CS_STATUS_WAIT_PROTM		BIT(29)
#define CS_STATUS_WAIT_SYNC_64B		BIT(30)
#define CS_STATUS_WAIT_SYNC		BIT(31)
	u32 status_wait;
	u32 status_req_resource;
	u64 status_wait_sync_ptr;
	u32 status_wait_sync_value;
	u32 status_scoreboards;

/* Values for the status_blocked_reason field below. */
#define CS_STATUS_BLOCKED_REASON_UNBLOCKED	0
#define CS_STATUS_BLOCKED_REASON_SB_WAIT	1
#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT	2
#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT	3
#define CS_STATUS_BLOCKED_REASON_DEFERRED	4
#define CS_STATUS_BLOCKED_REASON_RESOURCE	5
#define CS_STATUS_BLOCKED_REASON_FLUSH		6
#define CS_STATUS_BLOCKED_REASON_MASK		GENMASK(3, 0)
	u32 status_blocked_reason;
	u32 status_wait_sync_value_hi;
	u32 reserved2[6];

/* Decoding of the fault/fatal fields below. */
#define CS_EXCEPTION_TYPE(x)		((x) & GENMASK(7, 0))
#define CS_EXCEPTION_DATA(x)		(((x) >> 8) & GENMASK(23, 0))
	u32 fault;
	u32 fatal;
	u64 fault_info;
	u64 fatal_info;
	u32 reserved3[10];
	u32 heap_vt_start;
	u32 heap_vt_end;
	u32 reserved4;
	u32 heap_frag_end;
	u64 heap_address;
};

/* Command stream group slot control interface: read-only CSG properties. */
struct panthor_fw_csg_control_iface {
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 suspend_size;
	u32 protm_suspend_size;
	u32 stream_num;
	u32 stream_stride;
};

/* Command stream group slot input interface: host-written requests/config. */
struct panthor_fw_csg_input_iface {
/* Bit layout of the req field below. */
#define CSG_STATE_MASK			GENMASK(2, 0)
#define CSG_STATE_TERMINATE		0
#define CSG_STATE_START			1
#define CSG_STATE_SUSPEND		2
#define CSG_STATE_RESUME		3
#define CSG_ENDPOINT_CONFIG		BIT(4)
#define CSG_STATUS_UPDATE		BIT(5)
#define CSG_SYNC_UPDATE			BIT(28)
#define CSG_IDLE			BIT(29)
#define CSG_DOORBELL			BIT(30)
#define CSG_PROGRESS_TIMER_EVENT	BIT(31)
/* Host-initiated request bits. */
#define CSG_REQ_MASK			(CSG_STATE_MASK | \
					 CSG_ENDPOINT_CONFIG | \
					 CSG_STATUS_UPDATE)
/* FW-event bits the host toggles to acknowledge. */
#define CSG_EVT_MASK			(CSG_SYNC_UPDATE | \
					 CSG_IDLE | \
					 CSG_PROGRESS_TIMER_EVENT)
	u32 req;
	u32 ack_irq_mask;

	u32 doorbell_req;
	u32 cs_irq_ack;
	u32 reserved1[4];
	u64 allow_compute;
	u64 allow_fragment;
	u32 allow_other;

/* Bit layout of the endpoint_req field below. */
#define CSG_EP_REQ_COMPUTE(x)		((x) & GENMASK(7, 0))
#define CSG_EP_REQ_FRAGMENT(x)		(((x) << 8) & GENMASK(15, 8))
#define CSG_EP_REQ_TILER(x)		(((x) << 16) & GENMASK(19, 16))
#define CSG_EP_REQ_EXCL_COMPUTE		BIT(20)
#define CSG_EP_REQ_EXCL_FRAGMENT	BIT(21)
#define CSG_EP_REQ_PRIORITY_MASK	GENMASK(31, 28)
#define CSG_EP_REQ_PRIORITY(x)		(((x) << 28) & CSG_EP_REQ_PRIORITY_MASK)
#define CSG_EP_REQ_PRIORITY_GET(x)	(((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28)
	u32 endpoint_req;
	u64 endpoint_req2;
	u64 suspend_buf;
	u64 protm_suspend_buf;
	u32 config;
	u32 iter_trace_config;
};

/* Command stream group slot output interface: FW-written acks and status. */
struct panthor_fw_csg_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 cs_irq_req;
	u32 status_endpoint_current;
	u32 status_endpoint_req;

/* Bit layout of the status_state field below. */
#define CSG_STATUS_STATE_IS_IDLE	BIT(0)
	u32 status_state;
	u32 resource_dep;
};

/* Global control interface: read-only global FW properties. */
struct panthor_fw_global_control_iface {
	u32 version;
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 group_num;
	u32 group_stride;
	u32 perfcnt_size;
	u32 instr_features;
};

/* Global input interface: host-written global requests/config. */
struct panthor_fw_global_input_iface {
/* Bit layout of the req field below. */
#define GLB_HALT			BIT(0)
#define GLB_CFG_PROGRESS_TIMER		BIT(1)
#define GLB_CFG_ALLOC_EN		BIT(2)
#define GLB_CFG_POWEROFF_TIMER		BIT(3)
#define GLB_PROTM_ENTER			BIT(4)
#define GLB_PERFCNT_EN			BIT(5)
#define GLB_PERFCNT_SAMPLE		BIT(6)
#define GLB_COUNTER_EN			BIT(7)
#define GLB_PING			BIT(8)
#define GLB_FWCFG_UPDATE		BIT(9)
#define GLB_IDLE_EN			BIT(10)
/* NOTE(review): GLB_SLEEP aliases bit 12, the LSB of GLB_STATE_MASK. */
#define GLB_SLEEP			BIT(12)
#define GLB_STATE_MASK			GENMASK(14, 12)
#define GLB_STATE_ACTIVE		0
#define GLB_STATE_HALT			1
#define GLB_STATE_SLEEP			2
#define GLB_STATE_SUSPEND		3
#define GLB_STATE(x)			(((x) << 12) & GLB_STATE_MASK)
#define GLB_STATE_GET(x)		(((x) & GLB_STATE_MASK) >> 12)
#define GLB_INACTIVE_COMPUTE		BIT(20)
#define GLB_INACTIVE_FRAGMENT		BIT(21)
#define GLB_INACTIVE_TILER		BIT(22)
#define GLB_PROTM_EXIT			BIT(23)
#define GLB_PERFCNT_THRESHOLD		BIT(24)
#define GLB_PERFCNT_OVERFLOW		BIT(25)
#define GLB_IDLE			BIT(26)
#define GLB_DBG_CSF			BIT(30)
#define GLB_DBG_HOST			BIT(31)
/* Host-initiated request bits (bits 10:0). */
#define GLB_REQ_MASK			GENMASK(10, 0)
/* FW-event bits (bits 26:20) the host toggles to acknowledge. */
#define GLB_EVT_MASK			GENMASK(26, 20)
	u32 req;
	u32 ack_irq_mask;
	u32 doorbell_req;
	u32 reserved1;
	u32 progress_timer;

/* Bit layout of the poweroff_timer field below. */
#define GLB_TIMER_VAL(x)		((x) & GENMASK(30, 0))
#define GLB_TIMER_SOURCE_GPU_COUNTER	BIT(31)
	u32 poweroff_timer;
	u64 core_en_mask;
	u32 reserved2;
	u32 perfcnt_as;
	u64 perfcnt_base;
	u32 perfcnt_extract;
	u32 reserved3[3];
	u32 perfcnt_config;
	u32 perfcnt_csg_select;
	u32 perfcnt_fw_enable;
	u32 perfcnt_csg_enable;
	u32 perfcnt_csf_enable;
	u32 perfcnt_shader_enable;
	u32 perfcnt_tiler_enable;
	u32 perfcnt_mmu_l2_enable;
	u32 reserved4[8];
	u32 idle_timer;
};

/* Values reported by the FW in panthor_fw_global_output_iface::halt_status. */
enum panthor_fw_halt_status {
	PANTHOR_FW_HALT_OK = 0,
	PANTHOR_FW_HALT_ON_PANIC = 0x4e,
	PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
};

/* Global output interface: FW-written global acks and status. */
struct panthor_fw_global_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 reserved2;
	u32 halt_status;
	u32 perfcnt_status;
	u32 perfcnt_insert;
};

/**
 * struct panthor_fw_cs_iface - Firmware command stream slot interface
 *
 * Host-side wrapper gathering the pointers to the three FW-mapped sections
 * of one command stream slot.
 */
struct panthor_fw_cs_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream slot control interface.
	 *
	 * Used to expose command stream slot properties.
	 *
	 * This interface is read-only.
	 */
	struct panthor_fw_cs_control_iface *control;

	/**
	 * @input: Command stream slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_cs_input_iface *input;

	/**
	 * @output: Command stream slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_cs_output_iface *output;
};

/**
 * struct panthor_fw_csg_iface - Firmware command stream group slot interface
 *
 * Host-side wrapper gathering the pointers to the three FW-mapped sections
 * of one command stream group slot.
 */
struct panthor_fw_csg_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream group slot control interface.
	 *
	 * Used to expose command stream group slot properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_control_iface *control;

	/**
	 * @input: Command stream group slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_csg_input_iface *input;

	/**
	 * @output: Command stream group slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_output_iface *output;
};

/**
 * struct panthor_fw_global_iface - Firmware global interface
 *
 * Host-side wrapper gathering the pointers to the three FW-mapped global
 * interface sections.
 */
struct panthor_fw_global_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler/FW management logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Global control interface.
	 *
	 * Used to expose global FW properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_control_iface *control;

	/**
	 * @input: Global input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_global_input_iface *input;

	/**
	 * @output: Global output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_output_iface *output;
};

/**
 * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__out_reg: Name of the register to take as a reference in the output section of the
 * interface.
 * @__mask: Mask to apply to the update.
 *
 * The Host -> FW event/message passing was designed to be lockless, with each side of
 * the channel having its writeable section. Events are signaled as a difference between
 * the host and FW side in the req/ack registers (when a bit differs, there's an event
 * pending, when they are the same, nothing needs attention).
 *
 * This helper allows one to update the req register based on the current value of the
 * ack register managed by the FW. Toggling a specific bit will flag an event. In order
 * for events to be re-evaluated, the interface doorbell needs to be rung.
 *
 * Concurrent accesses to the same req register is covered.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
	do { \
		u32 __cur_val, __new_val, __out_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__out_val = READ_ONCE((__iface)->output->__out_reg); \
		/* Invert the masked ack bits so they differ from the FW side, \
		 * keep the unmasked req bits untouched. \
		 */ \
		__new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)

/**
 * panthor_fw_update_reqs() - Update bits to reflect a configuration change
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__val: Value to set.
 * @__mask: Mask to apply to the update.
 *
 * Some configuration get passed through req registers that are also used to
 * send events to the FW. Those req registers being updated from the interrupt
 * handler, they require special helpers to update the configuration part as well.
 *
 * Concurrent accesses to the same req register is covered.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
	do { \
		u32 __cur_val, __new_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)

/* 64-bit variant of panthor_fw_update_reqs(), for u64 input registers. */
#define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \
	do { \
		u64 __cur_val, __new_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)

struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev);

struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);

struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);

u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
				    struct panthor_fw_csg_iface *csg_iface);

void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
				     struct panthor_fw_csg_iface *csg_iface, u64 value);

void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
					struct panthor_fw_csg_iface *csg_iface, u64 value,
					u64 mask);

int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
			     u32 *acked, u32 timeout_ms);

int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
			     u32 timeout_ms);

void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);

struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va);
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);

void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
int panthor_fw_post_reset(struct panthor_device *ptdev);

/* Suspend is implemented as a non-hang pre-reset (FW halted cleanly). */
static inline void panthor_fw_suspend(struct panthor_device *ptdev)
{
	panthor_fw_pre_reset(ptdev, false);
}

/* Resume re-runs the post-reset FW (re)initialization path. */
static inline int panthor_fw_resume(struct panthor_device *ptdev)
{
	return panthor_fw_post_reset(ptdev);
}

int panthor_fw_init(struct panthor_device *ptdev);
void panthor_fw_unplug(struct panthor_device *ptdev);

#endif