1 /* SPDX-License-Identifier: GPL-2.0 or MIT */
2 /* Copyright 2023 Collabora ltd. */
3
4 #ifndef __PANTHOR_MCU_H__
5 #define __PANTHOR_MCU_H__
6
7 #include <linux/types.h>
8
9 struct panthor_device;
10 struct panthor_kernel_bo;
11
/*
 * Upper bounds used to size host-side bookkeeping. The actual slot counts are
 * reported by the FW through the control interfaces (group_num, stream_num).
 */
#define MAX_CSGS	31
#define MAX_CS_PER_CSG	32
14
/*
 * Host-writable section of a queue ring buffer interface
 * (see panthor_fw_alloc_queue_iface_mem()).
 */
struct panthor_fw_ringbuf_input_iface {
	/* Insert point in the ring buffer. */
	u64 insert;
	/* Extract point in the ring buffer. */
	u64 extract;
};
19
/*
 * FW-writable section of a queue ring buffer interface. Read-only for the
 * host (see panthor_fw_alloc_queue_iface_mem(), which returns it const).
 */
struct panthor_fw_ringbuf_output_iface {
	/* Extract point, updated by the FW as it consumes the ring buffer. */
	u64 extract;
	/* Activity flag. NOTE(review): presumably non-zero while the FW is
	 * processing this queue — confirm against the CSF interface spec.
	 */
	u32 active;
};
24
/* Command stream (CS) slot control interface: read-only slot properties. */
struct panthor_fw_cs_control_iface {
/* Number of work registers available to the CS (field is encoded minus one). */
#define CS_FEATURES_WORK_REGS(x)	(((x) & GENMASK(7, 0)) + 1)
/* Number of scoreboard entries. */
#define CS_FEATURES_SCOREBOARDS(x)	(((x) & GENMASK(15, 8)) >> 8)
#define CS_FEATURES_COMPUTE		BIT(16)
#define CS_FEATURES_FRAGMENT		BIT(17)
#define CS_FEATURES_TILER		BIT(18)
	/* CS_FEATURES_xxx bitmask describing the slot capabilities. */
	u32 features;
	/* Input interface virtual address. */
	u32 input_va;
	/* Output interface virtual address. */
	u32 output_va;
};
35
/*
 * Command stream slot input interface: host-writable section used to pass
 * requests and configuration to the FW for one CS slot.
 */
struct panthor_fw_cs_input_iface {
/* Requested CS state, encoded in the low bits of the req register. */
#define CS_STATE_MASK			GENMASK(2, 0)
#define CS_STATE_STOP			0
#define CS_STATE_START			1
#define CS_EXTRACT_EVENT		BIT(4)
#define CS_IDLE_SYNC_WAIT		BIT(8)
#define CS_IDLE_PROTM_PENDING		BIT(9)
#define CS_IDLE_EMPTY			BIT(10)
#define CS_IDLE_RESOURCE_REQ		BIT(11)
#define CS_TILER_OOM			BIT(26)
#define CS_PROTM_PENDING		BIT(27)
#define CS_FATAL			BIT(30)
#define CS_FAULT			BIT(31)
/* Bits driven by the host to request something from the FW. */
#define CS_REQ_MASK			(CS_STATE_MASK | \
					 CS_EXTRACT_EVENT | \
					 CS_IDLE_SYNC_WAIT | \
					 CS_IDLE_PROTM_PENDING | \
					 CS_IDLE_EMPTY | \
					 CS_IDLE_RESOURCE_REQ)
/* Bits toggled by the FW to flag events (tiler OOM, protm, faults). */
#define CS_EVT_MASK			(CS_TILER_OOM | \
					 CS_PROTM_PENDING | \
					 CS_FATAL | \
					 CS_FAULT)
	/* Request register; see CS_REQ_MASK/CS_EVT_MASK for the bit split. */
	u32 req;

/* CS priority, in the [0:15] range (4-bit field). */
#define CS_CONFIG_PRIORITY(x)		((x) & GENMASK(3, 0))
/* Doorbell assigned to this CS. */
#define CS_CONFIG_DOORBELL(x)		(((x) << 8) & GENMASK(15, 8))
	/* CS configuration (CS_CONFIG_xxx). */
	u32 config;
	/* Reserved. */
	u32 reserved1;
	/* Mask of req bits whose acknowledgment should raise an IRQ. */
	u32 ack_irq_mask;
	/* Ring buffer base address. */
	u64 ringbuf_base;
	/* Ring buffer size. */
	u32 ringbuf_size;
	/* Reserved. */
	u32 reserved2;
	/* Tiler heap range start. */
	u64 heap_start;
	/* Tiler heap range end. */
	u64 heap_end;
	/* Address of the panthor_fw_ringbuf_input_iface section. */
	u64 ringbuf_input;
	/* Address of the panthor_fw_ringbuf_output_iface section. */
	u64 ringbuf_output;
	/* Instrumentation configuration. */
	u32 instr_config;
	/* Instrumentation buffer size. */
	u32 instrbuf_size;
	/* Instrumentation buffer base address. */
	u64 instrbuf_base;
	/* Pointer to the instrumentation buffer offset. */
	u64 instrbuf_offset_ptr;
};
78
/*
 * Command stream slot output interface: FW-writable status section,
 * read-only for the host.
 */
struct panthor_fw_cs_output_iface {
	/* Acknowledgment register, mirroring the input req register. */
	u32 ack;
	/* Reserved. */
	u32 reserved1[15];
	/* Pointer to the command being processed. */
	u64 status_cmd_ptr;

/* Decoding of the status_wait register. */
#define CS_STATUS_WAIT_SB_MASK		GENMASK(15, 0)
#define CS_STATUS_WAIT_SB_SRC_MASK	GENMASK(19, 16)
#define CS_STATUS_WAIT_SB_SRC_NONE	(0 << 16)
#define CS_STATUS_WAIT_SB_SRC_WAIT	(8 << 16)
#define CS_STATUS_WAIT_SYNC_COND_LE	(0 << 24)
#define CS_STATUS_WAIT_SYNC_COND_GT	(1 << 24)
#define CS_STATUS_WAIT_SYNC_COND_MASK	GENMASK(27, 24)
#define CS_STATUS_WAIT_PROGRESS		BIT(28)
#define CS_STATUS_WAIT_PROTM		BIT(29)
#define CS_STATUS_WAIT_SYNC_64B		BIT(30)
#define CS_STATUS_WAIT_SYNC		BIT(31)
	/* Wait status (CS_STATUS_WAIT_xxx). */
	u32 status_wait;
	/* Requested resources status. */
	u32 status_req_resource;
	/* Address of the synchronization object being waited on. */
	u64 status_wait_sync_ptr;
	/* Value the sync wait compares against (low 32 bits). */
	u32 status_wait_sync_value;
	/* Scoreboard status. */
	u32 status_scoreboards;

/* Reason the CS is blocked, encoded in status_blocked_reason. */
#define CS_STATUS_BLOCKED_REASON_UNBLOCKED	0
#define CS_STATUS_BLOCKED_REASON_SB_WAIT	1
#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT	2
#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT	3
#define CS_STATUS_BLOCKED_REASON_DEFERRED	4
#define CS_STATUS_BLOCKED_REASON_RESOURCE	5
#define CS_STATUS_BLOCKED_REASON_FLUSH		6
#define CS_STATUS_BLOCKED_REASON_MASK		GENMASK(3, 0)
	/* Blocked reason (CS_STATUS_BLOCKED_REASON_xxx). */
	u32 status_blocked_reason;
	/* High 32 bits of the sync wait value (64-bit waits). */
	u32 status_wait_sync_value_hi;
	/* Reserved. */
	u32 reserved2[6];

/* Decoding of the fault/fatal registers. */
#define CS_EXCEPTION_TYPE(x)		((x) & GENMASK(7, 0))
#define CS_EXCEPTION_DATA(x)		(((x) >> 8) & GENMASK(23, 0))
	/* Recoverable fault information (CS_EXCEPTION_xxx). */
	u32 fault;
	/* Unrecoverable fault information (CS_EXCEPTION_xxx). */
	u32 fatal;
	/* Extra fault information. */
	u64 fault_info;
	/* Extra fatal-fault information. */
	u64 fatal_info;
	/* Reserved. */
	u32 reserved3[10];
	/* Tiler heap vertex/tiling progress markers — see panthor heap logic. */
	u32 heap_vt_start;
	u32 heap_vt_end;
	/* Reserved. */
	u32 reserved4;
	/* Tiler heap fragment progress marker. */
	u32 heap_frag_end;
	/* Tiler heap address. */
	u64 heap_address;
};
126
/*
 * Command stream group (CSG) slot control interface: read-only group slot
 * properties.
 */
struct panthor_fw_csg_control_iface {
	/* Group slot features. */
	u32 features;
	/* Input interface virtual address. */
	u32 input_va;
	/* Output interface virtual address. */
	u32 output_va;
	/* Size of the normal suspend buffer. */
	u32 suspend_size;
	/* Size of the protected-mode suspend buffer. */
	u32 protm_suspend_size;
	/* Number of CS slots in this group slot. */
	u32 stream_num;
	/* Stride between two CS interface blocks. */
	u32 stream_stride;
};
136
/*
 * Command stream group slot input interface: host-writable section used to
 * pass requests and configuration to the FW for one CSG slot.
 */
struct panthor_fw_csg_input_iface {
/* Requested group state, encoded in the low bits of the req register. */
#define CSG_STATE_MASK			GENMASK(2, 0)
#define CSG_STATE_TERMINATE		0
#define CSG_STATE_START			1
#define CSG_STATE_SUSPEND		2
#define CSG_STATE_RESUME		3
#define CSG_ENDPOINT_CONFIG		BIT(4)
#define CSG_STATUS_UPDATE		BIT(5)
#define CSG_SYNC_UPDATE			BIT(28)
#define CSG_IDLE			BIT(29)
#define CSG_DOORBELL			BIT(30)
#define CSG_PROGRESS_TIMER_EVENT	BIT(31)
/* Bits driven by the host to request something from the FW. */
#define CSG_REQ_MASK			(CSG_STATE_MASK | \
					 CSG_ENDPOINT_CONFIG | \
					 CSG_STATUS_UPDATE)
/* Bits toggled by the FW to flag events. */
#define CSG_EVT_MASK			(CSG_SYNC_UPDATE | \
					 CSG_IDLE | \
					 CSG_PROGRESS_TIMER_EVENT)
	/* Request register; see CSG_REQ_MASK/CSG_EVT_MASK for the bit split. */
	u32 req;
	/* Mask of req bits whose acknowledgment should raise an IRQ. */
	u32 ack_irq_mask;

	/* Doorbell request register. */
	u32 doorbell_req;
	/* Acknowledgment of CS IRQs (pairs with cs_irq_req in the output). */
	u32 cs_irq_ack;
	/* Reserved. */
	u32 reserved1[4];
	/* Mask of cores the compute endpoints are allowed to use. */
	u64 allow_compute;
	/* Mask of cores the fragment endpoints are allowed to use. */
	u64 allow_fragment;
	/* Mask of cores the other (tiler) endpoints are allowed to use. */
	u32 allow_other;

/* Endpoint request encoding (counts of endpoints and group priority). */
#define CSG_EP_REQ_COMPUTE(x)		((x) & GENMASK(7, 0))
#define CSG_EP_REQ_FRAGMENT(x)		(((x) << 8) & GENMASK(15, 8))
#define CSG_EP_REQ_TILER(x)		(((x) << 16) & GENMASK(19, 16))
#define CSG_EP_REQ_EXCL_COMPUTE		BIT(20)
#define CSG_EP_REQ_EXCL_FRAGMENT	BIT(21)
#define CSG_EP_REQ_PRIORITY_MASK	GENMASK(31, 28)
#define CSG_EP_REQ_PRIORITY(x)		(((x) << 28) & CSG_EP_REQ_PRIORITY_MASK)
#define CSG_EP_REQ_PRIORITY_GET(x)	(((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28)
	/* Endpoint request (CSG_EP_REQ_xxx). */
	u32 endpoint_req;
	/* Extra endpoint request bits (see panthor_fw_csg_endpoint_req_*()). */
	u64 endpoint_req2;
	/* Address of the normal suspend buffer. */
	u64 suspend_buf;
	/* Address of the protected-mode suspend buffer. */
	u64 protm_suspend_buf;
	/* Group configuration. */
	u32 config;
	/* Iterator trace configuration. */
	u32 iter_trace_config;
};
180
/*
 * Command stream group slot output interface: FW-writable status section,
 * read-only for the host.
 */
struct panthor_fw_csg_output_iface {
	/* Acknowledgment register, mirroring the input req register. */
	u32 ack;
	/* Reserved. */
	u32 reserved1;
	/* Doorbell acknowledgment register. */
	u32 doorbell_ack;
	/* Per-CS IRQ requests (acked through cs_irq_ack in the input). */
	u32 cs_irq_req;
	/* Endpoints currently assigned to the group. */
	u32 status_endpoint_current;
	/* Endpoints requested by the group. */
	u32 status_endpoint_req;

#define CSG_STATUS_STATE_IS_IDLE	BIT(0)
	/* Group state (CSG_STATUS_STATE_xxx). */
	u32 status_state;
	/* Resource dependencies. */
	u32 resource_dep;
};
193
/* Global control interface: read-only global FW properties. */
struct panthor_fw_global_control_iface {
	/* Interface version. */
	u32 version;
	/* Global features. */
	u32 features;
	/* Global input interface virtual address. */
	u32 input_va;
	/* Global output interface virtual address. */
	u32 output_va;
	/* Number of CSG slots exposed by the FW. */
	u32 group_num;
	/* Stride between two CSG interface blocks. */
	u32 group_stride;
	/* Size of the performance-counter dump. */
	u32 perfcnt_size;
	/* Instrumentation features. */
	u32 instr_features;
};
204
/*
 * Global input interface: host-writable section used to pass global
 * requests and configuration to the FW.
 */
struct panthor_fw_global_input_iface {
#define GLB_HALT			BIT(0)
#define GLB_CFG_PROGRESS_TIMER		BIT(1)
#define GLB_CFG_ALLOC_EN		BIT(2)
#define GLB_CFG_POWEROFF_TIMER		BIT(3)
#define GLB_PROTM_ENTER			BIT(4)
#define GLB_PERFCNT_EN			BIT(5)
#define GLB_PERFCNT_SAMPLE		BIT(6)
#define GLB_COUNTER_EN			BIT(7)
#define GLB_PING			BIT(8)
#define GLB_FWCFG_UPDATE		BIT(9)
#define GLB_IDLE_EN			BIT(10)
/* Note: GLB_SLEEP overlaps the GLB_STATE field (BIT(12) == GLB_STATE(1)). */
#define GLB_SLEEP			BIT(12)
#define GLB_STATE_MASK			GENMASK(14, 12)
#define GLB_STATE_ACTIVE		0
#define GLB_STATE_HALT			1
#define GLB_STATE_SLEEP			2
#define GLB_STATE_SUSPEND		3
#define GLB_STATE(x)			(((x) << 12) & GLB_STATE_MASK)
#define GLB_STATE_GET(x)		(((x) & GLB_STATE_MASK) >> 12)
#define GLB_INACTIVE_COMPUTE		BIT(20)
#define GLB_INACTIVE_FRAGMENT		BIT(21)
#define GLB_INACTIVE_TILER		BIT(22)
#define GLB_PROTM_EXIT			BIT(23)
#define GLB_PERFCNT_THRESHOLD		BIT(24)
#define GLB_PERFCNT_OVERFLOW		BIT(25)
#define GLB_IDLE			BIT(26)
#define GLB_DBG_CSF			BIT(30)
#define GLB_DBG_HOST			BIT(31)
/* Bits 0-10: driven by the host to request something from the FW. */
#define GLB_REQ_MASK			GENMASK(10, 0)
/* Bits 20-26: toggled by the FW to flag events. */
#define GLB_EVT_MASK			GENMASK(26, 20)
	/* Request register; see GLB_REQ_MASK/GLB_EVT_MASK for the bit split. */
	u32 req;
	/* Mask of req bits whose acknowledgment should raise an IRQ. */
	u32 ack_irq_mask;
	/* Doorbell request register. */
	u32 doorbell_req;
	/* Reserved. */
	u32 reserved1;
	/* Progress timer configuration (GLB_CFG_PROGRESS_TIMER). */
	u32 progress_timer;

/* Timer encoding: 31-bit value plus a clock-source selection bit. */
#define GLB_TIMER_VAL(x)		((x) & GENMASK(30, 0))
#define GLB_TIMER_SOURCE_GPU_COUNTER	BIT(31)
	/* Power-off timer configuration (GLB_CFG_POWEROFF_TIMER). */
	u32 poweroff_timer;
	/* Mask of shader cores to enable. */
	u64 core_en_mask;
	/* Reserved. */
	u32 reserved2;
	/* Address space used for performance-counter dumps. */
	u32 perfcnt_as;
	/* Performance-counter ring buffer base address. */
	u64 perfcnt_base;
	/* Performance-counter ring buffer extract point. */
	u32 perfcnt_extract;
	/* Reserved. */
	u32 reserved3[3];
	/* Performance-counter configuration. */
	u32 perfcnt_config;
	/* CSG selected for performance-counter sampling. */
	u32 perfcnt_csg_select;
	/* Per-block performance-counter enable masks. */
	u32 perfcnt_fw_enable;
	u32 perfcnt_csg_enable;
	u32 perfcnt_csf_enable;
	u32 perfcnt_shader_enable;
	u32 perfcnt_tiler_enable;
	u32 perfcnt_mmu_l2_enable;
	/* Reserved. */
	u32 reserved4[8];
	/* Idle timer configuration (used with GLB_IDLE_EN). */
	u32 idle_timer;
};
262
/*
 * Values reported in panthor_fw_global_output_iface::halt_status once the FW
 * has halted, describing why it stopped.
 */
enum panthor_fw_halt_status {
	/* Clean halt (e.g. in response to a host GLB_HALT request). */
	PANTHOR_FW_HALT_OK = 0,
	/* FW halted itself after an internal panic. */
	PANTHOR_FW_HALT_ON_PANIC = 0x4e,
	/* FW halted itself after its watchdog expired. */
	PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
};
268
/*
 * Global output interface: FW-writable status section, read-only for the
 * host.
 */
struct panthor_fw_global_output_iface {
	/* Acknowledgment register, mirroring the input req register. */
	u32 ack;
	/* Reserved. */
	u32 reserved1;
	/* Doorbell acknowledgment register. */
	u32 doorbell_ack;
	/* Reserved. */
	u32 reserved2;
	/* Halt reason (enum panthor_fw_halt_status). */
	u32 halt_status;
	/* Performance-counter sampling status. */
	u32 perfcnt_status;
	/* Performance-counter ring buffer insert point. */
	u32 perfcnt_insert;
};
278
/**
 * struct panthor_fw_cs_iface - Firmware command stream slot interface
 *
 * Host-side view of a FW command stream slot: a lock serializing req updates
 * plus pointers to the mapped control/input/output sections.
 */
struct panthor_fw_cs_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream slot control interface.
	 *
	 * Used to expose command stream slot properties.
	 *
	 * This interface is read-only.
	 */
	struct panthor_fw_cs_control_iface *control;

	/**
	 * @input: Command stream slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_cs_input_iface *input;

	/**
	 * @output: Command stream slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_cs_output_iface *output;
};
322
/**
 * struct panthor_fw_csg_iface - Firmware command stream group slot interface
 *
 * Host-side view of a FW command stream group slot: a lock serializing req
 * updates plus pointers to the mapped control/input/output sections.
 */
struct panthor_fw_csg_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream group slot control interface.
	 *
	 * Used to expose command stream group slot properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_control_iface *control;

	/**
	 * @input: Command stream group slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_csg_input_iface *input;

	/**
	 * @output: Command stream group slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_output_iface *output;
};
366
/**
 * struct panthor_fw_global_iface - Firmware global interface
 *
 * Host-side view of the FW global interface: a lock serializing req updates
 * plus pointers to the mapped control/input/output sections.
 */
struct panthor_fw_global_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler/FW management logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Global control interface.
	 *
	 * Used to expose global FW properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_control_iface *control;

	/**
	 * @input: Global input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_global_input_iface *input;

	/**
	 * @output: Global output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_output_iface *output;
};
410
/**
 * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__out_reg: Name of the register to take as a reference in the output section of the
 * interface.
 * @__mask: Mask to apply to the update.
 *
 * The Host -> FW event/message passing was designed to be lockless, with each side of
 * the channel having its writeable section. Events are signaled as a difference between
 * the host and FW side in the req/ack registers (when a bit differs, there's an event
 * pending, when they are the same, nothing needs attention).
 *
 * This helper allows one to update the req register based on the current value of the
 * ack register managed by the FW. Toggling a specific bit will flag an event. In order
 * for events to be re-evaluated, the interface doorbell needs to be rung.
 *
 * Mechanically, each masked bit of the new req value is set to the inversion of the
 * corresponding FW ack bit (ack XOR mask, restricted to the mask), so that it differs
 * from the ack and flags an event; unmasked bits keep their current req value.
 *
 * Concurrent accesses to the same req register is covered (the update is done under
 * the interface lock).
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
	do { \
		u32 __cur_val, __new_val, __out_val; \
		spin_lock(&(__iface)->lock); \
		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
		__out_val = READ_ONCE((__iface)->output->__out_reg); \
		__new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
		spin_unlock(&(__iface)->lock); \
	} while (0)
442
/**
 * panthor_fw_update_reqs() - Update bits to reflect a configuration change
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__val: Value to set.
 * @__mask: Mask to apply to the update.
 *
 * Some configuration is passed through req registers that are also used to
 * send events to the FW. Since those req registers are updated from the
 * interrupt handler too, this helper performs a masked read-modify-write
 * under the interface lock: bits covered by @__mask take their value from
 * @__val, all other bits are left untouched.
 *
 * Concurrent accesses to the same req register is covered.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
	do { \
		u32 __old_req, __updated_req; \
		spin_lock(&(__iface)->lock); \
		__old_req = READ_ONCE((__iface)->input->__in_reg); \
		__updated_req = (__old_req & ~(__mask)) | ((__val) & (__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __updated_req); \
		spin_unlock(&(__iface)->lock); \
	} while (0)
467
/**
 * panthor_fw_update_reqs64() - 64-bit variant of panthor_fw_update_reqs()
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the 64-bit register to update in the input section.
 * @__val: Value to set.
 * @__mask: Mask to apply to the update.
 *
 * Masked read-modify-write of a 64-bit input register, done under the
 * interface lock: bits covered by @__mask take their value from @__val,
 * all other bits are left untouched.
 */
#define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \
	do { \
		u64 __old_req, __updated_req; \
		spin_lock(&(__iface)->lock); \
		__old_req = READ_ONCE((__iface)->input->__in_reg); \
		__updated_req = (__old_req & ~(__mask)) | ((__val) & (__mask)); \
		WRITE_ONCE((__iface)->input->__in_reg, __updated_req); \
		spin_unlock(&(__iface)->lock); \
	} while (0)
477
/* Get the host-side descriptor of the FW global interface. */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev);

/* Get the host-side descriptor of the CSG slot interface at @csg_slot. */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);

/* Get the host-side descriptor of CS slot @cs_slot in group slot @csg_slot. */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
486
/*
 * Locked accessors for the CSG endpoint request value (the
 * endpoint_req/endpoint_req2 registers of the CSG input interface).
 */
u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
				    struct panthor_fw_csg_iface *csg_iface);

void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
				     struct panthor_fw_csg_iface *csg_iface, u64 value);

/* Masked variant: only the bits covered by @mask are taken from @value. */
void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
					struct panthor_fw_csg_iface *csg_iface, u64 value,
					u64 mask);

/*
 * Wait for the FW to acknowledge the req bits in @req_mask, with a
 * @timeout_ms bound. The acked bits are returned through @acked.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
			     u32 *acked, u32 timeout_ms);

int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
			     u32 timeout_ms);

/* Ring the doorbell associated with CSG slot @csg_slot. */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
504
505 struct panthor_kernel_bo *
506 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
507 struct panthor_fw_ringbuf_input_iface **input,
508 const struct panthor_fw_ringbuf_output_iface **output,
509 u32 *input_fw_va, u32 *output_fw_va);
510 struct panthor_kernel_bo *
511 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
512
513 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
514
515 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
516 int panthor_fw_post_reset(struct panthor_device *ptdev);
517
/**
 * panthor_fw_suspend() - Suspend the FW.
 * @ptdev: Device.
 *
 * Thin wrapper around panthor_fw_pre_reset() with @on_hang set to false
 * (regular suspend, not a hang recovery).
 */
static inline void panthor_fw_suspend(struct panthor_device *ptdev)
{
	panthor_fw_pre_reset(ptdev, false);
}
522
/**
 * panthor_fw_resume() - Resume the FW.
 * @ptdev: Device.
 *
 * Thin wrapper around panthor_fw_post_reset().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static inline int panthor_fw_resume(struct panthor_device *ptdev)
{
	return panthor_fw_post_reset(ptdev);
}
527
/* FW subsystem init/teardown entry points. */
int panthor_fw_init(struct panthor_device *ptdev);
void panthor_fw_unplug(struct panthor_device *ptdev);
530
531 #endif
532