1 /* SPDX-License-Identifier: GPL-2.0 or MIT */
2 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
3 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
4 /* Copyright 2023 Collabora ltd. */
5
6 #ifndef __PANTHOR_DEVICE_H__
7 #define __PANTHOR_DEVICE_H__
8
9 #include <linux/atomic.h>
10 #include <linux/io-pgtable.h>
11 #include <linux/regulator/consumer.h>
12 #include <linux/pm_runtime.h>
13 #include <linux/sched.h>
14 #include <linux/spinlock.h>
15
16 #include <drm/drm_device.h>
17 #include <drm/drm_mm.h>
18 #include <drm/gpu_scheduler.h>
19 #include <drm/panthor_drm.h>
20
21 struct panthor_csf;
22 struct panthor_csf_ctx;
23 struct panthor_device;
24 struct panthor_gpu;
25 struct panthor_group_pool;
26 struct panthor_heap_pool;
27 struct panthor_hw;
28 struct panthor_job;
29 struct panthor_mmu;
30 struct panthor_fw;
31 struct panthor_perfcnt;
32 struct panthor_pwr;
33 struct panthor_vm;
34 struct panthor_vm_pool;
35
/**
 * struct panthor_soc_data - Panthor SoC Data
 *
 * Optional per-SoC quirks/configuration, attached to the device at probe time.
 */
struct panthor_soc_data {
	/** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */
	bool asn_hash_enable;

	/** @asn_hash: ASN_HASH values when asn_hash_enable is true. */
	u32 asn_hash[3];
};
46
/**
 * enum panthor_device_pm_state - PM state
 *
 * Tracked in &struct panthor_device.pm.state. The two transient states
 * (RESUMING/SUSPENDING) exist so concurrent paths can detect an in-progress
 * PM transition.
 */
enum panthor_device_pm_state {
	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,

	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
	PANTHOR_DEVICE_PM_STATE_RESUMING,

	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
	PANTHOR_DEVICE_PM_STATE_ACTIVE,

	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
};
63
/**
 * struct panthor_irq - IRQ data
 *
 * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
 * Instances are driven by the handlers generated by PANTHOR_IRQ_HANDLER().
 */
struct panthor_irq {
	/** @ptdev: Panthor device */
	struct panthor_device *ptdev;

	/** @irq: IRQ number. */
	int irq;

	/** @mask: Current mask being applied to xxx_INT_MASK. */
	u32 mask;

	/** @suspended: Set to true when the IRQ is suspended. */
	atomic_t suspended;
};
82
/**
 * enum panthor_device_profiling_flags - Profiling flags
 *
 * Bitmask of job-accounting sampling modes, stored in
 * &struct panthor_device.profile_mask.
 */
enum panthor_device_profiling_flags {
	/** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */
	PANTHOR_DEVICE_PROFILING_DISABLED = 0,

	/** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */
	PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0),

	/** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */
	PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1),

	/** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */
	PANTHOR_DEVICE_PROFILING_ALL =
	PANTHOR_DEVICE_PROFILING_CYCLES |
	PANTHOR_DEVICE_PROFILING_TIMESTAMP,
};
101
/**
 * struct panthor_device - Panthor device
 */
struct panthor_device {
	/** @base: Base drm_device. */
	struct drm_device base;

	/** @soc_data: Optional SoC data. */
	const struct panthor_soc_data *soc_data;

	/** @phys_addr: Physical address of the iomem region. */
	phys_addr_t phys_addr;

	/** @iomem: CPU mapping of the IOMEM region. */
	void __iomem *iomem;

	/** @clks: GPU clocks. */
	struct {
		/** @clks.core: Core clock. */
		struct clk *core;

		/** @clks.stacks: Stacks clock. This clock is optional. */
		struct clk *stacks;

		/** @clks.coregroup: Core group clock. This clock is optional. */
		struct clk *coregroup;
	} clks;

	/** @coherent: True if the CPU/GPU are memory coherent. */
	bool coherent;

	/** @gpu_info: GPU information. */
	struct drm_panthor_gpu_info gpu_info;

	/** @csif_info: Command stream interface information. */
	struct drm_panthor_csif_info csif_info;

	/** @hw: GPU-specific data. */
	struct panthor_hw *hw;

	/** @pwr: Power control management data. */
	struct panthor_pwr *pwr;

	/** @gpu: GPU management data. */
	struct panthor_gpu *gpu;

	/** @fw: FW management data. */
	struct panthor_fw *fw;

	/** @mmu: MMU management data. */
	struct panthor_mmu *mmu;

	/** @scheduler: Scheduler management data. */
	struct panthor_scheduler *scheduler;

	/** @devfreq: Device frequency scaling management data. */
	struct panthor_devfreq *devfreq;

	/** @unplug: Device unplug related fields. */
	struct {
		/** @unplug.lock: Lock used to serialize unplug operations. */
		struct mutex lock;

		/**
		 * @unplug.done: Completion object signaled when the unplug
		 * operation is done.
		 */
		struct completion done;
	} unplug;

	/** @reset: Reset related fields. */
	struct {
		/** @reset.wq: Ordered workqueue used to schedule reset operations. */
		struct workqueue_struct *wq;

		/** @reset.work: Reset work. */
		struct work_struct work;

		/** @reset.pending: Set to true if a reset is pending. */
		atomic_t pending;

		/**
		 * @reset.fast: True if the post_reset logic can proceed with a fast reset.
		 *
		 * A fast reset is just a reset where the driver doesn't reload the FW sections.
		 *
		 * Any time the firmware is properly suspended, a fast reset can take place.
		 * On the other hand, if the halt operation failed, the driver will reload
		 * all FW sections to make sure we start from a fresh state.
		 */
		bool fast;
	} reset;

	/** @pm: Power management related data. */
	struct {
		/** @pm.state: Power state (one of &enum panthor_device_pm_state). */
		atomic_t state;

		/**
		 * @pm.mmio_lock: Lock protecting MMIO userspace CPU mappings.
		 *
		 * This is needed to ensure we map the dummy IO pages when
		 * the device is being suspended, and the real IO pages when
		 * the device is being resumed. We can't just do with the
		 * state atomicity to deal with this race.
		 */
		struct mutex mmio_lock;

		/**
		 * @pm.dummy_latest_flush: Dummy LATEST_FLUSH page.
		 *
		 * Used to replace the real LATEST_FLUSH page when the GPU
		 * is suspended.
		 */
		struct page *dummy_latest_flush;

		/** @pm.recovery_needed: True when a resume attempt failed. */
		atomic_t recovery_needed;
	} pm;

	/** @profile_mask: User-set profiling flags for job accounting. */
	u32 profile_mask;

	/** @fast_rate: Maximum device clock frequency. Set by DVFS. */
	unsigned long fast_rate;

#ifdef CONFIG_DEBUG_FS
	/** @gems: Device-wide list of GEM objects owned by at least one file. */
	struct {
		/** @gems.lock: Protects the device-wide list of GEM objects. */
		struct mutex lock;

		/** @gems.node: Used to keep track of all the device's DRM objects. */
		struct list_head node;
	} gems;
#endif
};
239
/**
 * struct panthor_gpu_usage - Cycle and timestamp measures for job execution
 *
 * Used as the per-file job accounting container (see &struct panthor_file.stats).
 */
struct panthor_gpu_usage {
	/** @time: Timestamp measure. */
	u64 time;

	/** @cycles: Cycle measure. */
	u64 cycles;
};
244
/**
 * struct panthor_file - Panthor file
 *
 * Per-open-file driver state, attached to the DRM file.
 */
struct panthor_file {
	/** @ptdev: Device attached to this file. */
	struct panthor_device *ptdev;

	/** @user_mmio: User MMIO related fields. */
	struct {
		/**
		 * @user_mmio.offset: Offset used for user MMIO mappings.
		 *
		 * This offset should not be used to check the type of mapping
		 * except in panthor_mmap(). After that point, MMIO mapping
		 * offsets have been adjusted to match
		 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used
		 * instead.
		 * Make sure this rule is followed at all times, because
		 * userspace is in control of the offset, and can change the
		 * value behind our back. Otherwise it can lead to erroneous
		 * branching happening in kernel space.
		 */
		u64 offset;
	} user_mmio;

	/** @vms: VM pool attached to this file. */
	struct panthor_vm_pool *vms;

	/** @groups: Scheduling group pool attached to this file. */
	struct panthor_group_pool *groups;

	/** @stats: Cycle and timestamp measures for job execution. */
	struct panthor_gpu_usage stats;
};
279
280 int panthor_device_init(struct panthor_device *ptdev);
281 void panthor_device_unplug(struct panthor_device *ptdev);
282
283 /**
284 * panthor_device_schedule_reset() - Schedules a reset operation
285 */
panthor_device_schedule_reset(struct panthor_device * ptdev)286 static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
287 {
288 if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) &&
289 atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE)
290 queue_work(ptdev->reset.wq, &ptdev->reset.work);
291 }
292
293 /**
294 * panthor_device_reset_is_pending() - Checks if a reset is pending.
295 *
296 * Return: true if a reset is pending, false otherwise.
297 */
panthor_device_reset_is_pending(struct panthor_device * ptdev)298 static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
299 {
300 return atomic_read(&ptdev->reset.pending) != 0;
301 }
302
303 int panthor_device_mmap_io(struct panthor_device *ptdev,
304 struct vm_area_struct *vma);
305
306 int panthor_device_resume(struct device *dev);
307 int panthor_device_suspend(struct device *dev);
308
panthor_device_resume_and_get(struct panthor_device * ptdev)309 static inline int panthor_device_resume_and_get(struct panthor_device *ptdev)
310 {
311 int ret = pm_runtime_resume_and_get(ptdev->base.dev);
312
313 /* If the resume failed, we need to clear the runtime_error, which
314 * can done by forcing the RPM state to suspended. If multiple
315 * threads called panthor_device_resume_and_get(), we only want
316 * one of them to update the state, hence the cmpxchg. Note that a
317 * thread might enter panthor_device_resume_and_get() and call
318 * pm_runtime_resume_and_get() after another thread had attempted
319 * to resume and failed. This means we will end up with an error
320 * without even attempting a resume ourselves. The only risk here
321 * is to report an error when the second resume attempt might have
322 * succeeded. Given resume errors are not expected, this is probably
323 * something we can live with.
324 */
325 if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1)
326 pm_runtime_set_suspended(ptdev->base.dev);
327
328 return ret;
329 }
330
/*
 * GPU/FW exception codes.
 *
 * Values up to and including DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT are not
 * considered faults (see panthor_exception_is_fault()); everything above is.
 * Human-readable names are provided by panthor_exception_name().
 */
enum drm_panthor_exception_type {
	DRM_PANTHOR_EXCEPTION_OK = 0x00,
	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
	DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
};
382
383 /**
384 * panthor_exception_is_fault() - Checks if an exception is a fault.
385 *
386 * Return: true if the exception is a fault, false otherwise.
387 */
388 static inline bool
panthor_exception_is_fault(u32 exception_code)389 panthor_exception_is_fault(u32 exception_code)
390 {
391 return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
392 }
393
394 const char *panthor_exception_name(struct panthor_device *ptdev,
395 u32 exception_code);
396
/**
 * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
 * registration function.
 *
 * The boiler-plate to gracefully deal with shared interrupts is
 * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
 * just after the actual handler. The handler prototype is:
 *
 * void (*handler)(struct panthor_device *, u32 status);
 */
#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)				\
/* Hard-IRQ half: checks whether the interrupt is ours (the line may be		\
 * shared), masks the block's interrupts, and defers to the threaded half.	\
 */										\
static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)	\
{										\
	struct panthor_irq *pirq = data;					\
	struct panthor_device *ptdev = pirq->ptdev;				\
										\
	/* Don't touch registers once the IRQ has been suspended. */		\
	if (atomic_read(&pirq->suspended))					\
		return IRQ_NONE;						\
	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))			\
		return IRQ_NONE;						\
										\
	/* Mask until the threaded handler has consumed the events. */		\
	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);				\
	return IRQ_WAKE_THREAD;							\
}										\
										\
/* Threaded half: drains RAWSTAT and calls __handler until no unmasked		\
 * event is left pending, then restores the mask (unless suspended).		\
 */										\
static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)	\
{										\
	struct panthor_irq *pirq = data;					\
	struct panthor_device *ptdev = pirq->ptdev;				\
	irqreturn_t ret = IRQ_NONE;						\
										\
	while (true) {								\
		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
										\
		if (!status)							\
			break;							\
										\
		__handler(ptdev, status);					\
		ret = IRQ_HANDLED;						\
	}									\
										\
	if (!atomic_read(&pirq->suspended))					\
		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);	\
										\
	return ret;								\
}										\
										\
/* Masks everything, waits for in-flight handlers (synchronize_irq()), then	\
 * flags the IRQ suspended so the raw handler bails out early.			\
 */										\
static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)	\
{										\
	pirq->mask = 0;								\
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);			\
	synchronize_irq(pirq->irq);						\
	atomic_set(&pirq->suspended, true);					\
}										\
										\
/* Clears any pending events, then re-enables interrupts with @mask. */		\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
{										\
	atomic_set(&pirq->suspended, false);					\
	pirq->mask = mask;							\
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);		\
	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);		\
}										\
										\
/* Registers the raw/threaded handler pair on a (possibly shared) line. */	\
static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,	\
					      struct panthor_irq *pirq,		\
					      int irq, u32 mask)		\
{										\
	pirq->ptdev = ptdev;							\
	pirq->irq = irq;							\
	panthor_ ## __name ## _irq_resume(pirq, mask);				\
										\
	return devm_request_threaded_irq(ptdev->base.dev, irq,			\
					 panthor_ ## __name ## _irq_raw_handler,	\
					 panthor_ ## __name ## _irq_threaded_handler,	\
					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,	\
					 pirq);					\
}
474
475 extern struct workqueue_struct *panthor_cleanup_wq;
476
/* Write a 32-bit GPU register at byte offset @reg. */
static inline void gpu_write(struct panthor_device *ptdev, u32 reg, u32 data)
{
	void __iomem *addr = ptdev->iomem + reg;

	writel(data, addr);
}
481
/* Read a 32-bit GPU register at byte offset @reg. */
static inline u32 gpu_read(struct panthor_device *ptdev, u32 reg)
{
	void __iomem *addr = ptdev->iomem + reg;

	return readl(addr);
}
486
/* Relaxed (non-ordered) variant of gpu_read(). */
static inline u32 gpu_read_relaxed(struct panthor_device *ptdev, u32 reg)
{
	void __iomem *addr = ptdev->iomem + reg;

	return readl_relaxed(addr);
}
491
/* Write a 64-bit GPU register as two 32-bit accesses, low word first. */
static inline void gpu_write64(struct panthor_device *ptdev, u32 reg, u64 data)
{
	u32 lo = lower_32_bits(data);
	u32 hi = upper_32_bits(data);

	gpu_write(ptdev, reg, lo);
	gpu_write(ptdev, reg + 4, hi);
}
497
gpu_read64(struct panthor_device * ptdev,u32 reg)498 static inline u64 gpu_read64(struct panthor_device *ptdev, u32 reg)
499 {
500 return (gpu_read(ptdev, reg) | ((u64)gpu_read(ptdev, reg + 4) << 32));
501 }
502
gpu_read64_relaxed(struct panthor_device * ptdev,u32 reg)503 static inline u64 gpu_read64_relaxed(struct panthor_device *ptdev, u32 reg)
504 {
505 return (gpu_read_relaxed(ptdev, reg) |
506 ((u64)gpu_read_relaxed(ptdev, reg + 4) << 32));
507 }
508
gpu_read64_counter(struct panthor_device * ptdev,u32 reg)509 static inline u64 gpu_read64_counter(struct panthor_device *ptdev, u32 reg)
510 {
511 u32 lo, hi1, hi2;
512 do {
513 hi1 = gpu_read(ptdev, reg + 4);
514 lo = gpu_read(ptdev, reg);
515 hi2 = gpu_read(ptdev, reg + 4);
516 } while (hi1 != hi2);
517 return lo | ((u64)hi2 << 32);
518 }
519
/* Poll a 32-bit register with gpu_read() until @cond is met or @timeout_us
 * expires. Thin wrappers around read_poll_timeout() / the _atomic variant.
 */
#define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
			  dev, reg)

/* Atomic-context variant of gpu_read_poll_timeout(). */
#define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
				     timeout_us) \
	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
				 false, dev, reg)

/* 64-bit register poll, using gpu_read64(). */
#define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us) \
	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
			  dev, reg)

/* Atomic-context variant of gpu_read64_poll_timeout(). */
#define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
				       timeout_us) \
	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
				 false, dev, reg)

/* Atomic-context poll using relaxed (non-ordered) 32-bit reads. */
#define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us, \
					     timeout_us) \
	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \
				 timeout_us, false, dev, reg)

/* Poll using relaxed (non-ordered) 64-bit reads. */
#define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us, \
					timeout_us) \
	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
			  false, dev, reg)
547
548 #endif
549