xref: /linux/drivers/gpu/drm/panthor/panthor_device.h (revision 74ba587f402d5501af2c85e50cf1e4044263b6ca)
1 /* SPDX-License-Identifier: GPL-2.0 or MIT */
2 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
3 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
4 /* Copyright 2023 Collabora ltd. */
5 
6 #ifndef __PANTHOR_DEVICE_H__
7 #define __PANTHOR_DEVICE_H__
8 
9 #include <linux/atomic.h>
10 #include <linux/io-pgtable.h>
11 #include <linux/regulator/consumer.h>
12 #include <linux/pm_runtime.h>
13 #include <linux/sched.h>
14 #include <linux/spinlock.h>
15 
16 #include <drm/drm_device.h>
17 #include <drm/drm_mm.h>
18 #include <drm/gpu_scheduler.h>
19 #include <drm/panthor_drm.h>
20 
21 struct panthor_csf;
22 struct panthor_csf_ctx;
23 struct panthor_device;
24 struct panthor_gpu;
25 struct panthor_group_pool;
26 struct panthor_heap_pool;
27 struct panthor_job;
28 struct panthor_mmu;
29 struct panthor_fw;
30 struct panthor_perfcnt;
31 struct panthor_vm;
32 struct panthor_vm_pool;
33 
34 /**
35  * struct panthor_soc_data - Panthor SoC Data
36  */
37 struct panthor_soc_data {
38 	/** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */
39 	bool asn_hash_enable;
40 
	/** @asn_hash: ASN_HASH register values, applied when @asn_hash_enable is true. */
42 	u32 asn_hash[3];
43 };
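
/*
 * Illustrative example (not taken from the driver): a SoC integration that
 * needs a non-default L2 ASN hash could describe it with an instance like the
 * one below and point its OF match data at it from the probe code. The hash
 * values are hypothetical placeholders.
 */
static const struct panthor_soc_data example_soc_data = {
	.asn_hash_enable = true,
	.asn_hash = { 0x7, 0x5, 0x3 },	/* placeholder values */
};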
44 
45 /**
46  * enum panthor_device_pm_state - PM state
47  */
48 enum panthor_device_pm_state {
49 	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
50 	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,
51 
52 	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
53 	PANTHOR_DEVICE_PM_STATE_RESUMING,
54 
55 	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
56 	PANTHOR_DEVICE_PM_STATE_ACTIVE,
57 
58 	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
59 	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
60 };
61 
62 /**
63  * struct panthor_irq - IRQ data
64  *
 * Used to automate IRQ handling for the three different IRQs we have in this
 * driver. See PANTHOR_IRQ_HANDLER() for the generated boilerplate.
66  */
67 struct panthor_irq {
68 	/** @ptdev: Panthor device */
69 	struct panthor_device *ptdev;
70 
71 	/** @irq: IRQ number. */
72 	int irq;
73 
74 	/** @mask: Current mask being applied to xxx_INT_MASK. */
75 	u32 mask;
76 
77 	/** @suspended: Set to true when the IRQ is suspended. */
78 	atomic_t suspended;
79 };
80 
81 /**
82  * enum panthor_device_profiling_mode - Profiling state
83  */
84 enum panthor_device_profiling_flags {
85 	/** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */
86 	PANTHOR_DEVICE_PROFILING_DISABLED = 0,
87 
88 	/** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */
89 	PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0),
90 
91 	/** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */
92 	PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1),
93 
94 	/** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */
95 	PANTHOR_DEVICE_PROFILING_ALL =
96 	PANTHOR_DEVICE_PROFILING_CYCLES |
97 	PANTHOR_DEVICE_PROFILING_TIMESTAMP,
98 };
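
/*
 * Illustrative sketch: code that accounts job execution is expected to test
 * the user-controlled profiling mask (see @profile_mask in
 * struct panthor_device below) before sampling anything, e.g.:
 */
static inline bool example_profiling_cycles_enabled(u32 profile_mask)
{
	return profile_mask & PANTHOR_DEVICE_PROFILING_CYCLES;
}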
99 
100 /**
101  * struct panthor_device - Panthor device
102  */
103 struct panthor_device {
104 	/** @base: Base drm_device. */
105 	struct drm_device base;
106 
107 	/** @soc_data: Optional SoC data. */
108 	const struct panthor_soc_data *soc_data;
109 
110 	/** @phys_addr: Physical address of the iomem region. */
111 	phys_addr_t phys_addr;
112 
113 	/** @iomem: CPU mapping of the IOMEM region. */
114 	void __iomem *iomem;
115 
116 	/** @clks: GPU clocks. */
117 	struct {
118 		/** @core: Core clock. */
119 		struct clk *core;
120 
121 		/** @stacks: Stacks clock. This clock is optional. */
122 		struct clk *stacks;
123 
124 		/** @coregroup: Core group clock. This clock is optional. */
125 		struct clk *coregroup;
126 	} clks;
127 
128 	/** @coherent: True if the CPU/GPU are memory coherent. */
129 	bool coherent;
130 
131 	/** @gpu_info: GPU information. */
132 	struct drm_panthor_gpu_info gpu_info;
133 
134 	/** @csif_info: Command stream interface information. */
135 	struct drm_panthor_csif_info csif_info;
136 
137 	/** @gpu: GPU management data. */
138 	struct panthor_gpu *gpu;
139 
140 	/** @fw: FW management data. */
141 	struct panthor_fw *fw;
142 
143 	/** @mmu: MMU management data. */
144 	struct panthor_mmu *mmu;
145 
146 	/** @scheduler: Scheduler management data. */
147 	struct panthor_scheduler *scheduler;
148 
149 	/** @devfreq: Device frequency scaling management data. */
150 	struct panthor_devfreq *devfreq;
151 
152 	/** @unplug: Device unplug related fields. */
153 	struct {
154 		/** @lock: Lock used to serialize unplug operations. */
155 		struct mutex lock;
156 
157 		/**
158 		 * @done: Completion object signaled when the unplug
159 		 * operation is done.
160 		 */
161 		struct completion done;
162 	} unplug;
163 
164 	/** @reset: Reset related fields. */
165 	struct {
		/** @wq: Ordered workqueue used to schedule reset operations. */
167 		struct workqueue_struct *wq;
168 
169 		/** @work: Reset work. */
170 		struct work_struct work;
171 
172 		/** @pending: Set to true if a reset is pending. */
173 		atomic_t pending;
174 
175 		/**
176 		 * @fast: True if the post_reset logic can proceed with a fast reset.
177 		 *
178 		 * A fast reset is just a reset where the driver doesn't reload the FW sections.
179 		 *
180 		 * Any time the firmware is properly suspended, a fast reset can take place.
181 		 * On the other hand, if the halt operation failed, the driver will reload
182 		 * all FW sections to make sure we start from a fresh state.
183 		 */
184 		bool fast;
185 	} reset;
186 
187 	/** @pm: Power management related data. */
188 	struct {
189 		/** @state: Power state. */
190 		atomic_t state;
191 
192 		/**
193 		 * @mmio_lock: Lock protecting MMIO userspace CPU mappings.
194 		 *
195 		 * This is needed to ensure we map the dummy IO pages when
196 		 * the device is being suspended, and the real IO pages when
		 * the device is being resumed. The atomicity of @state alone
		 * is not enough to deal with this race.
199 		 */
200 		struct mutex mmio_lock;
201 
202 		/**
203 		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
204 		 *
205 		 * Used to replace the real LATEST_FLUSH page when the GPU
206 		 * is suspended.
207 		 */
208 		struct page *dummy_latest_flush;
209 
210 		/** @recovery_needed: True when a resume attempt failed. */
211 		atomic_t recovery_needed;
212 	} pm;
213 
214 	/** @profile_mask: User-set profiling flags for job accounting. */
215 	u32 profile_mask;
216 
	/** @fast_rate: Maximum device clock frequency. Set by DVFS. */
218 	unsigned long fast_rate;
219 
220 #ifdef CONFIG_DEBUG_FS
221 	/** @gems: Device-wide list of GEM objects owned by at least one file. */
222 	struct {
223 		/** @gems.lock: Protects the device-wide list of GEM objects. */
224 		struct mutex lock;
225 
		/** @gems.node: Used to keep track of all the device's GEM objects. */
227 		struct list_head node;
228 	} gems;
229 #endif
230 };
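
/*
 * Illustrative helper (a sketch; the driver may spell this differently):
 * because @base is embedded, a struct drm_device pointer handed out by DRM
 * core callbacks can be converted back to the panthor device with
 * container_of().
 */
static inline struct panthor_device *example_to_panthor_device(struct drm_device *ddev)
{
	return container_of(ddev, struct panthor_device, base);
}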
231 
/**
 * struct panthor_gpu_usage - GPU usage statistics (execution time and cycles)
 */
struct panthor_gpu_usage {
	/** @time: Accumulated job execution time. */
	u64 time;

	/** @cycles: Accumulated job execution cycles. */
	u64 cycles;
};
236 
237 /**
238  * struct panthor_file - Panthor file
239  */
240 struct panthor_file {
241 	/** @ptdev: Device attached to this file. */
242 	struct panthor_device *ptdev;
243 
244 	/** @user_mmio: User MMIO related fields. */
245 	struct {
246 		/**
247 		 * @offset: Offset used for user MMIO mappings.
248 		 *
249 		 * This offset should not be used to check the type of mapping
250 		 * except in panthor_mmap(). After that point, MMIO mapping
251 		 * offsets have been adjusted to match
252 		 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used
253 		 * instead.
254 		 * Make sure this rule is followed at all times, because
255 		 * userspace is in control of the offset, and can change the
		 * value behind our back. Otherwise this could lead to erroneous
		 * branching in kernel space (an illustrative check is sketched
		 * after this struct).
258 		 */
259 		u64 offset;
260 	} user_mmio;
261 
262 	/** @vms: VM pool attached to this file. */
263 	struct panthor_vm_pool *vms;
264 
265 	/** @groups: Scheduling group pool attached to this file. */
266 	struct panthor_group_pool *groups;
267 
	/** @stats: Cycle and timestamp measures for job execution. */
269 	struct panthor_gpu_usage stats;
270 };
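
/*
 * Illustrative sketch of the rule documented in @user_mmio.offset above:
 * outside of panthor_mmap(), mapping-type checks should be made against the
 * canonical DRM_PANTHOR_USER_MMIO_OFFSET base from the uAPI header, never
 * against the userspace-controlled offset stored in the file.
 */
static inline bool example_is_user_mmio_mapping(u64 adjusted_offset)
{
	return adjusted_offset >= DRM_PANTHOR_USER_MMIO_OFFSET;
}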
271 
272 int panthor_device_init(struct panthor_device *ptdev);
273 void panthor_device_unplug(struct panthor_device *ptdev);
274 
275 /**
 * panthor_device_schedule_reset() - Schedules a reset operation
 * @ptdev: Device to reset.
 *
 * The reset work is only queued if no reset is already pending and the
 * device is active.
 */
278 static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
279 {
280 	if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) &&
281 	    atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE)
282 		queue_work(ptdev->reset.wq, &ptdev->reset.work);
283 }
284 
285 /**
 * panthor_device_reset_is_pending() - Checks if a reset is pending.
 * @ptdev: Device to check.
 *
288  * Return: true if a reset is pending, false otherwise.
289  */
290 static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
291 {
292 	return atomic_read(&ptdev->reset.pending) != 0;
293 }
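
/*
 * Example usage (hypothetical, illustrative only): an error path that hits an
 * unrecoverable condition would typically schedule a reset and then avoid
 * touching the hardware while the reset is pending. Assumes
 * <drm/drm_print.h> for drm_warn().
 */
static inline bool example_handle_unrecoverable_error(struct panthor_device *ptdev)
{
	drm_warn(&ptdev->base, "unrecoverable GPU error, scheduling a reset");
	panthor_device_schedule_reset(ptdev);

	return panthor_device_reset_is_pending(ptdev);
}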
294 
295 int panthor_device_mmap_io(struct panthor_device *ptdev,
296 			   struct vm_area_struct *vma);
297 
298 int panthor_device_resume(struct device *dev);
299 int panthor_device_suspend(struct device *dev);
300 
301 static inline int panthor_device_resume_and_get(struct panthor_device *ptdev)
302 {
303 	int ret = pm_runtime_resume_and_get(ptdev->base.dev);
304 
305 	/* If the resume failed, we need to clear the runtime_error, which
	 * can be done by forcing the RPM state to suspended. If multiple
307 	 * threads called panthor_device_resume_and_get(), we only want
308 	 * one of them to update the state, hence the cmpxchg. Note that a
309 	 * thread might enter panthor_device_resume_and_get() and call
310 	 * pm_runtime_resume_and_get() after another thread had attempted
311 	 * to resume and failed. This means we will end up with an error
312 	 * without even attempting a resume ourselves. The only risk here
313 	 * is to report an error when the second resume attempt might have
314 	 * succeeded. Given resume errors are not expected, this is probably
315 	 * something we can live with.
316 	 */
317 	if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1)
318 		pm_runtime_set_suspended(ptdev->base.dev);
319 
320 	return ret;
321 }
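
/*
 * Example usage (a sketch of the usual runtime-PM pattern, not lifted from
 * the driver): grab a PM reference before touching registers, then release
 * it with the autosuspend helpers once done.
 */
static inline int example_do_mmio_access(struct panthor_device *ptdev)
{
	int ret = panthor_device_resume_and_get(ptdev);

	if (ret)
		return ret;

	/* ... safe to access ptdev->iomem registers here ... */

	pm_runtime_mark_last_busy(ptdev->base.dev);
	pm_runtime_put_autosuspend(ptdev->base.dev);
	return 0;
}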
322 
323 enum drm_panthor_exception_type {
324 	DRM_PANTHOR_EXCEPTION_OK = 0x00,
325 	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
326 	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
327 	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
328 	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
329 	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
330 	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
331 	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
332 	DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
333 	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
334 	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
335 	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
336 	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
337 	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
338 	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
339 	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
340 	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
341 	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
342 	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
343 	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
344 	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
345 	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
346 	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
347 	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
348 	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
349 	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
350 	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
351 	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
352 	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
353 	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
354 	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
355 	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
356 	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
357 	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
358 	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
359 	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
360 	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
361 	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
362 	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
363 	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
364 	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
365 	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
366 	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
367 	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
368 	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
369 	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
370 	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
371 	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
372 	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
373 };
374 
375 /**
 * panthor_exception_is_fault() - Checks if an exception is a fault.
 * @exception_code: Exception code to check.
 *
378  * Return: true if the exception is a fault, false otherwise.
379  */
380 static inline bool
381 panthor_exception_is_fault(u32 exception_code)
382 {
383 	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
384 }
385 
386 const char *panthor_exception_name(struct panthor_device *ptdev,
387 				   u32 exception_code);
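
/*
 * Example usage (illustrative): fault decoding paths typically combine the
 * two helpers above, only reporting codes above
 * DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT. Assumes <drm/drm_print.h> for
 * drm_err().
 */
static inline void example_report_exception(struct panthor_device *ptdev,
					    u32 exception_code)
{
	if (!panthor_exception_is_fault(exception_code))
		return;

	drm_err(&ptdev->base, "GPU fault: %s (0x%x)",
		panthor_exception_name(ptdev, exception_code), exception_code);
}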
388 
389 /**
390  * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
391  * registration function.
392  *
 * The boilerplate to gracefully deal with shared interrupts is
394  * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
395  * just after the actual handler. The handler prototype is:
396  *
397  * void (*handler)(struct panthor_device *, u32 status);
398  */
399 #define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
400 static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
401 {												\
402 	struct panthor_irq *pirq = data;							\
403 	struct panthor_device *ptdev = pirq->ptdev;						\
404 												\
405 	if (atomic_read(&pirq->suspended))							\
406 		return IRQ_NONE;								\
407 	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
408 		return IRQ_NONE;								\
409 												\
410 	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
411 	return IRQ_WAKE_THREAD;									\
412 }												\
413 												\
414 static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
415 {												\
416 	struct panthor_irq *pirq = data;							\
417 	struct panthor_device *ptdev = pirq->ptdev;						\
418 	irqreturn_t ret = IRQ_NONE;								\
419 												\
420 	while (true) {										\
421 		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
422 												\
423 		if (!status)									\
424 			break;									\
425 												\
426 		__handler(ptdev, status);							\
427 		ret = IRQ_HANDLED;								\
428 	}											\
429 												\
430 	if (!atomic_read(&pirq->suspended))							\
431 		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
432 												\
433 	return ret;										\
434 }												\
435 												\
436 static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
437 {												\
438 	pirq->mask = 0;										\
439 	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);					\
440 	synchronize_irq(pirq->irq);								\
441 	atomic_set(&pirq->suspended, true);							\
442 }												\
443 												\
444 static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
445 {												\
446 	atomic_set(&pirq->suspended, false);							\
447 	pirq->mask = mask;									\
448 	gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);				\
449 	gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);				\
450 }												\
451 												\
452 static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
453 					      struct panthor_irq *pirq,				\
454 					      int irq, u32 mask)				\
455 {												\
456 	pirq->ptdev = ptdev;									\
457 	pirq->irq = irq;									\
458 	panthor_ ## __name ## _irq_resume(pirq, mask);						\
459 												\
460 	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
461 					 panthor_ ## __name ## _irq_raw_handler,		\
462 					 panthor_ ## __name ## _irq_threaded_handler,		\
463 					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
464 					 pirq);							\
465 }
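
/*
 * Example usage (illustrative; "mock"/MOCK names are placeholders): in a
 * unit's .c file, which also includes panthor_regs.h for the real *_INT_*
 * register offsets and <linux/interrupt.h>, the pattern looks roughly like:
 *
 *	static void panthor_mock_irq_handler(struct panthor_device *ptdev, u32 status)
 *	{
 *		// react to the interrupt bits reported in status
 *	}
 *	PANTHOR_IRQ_HANDLER(mock, MOCK, panthor_mock_irq_handler);
 *
 * This instantiates panthor_mock_irq_raw_handler(),
 * panthor_mock_irq_threaded_handler(), panthor_mock_irq_suspend(),
 * panthor_mock_irq_resume() and panthor_request_mock_irq().
 */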
466 
467 extern struct workqueue_struct *panthor_cleanup_wq;
468 
469 static inline void gpu_write(struct panthor_device *ptdev, u32 reg, u32 data)
470 {
471 	writel(data, ptdev->iomem + reg);
472 }
473 
474 static inline u32 gpu_read(struct panthor_device *ptdev, u32 reg)
475 {
476 	return readl(ptdev->iomem + reg);
477 }
478 
479 static inline u32 gpu_read_relaxed(struct panthor_device *ptdev, u32 reg)
480 {
481 	return readl_relaxed(ptdev->iomem + reg);
482 }
483 
484 static inline void gpu_write64(struct panthor_device *ptdev, u32 reg, u64 data)
485 {
486 	gpu_write(ptdev, reg, lower_32_bits(data));
487 	gpu_write(ptdev, reg + 4, upper_32_bits(data));
488 }
489 
490 static inline u64 gpu_read64(struct panthor_device *ptdev, u32 reg)
491 {
492 	return (gpu_read(ptdev, reg) | ((u64)gpu_read(ptdev, reg + 4) << 32));
493 }
494 
495 static inline u64 gpu_read64_relaxed(struct panthor_device *ptdev, u32 reg)
496 {
497 	return (gpu_read_relaxed(ptdev, reg) |
498 		((u64)gpu_read_relaxed(ptdev, reg + 4) << 32));
499 }
500 
/*
 * Read a live 64-bit counter without tearing: the high word is sampled before
 * and after the low word, and the read is retried until both high samples
 * match (i.e. no carry from the low word happened in between).
 */
static inline u64 gpu_read64_counter(struct panthor_device *ptdev, u32 reg)
{
	u32 lo, hi1, hi2;

	do {
		hi1 = gpu_read(ptdev, reg + 4);
		lo = gpu_read(ptdev, reg);
		hi2 = gpu_read(ptdev, reg + 4);
	} while (hi1 != hi2);

	return lo | ((u64)hi2 << 32);
}
511 
512 #define gpu_read_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)	\
513 	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false,	\
514 			  dev, reg)
515 
516 #define gpu_read_poll_timeout_atomic(dev, reg, val, cond, delay_us,		\
517 				     timeout_us)				\
518 	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us,	\
519 				 false, dev, reg)
520 
521 #define gpu_read64_poll_timeout(dev, reg, val, cond, delay_us, timeout_us)	\
522 	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false,	\
523 			  dev, reg)
524 
525 #define gpu_read64_poll_timeout_atomic(dev, reg, val, cond, delay_us,		\
526 				       timeout_us)				\
527 	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us,	\
528 				 false, dev, reg)
529 
530 #define gpu_read_relaxed_poll_timeout_atomic(dev, reg, val, cond, delay_us,	\
531 					     timeout_us)			\
532 	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us,		\
533 				 timeout_us, false, dev, reg)
534 
535 #define gpu_read64_relaxed_poll_timeout(dev, reg, val, cond, delay_us,		\
536 					timeout_us)				\
537 	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us,	\
538 			  false, dev, reg)
539 
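/*
 * Example usage (illustrative): the poll helpers above are thin wrappers
 * around read_poll_timeout()/read_poll_timeout_atomic() from <linux/iopoll.h>
 * (which the including .c file is assumed to pull in), using the gpu_read*()
 * accessors. The register offset and ready bit below are hypothetical
 * placeholders.
 */
static inline int example_wait_for_ready(struct panthor_device *ptdev)
{
	u32 status;

	/* Poll every 10us, time out after 100ms. */
	return gpu_read_poll_timeout(ptdev, 0x34 /* placeholder reg */, status,
				     status & BIT(0), 10, 100000);
}
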
540 #endif
541