xref: /linux/drivers/gpu/drm/panthor/panthor_device.h (revision c06b6cde2a1c3bcbb561bd57bb6f34eae9030921)
1 /* SPDX-License-Identifier: GPL-2.0 or MIT */
2 /* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
3 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
4 /* Copyright 2023 Collabora ltd. */
5 
6 #ifndef __PANTHOR_DEVICE_H__
7 #define __PANTHOR_DEVICE_H__
8 
9 #include <linux/atomic.h>
10 #include <linux/io-pgtable.h>
11 #include <linux/regulator/consumer.h>
12 #include <linux/pm_runtime.h>
13 #include <linux/sched.h>
14 #include <linux/spinlock.h>
15 
16 #include <drm/drm_device.h>
17 #include <drm/drm_gem.h>
18 #include <drm/drm_mm.h>
19 #include <drm/gpu_scheduler.h>
20 #include <drm/panthor_drm.h>
21 
22 struct panthor_csf;
23 struct panthor_csf_ctx;
24 struct panthor_device;
25 struct panthor_gpu;
26 struct panthor_group_pool;
27 struct panthor_heap_pool;
28 struct panthor_hw;
29 struct panthor_job;
30 struct panthor_mmu;
31 struct panthor_fw;
32 struct panthor_perfcnt;
33 struct panthor_pwr;
34 struct panthor_vm;
35 struct panthor_vm_pool;
36 
/**
 * struct panthor_soc_data - Panthor SoC Data
 *
 * SoC-specific settings. A device points at one of these through
 * &panthor_device.soc_data, which is documented there as optional.
 */
struct panthor_soc_data {
	/** @asn_hash_enable: True if GPU_L2_CONFIG_ASN_HASH_ENABLE must be set. */
	bool asn_hash_enable;

	/**
	 * @asn_hash: ASN_HASH values when asn_hash_enable is true.
	 *
	 * Three 32-bit words. NOTE(review): how each word maps onto a HW
	 * register is not visible from this header - confirm against the code
	 * programming the L2 configuration.
	 */
	u32 asn_hash[3];
};
47 
/**
 * enum panthor_device_pm_state - PM state
 *
 * Values stored in the &panthor_device.pm.state atomic.
 * panthor_device_schedule_reset() only queues the reset work when the state
 * is %PANTHOR_DEVICE_PM_STATE_ACTIVE.
 */
enum panthor_device_pm_state {
	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,

	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
	PANTHOR_DEVICE_PM_STATE_RESUMING,

	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
	PANTHOR_DEVICE_PM_STATE_ACTIVE,

	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
};
64 
/**
 * enum panthor_irq_state - State of a &struct panthor_irq
 *
 * Stored in &panthor_irq.state. The helpers generated by
 * PANTHOR_IRQ_HANDLER() move between these states as follows:
 *
 * - ACTIVE -> PROCESSING in the raw (hard IRQ) handler, which also clears
 *   the interrupt mask register before waking the threaded handler.
 * - PROCESSING -> ACTIVE at the end of the threaded handler, which then
 *   restores the interrupt mask register.
 * - any -> SUSPENDING -> SUSPENDED in the _irq_suspend() helper.
 * - any -> ACTIVE in the _irq_resume() helper.
 */
enum panthor_irq_state {
	/** @PANTHOR_IRQ_STATE_ACTIVE: IRQ is active and ready to process events. */
	PANTHOR_IRQ_STATE_ACTIVE = 0,
	/** @PANTHOR_IRQ_STATE_PROCESSING: IRQ is currently processing events. */
	PANTHOR_IRQ_STATE_PROCESSING,
	/** @PANTHOR_IRQ_STATE_SUSPENDED: IRQ is suspended. */
	PANTHOR_IRQ_STATE_SUSPENDED,
	/** @PANTHOR_IRQ_STATE_SUSPENDING: IRQ is being suspended. */
	PANTHOR_IRQ_STATE_SUSPENDING,
};
75 
/**
 * struct panthor_irq - IRQ data
 *
 * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
 *
 * Instances are initialized by the panthor_request_<name>_irq() helper that
 * PANTHOR_IRQ_HANDLER() generates, which fills every field below before
 * requesting the interrupt line.
 */
struct panthor_irq {
	/** @ptdev: Panthor device */
	struct panthor_device *ptdev;

	/**
	 * @iomem: CPU mapping of IRQ base address
	 *
	 * The INT_RAWSTAT/INT_CLEAR/INT_MASK/INT_STAT offsets are applied
	 * relative to this pointer.
	 */
	void __iomem *iomem;

	/** @irq: IRQ number. */
	int irq;

	/** @mask: Values to write to xxx_INT_MASK if active. */
	u32 mask;

	/**
	 * @mask_lock: protects modifications to _INT_MASK and @mask.
	 *
	 * In paths where _INT_MASK is updated based on a state
	 * transition/check, it's crucial for the state update/check to be
	 * inside the locked section, otherwise it introduces a race window
	 * leading to potential _INT_MASK inconsistencies.
	 */
	spinlock_t mask_lock;

	/** @state: one of &enum panthor_irq_state reflecting the current state. */
	atomic_t state;
};
107 
/**
 * enum panthor_device_profiling_flags - Profiling state
 *
 * Bit flags: each sampling kind owns one bit, so values can be OR-ed
 * together. %PANTHOR_DEVICE_PROFILING_ALL is the OR of every sampling bit.
 */
enum panthor_device_profiling_flags {
	/** @PANTHOR_DEVICE_PROFILING_DISABLED: Profiling is disabled. */
	PANTHOR_DEVICE_PROFILING_DISABLED = 0,

	/** @PANTHOR_DEVICE_PROFILING_CYCLES: Sampling job cycles. */
	PANTHOR_DEVICE_PROFILING_CYCLES = BIT(0),

	/** @PANTHOR_DEVICE_PROFILING_TIMESTAMP: Sampling job timestamp. */
	PANTHOR_DEVICE_PROFILING_TIMESTAMP = BIT(1),

	/** @PANTHOR_DEVICE_PROFILING_ALL: Sampling everything. */
	PANTHOR_DEVICE_PROFILING_ALL =
	PANTHOR_DEVICE_PROFILING_CYCLES |
	PANTHOR_DEVICE_PROFILING_TIMESTAMP,
};
126 
/**
 * struct panthor_device - Panthor device
 *
 * Top-level per-device state. It embeds the &struct drm_device as its first
 * member and holds pointers to the per-subsystem objects (power, GPU, FW,
 * MMU, scheduler, devfreq) along with reclaim, unplug, reset and PM state.
 */
struct panthor_device {
	/** @base: Base drm_device. */
	struct drm_device base;

	/** @soc_data: Optional SoC data. */
	const struct panthor_soc_data *soc_data;

	/** @phys_addr: Physical address of the iomem region. */
	phys_addr_t phys_addr;

	/** @iomem: CPU mapping of the IOMEM region. */
	void __iomem *iomem;

	/** @clks: GPU clocks. */
	struct {
		/** @clks.core: Core clock. */
		struct clk *core;

		/** @clks.stacks: Stacks clock. This clock is optional. */
		struct clk *stacks;

		/** @clks.coregroup: Core group clock. This clock is optional. */
		struct clk *coregroup;
	} clks;

	/** @coherent: True if the CPU/GPU are memory coherent. */
	bool coherent;

	/** @gpu_info: GPU information. */
	struct drm_panthor_gpu_info gpu_info;

	/** @csif_info: Command stream interface information. */
	struct drm_panthor_csif_info csif_info;

	/** @hw: GPU-specific data. */
	struct panthor_hw *hw;

	/** @pwr: Power control management data. */
	struct panthor_pwr *pwr;

	/** @gpu: GPU management data. */
	struct panthor_gpu *gpu;

	/** @fw: FW management data. */
	struct panthor_fw *fw;

	/** @mmu: MMU management data. */
	struct panthor_mmu *mmu;

	/** @scheduler: Scheduler management data. */
	struct panthor_scheduler *scheduler;

	/** @devfreq: Device frequency scaling management data. */
	struct panthor_devfreq *devfreq;

	/** @reclaim: Reclaim related stuff */
	struct {
		/** @reclaim.shrinker: Shrinker instance */
		struct shrinker *shrinker;

		/** @reclaim.lock: Lock protecting all LRUs */
		struct mutex lock;

		/**
		 * @reclaim.unused: BOs with unused pages
		 *
		 * Basically all buffers that got mmapped, vmapped or GPU mapped and
		 * then unmapped. There should be no contention on these buffers,
		 * making them ideal to reclaim.
		 */
		struct drm_gem_lru unused;

		/**
		 * @reclaim.mmapped: mmap()-ed buffers
		 *
		 * Those are relatively easy to reclaim since we don't need user
		 * agreement, we can simply teardown the mapping and let it fault on
		 * the next access.
		 */
		struct drm_gem_lru mmapped;

		/**
		 * @reclaim.gpu_mapped_shared: shared BO LRU list
		 *
		 * That's the most tricky BO type to reclaim, because it involves
		 * tearing down all mappings in all VMs where this BO is mapped,
		 * which increases the risk of contention and thus decreases the
		 * likelihood of success.
		 */
		struct drm_gem_lru gpu_mapped_shared;

		/**
		 * @reclaim.vms: VM LRU list
		 *
		 * VMs that have reclaimable BOs only mapped to a single VM are placed
		 * in this LRU. Reclaiming such BOs implies waiting for VM idleness
		 * (no in-flight GPU jobs targeting this VM), meaning we can't reclaim
		 * those if we're in a context where we can't block/sleep.
		 */
		struct list_head vms;

		/**
		 * @reclaim.gpu_mapped_count: Global counter of pages that are GPU mapped
		 *
		 * Allows us to get the number of reclaimable pages without walking
		 * the vms and gpu_mapped_shared LRUs.
		 */
		long gpu_mapped_count;

		/**
		 * @reclaim.retry_count: Number of times we ran the shrinker without being
		 * able to reclaim stuff
		 *
		 * Used to stop scanning GEMs when too many attempts were made
		 * without progress.
		 */
		atomic_t retry_count;

#ifdef CONFIG_DEBUG_FS
		/**
		 * @reclaim.nr_pages_reclaimed_on_last_scan: Number of pages reclaimed on the last
		 * shrinker scan
		 */
		unsigned long nr_pages_reclaimed_on_last_scan;
#endif
	} reclaim;

	/** @unplug: Device unplug related fields. */
	struct {
		/** @unplug.lock: Lock used to serialize unplug operations. */
		struct mutex lock;

		/**
		 * @unplug.done: Completion object signaled when the unplug
		 * operation is done.
		 */
		struct completion done;
	} unplug;

	/** @reset: Reset related fields. */
	struct {
		/** @reset.wq: Ordered workqueue used to schedule reset operations. */
		struct workqueue_struct *wq;

		/** @reset.work: Reset work. */
		struct work_struct work;

		/**
		 * @reset.pending: Non-zero if a reset is pending.
		 *
		 * Flipped from 0 to 1 with a cmpxchg in
		 * panthor_device_schedule_reset() so that only one caller queues
		 * @reset.work.
		 */
		atomic_t pending;

		/**
		 * @reset.fast: True if the post_reset logic can proceed with a fast reset.
		 *
		 * A fast reset is just a reset where the driver doesn't reload the FW sections.
		 *
		 * Any time the firmware is properly suspended, a fast reset can take place.
		 * On the other hand, if the halt operation failed, the driver will reload
		 * all FW sections to make sure we start from a fresh state.
		 */
		bool fast;
	} reset;

	/** @pm: Power management related data. */
	struct {
		/** @pm.state: Power state, one of &enum panthor_device_pm_state. */
		atomic_t state;

		/**
		 * @pm.mmio_lock: Lock protecting MMIO userspace CPU mappings.
		 *
		 * This is needed to ensure we map the dummy IO pages when
		 * the device is being suspended, and the real IO pages when
		 * the device is being resumed. We can't just do with the
		 * state atomicity to deal with this race.
		 */
		struct mutex mmio_lock;

		/**
		 * @pm.dummy_latest_flush: Dummy LATEST_FLUSH page.
		 *
		 * Used to replace the real LATEST_FLUSH page when the GPU
		 * is suspended.
		 */
		struct page *dummy_latest_flush;

		/**
		 * @pm.recovery_needed: True when a resume attempt failed.
		 *
		 * Consumed (1 -> 0 cmpxchg) by panthor_device_resume_and_get()
		 * so that a single caller forces the RPM state back to suspended.
		 */
		atomic_t recovery_needed;
	} pm;

	/**
	 * @profile_mask: User-set profiling flags for job accounting.
	 *
	 * NOTE(review): presumably a combination of
	 * &enum panthor_device_profiling_flags values - confirm against the
	 * code that sets it.
	 */
	u32 profile_mask;

	/** @fast_rate: Maximum device clock frequency. Set by DVFS */
	unsigned long fast_rate;

#ifdef CONFIG_DEBUG_FS
	/** @gems: Device-wide list of GEM objects owned by at least one file. */
	struct {
		/** @gems.lock: Protects the device-wide list of GEM objects. */
		struct mutex lock;

		/** @gems.node: Used to keep track of all the device's DRM objects */
		struct list_head node;
	} gems;
#endif
};
336 
/**
 * struct panthor_gpu_usage - GPU usage measures
 *
 * Embedded in &struct panthor_file as its stats member, which is described
 * there as "cycle and timestamp measures for job execution".
 */
struct panthor_gpu_usage {
	/**
	 * @time: Time measure.
	 *
	 * NOTE(review): unit and accumulation rules are not visible from this
	 * header - confirm against the code updating it.
	 */
	u64 time;
	/**
	 * @cycles: Cycle measure.
	 *
	 * NOTE(review): presumably GPU cycles spent executing jobs - confirm
	 * against the code updating it.
	 */
	u64 cycles;
};
341 
/**
 * struct panthor_file - Panthor file
 *
 * Per-file state: the owning device, the user MMIO offset in use, the VM and
 * scheduling-group pools, and the usage statistics of this file.
 */
struct panthor_file {
	/** @ptdev: Device attached to this file. */
	struct panthor_device *ptdev;

	/** @user_mmio: User MMIO related fields. */
	struct {
		/**
		 * @user_mmio.offset: Offset used for user MMIO mappings.
		 *
		 * This offset should not be used to check the type of mapping
		 * except in panthor_mmap(). After that point, MMIO mapping
		 * offsets have been adjusted to match
		 * DRM_PANTHOR_USER_MMIO_OFFSET and that macro should be used
		 * instead.
		 * Make sure this rule is followed at all times, because
		 * userspace is in control of the offset, and can change the
		 * value behind our back. Otherwise it can lead to erroneous
		 * branching happening in kernel space.
		 */
		u64 offset;
	} user_mmio;

	/** @vms: VM pool attached to this file. */
	struct panthor_vm_pool *vms;

	/** @groups: Scheduling group pool attached to this file. */
	struct panthor_group_pool *groups;

	/** @stats: cycle and timestamp measures for job execution. */
	struct panthor_gpu_usage stats;
};
376 
377 int panthor_device_init(struct panthor_device *ptdev);
378 void panthor_device_unplug(struct panthor_device *ptdev);
379 
380 /**
381  * panthor_device_schedule_reset() - Schedules a reset operation
382  */
383 static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
384 {
385 	if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) &&
386 	    atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE)
387 		queue_work(ptdev->reset.wq, &ptdev->reset.work);
388 }
389 
390 /**
391  * panthor_device_reset_is_pending() - Checks if a reset is pending.
392  *
393  * Return: true if a reset is pending, false otherwise.
394  */
395 static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
396 {
397 	return atomic_read(&ptdev->reset.pending) != 0;
398 }
399 
400 int panthor_device_mmap_io(struct panthor_device *ptdev,
401 			   struct vm_area_struct *vma);
402 
403 int panthor_device_resume(struct device *dev);
404 int panthor_device_suspend(struct device *dev);
405 
406 static inline int panthor_device_resume_and_get(struct panthor_device *ptdev)
407 {
408 	int ret = pm_runtime_resume_and_get(ptdev->base.dev);
409 
410 	/* If the resume failed, we need to clear the runtime_error, which
411 	 * can done by forcing the RPM state to suspended. If multiple
412 	 * threads called panthor_device_resume_and_get(), we only want
413 	 * one of them to update the state, hence the cmpxchg. Note that a
414 	 * thread might enter panthor_device_resume_and_get() and call
415 	 * pm_runtime_resume_and_get() after another thread had attempted
416 	 * to resume and failed. This means we will end up with an error
417 	 * without even attempting a resume ourselves. The only risk here
418 	 * is to report an error when the second resume attempt might have
419 	 * succeeded. Given resume errors are not expected, this is probably
420 	 * something we can live with.
421 	 */
422 	if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1)
423 		pm_runtime_set_suspended(ptdev->base.dev);
424 
425 	return ret;
426 }
427 
/**
 * enum drm_panthor_exception_type - Exception codes
 *
 * Codes up to and including %DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT (0x3f) are
 * not faults; anything above is treated as a fault by
 * panthor_exception_is_fault(). panthor_exception_name() takes an exception
 * code and returns a string for it.
 *
 * NOTE(review): the values presumably mirror the exception encoding used by
 * the GPU/FW interface - confirm against the HW documentation before adding
 * or renumbering entries.
 */
enum drm_panthor_exception_type {
	DRM_PANTHOR_EXCEPTION_OK = 0x00,
	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
	/* Boundary marker: the highest code that is not a fault. */
	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
	DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE = 0x41,
	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
};
479 
480 /**
481  * panthor_exception_is_fault() - Checks if an exception is a fault.
482  *
483  * Return: true if the exception is a fault, false otherwise.
484  */
485 static inline bool
486 panthor_exception_is_fault(u32 exception_code)
487 {
488 	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
489 }
490 
491 const char *panthor_exception_name(struct panthor_device *ptdev,
492 				   u32 exception_code);
493 
/*
 * Offsets of the interrupt registers, relative to &panthor_irq.iomem. The
 * helpers generated by PANTHOR_IRQ_HANDLER() use them as follows:
 * - INT_RAWSTAT is read and AND-ed with the software mask in the threaded
 *   handler to find the events to process.
 * - INT_CLEAR is written with the software mask at resume time.
 * - INT_MASK is written with 0 or with the software mask.
 * - INT_STAT is read by the raw handler to tell whether this interrupt
 *   source has anything pending at all.
 */
#define INT_RAWSTAT 0x0
#define INT_CLEAR   0x4
#define INT_MASK    0x8
#define INT_STAT    0xc
498 
/**
 * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
 * registration function.
 * @__name: Token pasted into the name of every generated function, and
 *	    appended to KBUILD_MODNAME to build the name passed when
 *	    requesting the interrupt.
 * @__handler: Function called from the threaded handler with the pending
 *	       event bits.
 *
 * The boiler-plate to gracefully deal with shared interrupts is
 * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
 * just after the actual handler. The handler prototype is:
 *
 * void (*handler)(struct panthor_device *, u32 status);
 *
 * The following static functions are generated:
 *
 * - panthor_<name>_irq_raw_handler(): hard IRQ handler. Returns IRQ_NONE
 *   when INT_STAT reads zero or when the IRQ is not in the ACTIVE state,
 *   otherwise moves to PROCESSING, writes 0 to INT_MASK and wakes the
 *   thread.
 * - panthor_<name>_irq_threaded_handler(): calls @__handler in a loop for as
 *   long as (INT_RAWSTAT & mask) is non-zero, then moves back from
 *   PROCESSING to ACTIVE and restores INT_MASK if the state was still
 *   PROCESSING.
 * - panthor_<name>_irq_suspend(): sets the state to SUSPENDING and writes 0
 *   to INT_MASK under the mask lock, calls synchronize_irq(), then sets the
 *   state to SUSPENDED.
 * - panthor_<name>_irq_resume(): sets the state to ACTIVE, then writes the
 *   mask to INT_CLEAR and INT_MASK, all under the mask lock.
 * - panthor_request_<name>_irq(): initializes the &struct panthor_irq, calls
 *   the resume helper and requests a shared threaded IRQ through
 *   devm_request_threaded_irq(), whose return value is passed through.
 * - panthor_<name>_irq_enable_events() / panthor_<name>_irq_disable_events():
 *   set/clear bits in the software mask and, only if the IRQ is ACTIVE,
 *   write the updated mask to INT_MASK.
 */
#define PANTHOR_IRQ_HANDLER(__name, __handler)							\
static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
{												\
	struct panthor_irq *pirq = data;							\
	enum panthor_irq_state old_state;							\
												\
	if (!gpu_read(pirq->iomem, INT_STAT))							\
		return IRQ_NONE;								\
												\
	guard(spinlock_irqsave)(&pirq->mask_lock);						\
	old_state = atomic_cmpxchg(&pirq->state,						\
				   PANTHOR_IRQ_STATE_ACTIVE,					\
				   PANTHOR_IRQ_STATE_PROCESSING);				\
	if (old_state != PANTHOR_IRQ_STATE_ACTIVE)						\
		return IRQ_NONE;								\
												\
	gpu_write(pirq->iomem, INT_MASK, 0);							\
	return IRQ_WAKE_THREAD;									\
}												\
												\
static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
{												\
	struct panthor_irq *pirq = data;							\
	struct panthor_device *ptdev = pirq->ptdev;						\
	irqreturn_t ret = IRQ_NONE;								\
												\
	while (true) {										\
		/* It's safe to access pirq->mask without the lock held here. If a new		\
		 * event gets added to the mask and the corresponding IRQ is pending,		\
		 * we'll process it right away instead of adding an extra raw -> threaded	\
		 * round trip. If an event is removed and the status bit is set, it will	\
		 * be ignored, just like it would have been if the mask had been adjusted	\
		 * right before the HW event kicks in. TLDR; it's all expected races we're	\
		 * covered for.									\
		 */										\
		u32 status = gpu_read(pirq->iomem, INT_RAWSTAT) & pirq->mask;			\
												\
		if (!status)									\
			break;									\
												\
		__handler(ptdev, status);							\
		ret = IRQ_HANDLED;								\
	}											\
												\
	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
		enum panthor_irq_state old_state;						\
												\
		old_state = atomic_cmpxchg(&pirq->state,					\
					   PANTHOR_IRQ_STATE_PROCESSING,			\
					   PANTHOR_IRQ_STATE_ACTIVE);				\
		if (old_state == PANTHOR_IRQ_STATE_PROCESSING)					\
			gpu_write(pirq->iomem, INT_MASK, pirq->mask);				\
	}											\
												\
	return ret;										\
}												\
												\
static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
{												\
	scoped_guard(spinlock_irqsave, &pirq->mask_lock) {					\
		atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING);				\
		gpu_write(pirq->iomem, INT_MASK, 0);						\
	}											\
	synchronize_irq(pirq->irq);								\
	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED);					\
}												\
												\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq)			\
{												\
	guard(spinlock_irqsave)(&pirq->mask_lock);						\
												\
	atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE);					\
	gpu_write(pirq->iomem, INT_CLEAR, pirq->mask);						\
	gpu_write(pirq->iomem, INT_MASK, pirq->mask);						\
}												\
												\
static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
					      struct panthor_irq *pirq,				\
					      int irq, u32 mask, void __iomem *iomem)		\
{												\
	pirq->ptdev = ptdev;									\
	pirq->irq = irq;									\
	pirq->mask = mask;									\
	pirq->iomem = iomem;									\
	spin_lock_init(&pirq->mask_lock);							\
	panthor_ ## __name ## _irq_resume(pirq);						\
												\
	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
					 panthor_ ## __name ## _irq_raw_handler,		\
					 panthor_ ## __name ## _irq_threaded_handler,		\
					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
					 pirq);							\
}												\
												\
static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask)	\
{												\
	guard(spinlock_irqsave)(&pirq->mask_lock);						\
	pirq->mask |= mask;									\
												\
	/* The only situation where we need to write the new mask is if the IRQ is active.	\
	 * If it's being processed, the mask will be restored for us in _irq_threaded_handler()	\
	 * on the PROCESSING -> ACTIVE transition.						\
	 * If the IRQ is suspended/suspending, the mask is restored at resume time.		\
	 */											\
	if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE)				\
		gpu_write(pirq->iomem, INT_MASK, pirq->mask);					\
}												\
												\
static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\
{												\
	guard(spinlock_irqsave)(&pirq->mask_lock);						\
	pirq->mask &= ~mask;									\
												\
	/* The only situation where we need to write the new mask is if the IRQ is active.	\
	 * If it's being processed, the mask will be restored for us in _irq_threaded_handler()	\
	 * on the PROCESSING -> ACTIVE transition.						\
	 * If the IRQ is suspended/suspending, the mask is restored at resume time.		\
	 */											\
	if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE)				\
		gpu_write(pirq->iomem, INT_MASK, pirq->mask);					\
}
630 
631 extern struct workqueue_struct *panthor_cleanup_wq;
632 
633 static inline void gpu_write(void __iomem *iomem, u32 reg, u32 data)
634 {
635 	writel(data, iomem + reg);
636 }
637 
638 static inline u32 gpu_read(void __iomem *iomem, u32 reg)
639 {
640 	return readl(iomem + reg);
641 }
642 
643 static inline u32 gpu_read_relaxed(void __iomem *iomem, u32 reg)
644 {
645 	return readl_relaxed(iomem + reg);
646 }
647 
648 static inline void gpu_write64(void __iomem *iomem, u32 reg, u64 data)
649 {
650 	gpu_write(iomem, reg, lower_32_bits(data));
651 	gpu_write(iomem, reg + 4, upper_32_bits(data));
652 }
653 
654 static inline u64 gpu_read64(void __iomem *iomem, u32 reg)
655 {
656 	return (gpu_read(iomem, reg) | ((u64)gpu_read(iomem, reg + 4) << 32));
657 }
658 
659 static inline u64 gpu_read64_relaxed(void __iomem *iomem, u32 reg)
660 {
661 	return (gpu_read_relaxed(iomem, reg) |
662 		((u64)gpu_read_relaxed(iomem, reg + 4) << 32));
663 }
664 
665 static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg)
666 {
667 	u32 lo, hi1, hi2;
668 	do {
669 		hi1 = gpu_read(iomem, reg + 4);
670 		lo = gpu_read(iomem, reg);
671 		hi2 = gpu_read(iomem, reg + 4);
672 	} while (hi1 != hi2);
673 	return lo | ((u64)hi2 << 32);
674 }
675 
/*
 * Register polling helpers.
 *
 * Each macro forwards to read_poll_timeout() or read_poll_timeout_atomic()
 * with one of the gpu_read*() accessors as the read operation and
 * (iomem, reg) as its arguments. In every variant:
 * - val receives the value that was read,
 * - cond is the expression that ends the polling,
 * - delay_us and timeout_us are passed through unchanged.
 *
 * NOTE(review): the literal "false" passed in sixth position is, per
 * linux/iopoll.h, the "sleep/delay before the first read" flag - confirm
 * against the iopoll version in use. This header does not include
 * linux/iopoll.h itself, so users must have it in scope.
 */

/* Poll a 32-bit register with gpu_read() through read_poll_timeout(). */
#define gpu_read_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us)	\
	read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false,	\
			  iomem, reg)

/* Poll a 32-bit register with gpu_read() through read_poll_timeout_atomic(). */
#define gpu_read_poll_timeout_atomic(iomem, reg, val, cond, delay_us,		\
				     timeout_us)				\
	read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us,	\
				 false, iomem, reg)

/* Poll a 64-bit register pair with gpu_read64() through read_poll_timeout(). */
#define gpu_read64_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us)	\
	read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false,	\
			  iomem, reg)

/* Poll a 64-bit register pair with gpu_read64() through read_poll_timeout_atomic(). */
#define gpu_read64_poll_timeout_atomic(iomem, reg, val, cond, delay_us,		\
				       timeout_us)				\
	read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us,	\
				 false, iomem, reg)

/* Poll a 32-bit register with gpu_read_relaxed() through read_poll_timeout_atomic(). */
#define gpu_read_relaxed_poll_timeout_atomic(iomem, reg, val, cond, delay_us,	\
					     timeout_us)			\
	read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us,		\
				 timeout_us, false, iomem, reg)

/* Poll a 64-bit register pair with gpu_read64_relaxed() through read_poll_timeout(). */
#define gpu_read64_relaxed_poll_timeout(iomem, reg, val, cond, delay_us,	\
					timeout_us)				\
	read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us,	\
			  false, iomem, reg)
703 
704 #endif
705