xref: /linux/drivers/gpu/drm/panthor/panthor_sched.c (revision daa121128a2d2ac6006159e2c47676e4fcd21eab)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #include <drm/drm_drv.h>
5 #include <drm/drm_exec.h>
6 #include <drm/drm_gem_shmem_helper.h>
7 #include <drm/drm_managed.h>
8 #include <drm/gpu_scheduler.h>
9 #include <drm/panthor_drm.h>
10 
11 #include <linux/build_bug.h>
12 #include <linux/clk.h>
13 #include <linux/delay.h>
14 #include <linux/dma-mapping.h>
15 #include <linux/dma-resv.h>
16 #include <linux/firmware.h>
17 #include <linux/interrupt.h>
18 #include <linux/io.h>
19 #include <linux/iopoll.h>
20 #include <linux/iosys-map.h>
21 #include <linux/module.h>
22 #include <linux/platform_device.h>
23 #include <linux/pm_runtime.h>
24 
25 #include "panthor_devfreq.h"
26 #include "panthor_device.h"
27 #include "panthor_fw.h"
28 #include "panthor_gem.h"
29 #include "panthor_gpu.h"
30 #include "panthor_heap.h"
31 #include "panthor_mmu.h"
32 #include "panthor_regs.h"
33 #include "panthor_sched.h"
34 
35 /**
36  * DOC: Scheduler
37  *
38  * Mali CSF hardware adopts a firmware-assisted scheduling model, where
39  * the firmware takes care of scheduling aspects, to some extent.
40  *
41  * The scheduling happens at the scheduling group level, each group
42  * contains 1 to N queues (N is FW/hardware dependent, and exposed
43  * through the firmware interface). Each queue is assigned a command
44  * stream ring buffer, which serves as a way to get jobs submitted to
45  * the GPU, among other things.
46  *
 47  * The firmware can schedule a maximum of M groups (M is FW/hardware
 48  * dependent, and exposed through the firmware interface). Past
 49  * this maximum number of groups, the kernel must take care of
 50  * rotating the groups passed to the firmware so every group gets
 51  * a chance to have its queues scheduled for execution.
52  *
 53  * The current implementation only supports kernel-mode queues.
 54  * In other words, userspace doesn't have access to the ring-buffer.
55  * Instead, userspace passes indirect command stream buffers that are
56  * called from the queue ring-buffer by the kernel using a pre-defined
57  * sequence of command stream instructions to ensure the userspace driver
58  * always gets consistent results (cache maintenance,
59  * synchronization, ...).
60  *
61  * We rely on the drm_gpu_scheduler framework to deal with job
 62  * dependencies and submission. Like any other driver dealing with a
 63  * FW scheduler, we use the 1:1 entity:scheduler mode, such that each
64  * entity has its own job scheduler. When a job is ready to be executed
65  * (all its dependencies are met), it is pushed to the appropriate
66  * queue ring-buffer, and the group is scheduled for execution if it
67  * wasn't already active.
68  *
 69  * Kernel-side group scheduling is timeslice-based. When we have fewer
 70  * groups than there are slots, the periodic tick is disabled and we
 71  * just let the FW schedule the active groups. When there are more
 72  * groups than slots, we give each group a chance to execute work for
73  * a given amount of time, and then re-evaluate and pick new groups
74  * to schedule. The group selection algorithm is based on
75  * priority+round-robin.
76  *
 77  * Even though user-mode queues are out of scope right now, the
78  * current design takes them into account by avoiding any guess on the
79  * group/queue state that would be based on information we wouldn't have
80  * if userspace was in charge of the ring-buffer. That's also one of the
 81  * reasons we don't do 'cooperative' scheduling (encoding FW group slot
82  * reservation as dma_fence that would be returned from the
83  * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
84  * a queue of waiters, ordered by job submission order). This approach
85  * would work for kernel-mode queues, but would make user-mode queues a
86  * lot more complicated to retrofit.
87  */
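
/*
 * Illustrative sketch (a restatement of the policy above, not lifted from the
 * tick logic below; runnable_group_count and fw_csg_slot_count are placeholder
 * names): the periodic tick only needs to be armed when groups have to be
 * rotated or re-evaluated, roughly:
 *
 *	bool need_tick = runnable_group_count > fw_csg_slot_count ||
 *			 sched->might_have_idle_groups;
 *
 * When need_tick is false, the FW is left in charge of scheduling the
 * resident groups on its own.
 */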
88 
89 #define JOB_TIMEOUT_MS				5000
90 
91 #define MIN_CS_PER_CSG				8
92 
93 #define MIN_CSGS				3
94 #define MAX_CSG_PRIO				0xf
95 
96 struct panthor_group;
97 
98 /**
99  * struct panthor_csg_slot - Command stream group slot
100  *
101  * This represents a FW slot for a scheduling group.
102  */
103 struct panthor_csg_slot {
104 	/** @group: Scheduling group bound to this slot. */
105 	struct panthor_group *group;
106 
107 	/** @priority: Group priority. */
108 	u8 priority;
109 
110 	/**
111 	 * @idle: True if the group bound to this slot is idle.
112 	 *
113 	 * A group is idle when it has nothing waiting for execution on
114 	 * all its queues, or when queues are blocked waiting for something
115 	 * to happen (synchronization object).
116 	 */
117 	bool idle;
118 };
119 
120 /**
121  * enum panthor_csg_priority - Group priority
122  */
123 enum panthor_csg_priority {
124 	/** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */
125 	PANTHOR_CSG_PRIORITY_LOW = 0,
126 
127 	/** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
128 	PANTHOR_CSG_PRIORITY_MEDIUM,
129 
130 	/** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
131 	PANTHOR_CSG_PRIORITY_HIGH,
132 
133 	/**
134 	 * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group.
135 	 *
136 	 * Real-time priority allows one to preempt scheduling of other
137 	 * non-real-time groups. When such a group becomes executable,
138 	 * it will evict the group with the lowest non-rt priority if
139 	 * there's no free group slot available.
140 	 *
141 	 * Currently not exposed to userspace.
142 	 */
143 	PANTHOR_CSG_PRIORITY_RT,
144 
145 	/** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
146 	PANTHOR_CSG_PRIORITY_COUNT,
147 };
148 
149 /**
150  * struct panthor_scheduler - Object used to manage the scheduler
151  */
152 struct panthor_scheduler {
153 	/** @ptdev: Device. */
154 	struct panthor_device *ptdev;
155 
156 	/**
157 	 * @wq: Workqueue used by our internal scheduler logic and
158 	 * drm_gpu_scheduler.
159 	 *
160 	 * Used for the scheduler tick, group updates or other kinds of FW
161 	 * event processing that can't be handled in the threaded interrupt
162 	 * path. Also passed to the drm_gpu_scheduler instances embedded
163 	 * in panthor_queue.
164 	 */
165 	struct workqueue_struct *wq;
166 
167 	/**
168 	 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
169 	 *
170 	 * We have a queue dedicated to heap chunk allocation works to avoid
171 	 * blocking the rest of the scheduler if the allocation tries to
172 	 * reclaim memory.
173 	 */
174 	struct workqueue_struct *heap_alloc_wq;
175 
176 	/** @tick_work: Work executed on a scheduling tick. */
177 	struct delayed_work tick_work;
178 
179 	/**
180 	 * @sync_upd_work: Work used to process synchronization object updates.
181 	 *
182 	 * We use this work to unblock queues/groups that were waiting on a
183 	 * synchronization object.
184 	 */
185 	struct work_struct sync_upd_work;
186 
187 	/**
188 	 * @fw_events_work: Work used to process FW events outside the interrupt path.
189 	 *
190 	 * Even if the interrupt is threaded, we need any event processing
191 	 * that requires taking the panthor_scheduler::lock to be processed
192 	 * outside the interrupt path so we don't block the tick logic when
193 	 * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
194 	 * it calls panthor_fw_{csg,glb}_wait_acks(). Since most of the
195 	 * FW event processing to the scheduler workqueue.
196 	 */
197 	struct work_struct fw_events_work;
198 
199 	/**
200 	 * @fw_events: Bitmask encoding pending FW events.
201 	 */
202 	atomic_t fw_events;
203 
204 	/**
205 	 * @resched_target: When the next tick should occur.
206 	 *
207 	 * Expressed in jiffies.
208 	 */
209 	u64 resched_target;
210 
211 	/**
212 	 * @last_tick: When the last tick occurred.
213 	 *
214 	 * Expressed in jiffies.
215 	 */
216 	u64 last_tick;
217 
218 	/** @tick_period: Tick period in jiffies. */
219 	u64 tick_period;
220 
221 	/**
222 	 * @lock: Lock protecting access to all the scheduler fields.
223 	 *
224 	 * Should be taken in the tick work, the irq handler, and anywhere the @groups
225 	 * fields are touched.
226 	 */
227 	struct mutex lock;
228 
229 	/** @groups: Various lists used to classify groups. */
230 	struct {
231 		/**
232 		 * @runnable: Runnable group lists.
233 		 *
234 		 * When a group has queues that want to execute something,
235 		 * its panthor_group::run_node should be inserted here.
236 		 *
237 		 * One list per-priority.
238 		 */
239 		struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
240 
241 		/**
242 		 * @idle: Idle group lists.
243 		 *
244 		 * When all queues of a group are idle (either because they
245 		 * have nothing to execute, or because they are blocked), the
246 		 * panthor_group::run_node field should be inserted here.
247 		 *
248 		 * One list per-priority.
249 		 */
250 		struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
251 
252 		/**
253 		 * @waiting: List of groups whose queues are blocked on a
254 		 * synchronization object.
255 		 *
256 		 * Insert panthor_group::wait_node here when a group is waiting
257 		 * for synchronization objects to be signaled.
258 		 *
259 		 * This list is evaluated in the @sync_upd_work work.
260 		 */
261 		struct list_head waiting;
262 	} groups;
263 
264 	/**
265 	 * @csg_slots: FW command stream group slots.
266 	 */
267 	struct panthor_csg_slot csg_slots[MAX_CSGS];
268 
269 	/** @csg_slot_count: Number of command stream group slots exposed by the FW. */
270 	u32 csg_slot_count;
271 
272 	/** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
273 	u32 cs_slot_count;
274 
275 	/** @as_slot_count: Number of address space slots supported by the MMU. */
276 	u32 as_slot_count;
277 
278 	/** @used_csg_slot_count: Number of command stream group slots currently used. */
279 	u32 used_csg_slot_count;
280 
281 	/** @sb_slot_count: Number of scoreboard slots. */
282 	u32 sb_slot_count;
283 
284 	/**
285 	 * @might_have_idle_groups: True if an active group might have become idle.
286 	 *
287 	 * This will force a tick, so other runnable groups can be scheduled if one
288 	 * or more active groups became idle.
289 	 */
290 	bool might_have_idle_groups;
291 
292 	/** @pm: Power management related fields. */
293 	struct {
294 		/** @has_ref: True if the scheduler owns a runtime PM reference. */
295 		bool has_ref;
296 	} pm;
297 
298 	/** @reset: Reset related fields. */
299 	struct {
300 		/** @lock: Lock protecting the other reset fields. */
301 		struct mutex lock;
302 
303 		/**
304 		 * @in_progress: True if a reset is in progress.
305 		 *
306 		 * Set to true in panthor_sched_pre_reset() and back to false in
307 		 * panthor_sched_post_reset().
308 		 */
309 		atomic_t in_progress;
310 
311 		/**
312 		 * @stopped_groups: List containing all groups that were stopped
313 		 * before a reset.
314 		 *
315 		 * Insert panthor_group::run_node in the pre_reset path.
316 		 */
317 		struct list_head stopped_groups;
318 	} reset;
319 };
320 
321 /**
322  * struct panthor_syncobj_32b - 32-bit FW synchronization object
323  */
324 struct panthor_syncobj_32b {
325 	/** @seqno: Sequence number. */
326 	u32 seqno;
327 
328 	/**
329 	 * @status: Status.
330 	 *
331 	 * Not zero on failure.
332 	 */
333 	u32 status;
334 };
335 
336 /**
337  * struct panthor_syncobj_64b - 64-bit FW synchronization object
338  */
339 struct panthor_syncobj_64b {
340 	/** @seqno: Sequence number. */
341 	u64 seqno;
342 
343 	/**
344 	 * @status: Status.
345 	 *
346 	 * Not zero on failure.
347 	 */
348 	u32 status;
349 
350 	/** @pad: MBZ. */
351 	u32 pad;
352 };
353 
354 /**
355  * struct panthor_queue - Execution queue
356  */
357 struct panthor_queue {
358 	/** @scheduler: DRM scheduler used for this queue. */
359 	struct drm_gpu_scheduler scheduler;
360 
361 	/** @entity: DRM scheduling entity used for this queue. */
362 	struct drm_sched_entity entity;
363 
364 	/**
365 	 * @remaining_time: Time remaining before the job timeout expires.
366 	 *
367 	 * The job timeout is suspended when the queue is not scheduled by the
368 	 * FW. Every time we suspend the timer, we need to save the remaining
369 	 * time so we can restore it later on.
370 	 */
371 	unsigned long remaining_time;
372 
373 	/** @timeout_suspended: True if the job timeout was suspended. */
374 	bool timeout_suspended;
375 
376 	/**
377 	 * @doorbell_id: Doorbell assigned to this queue.
378 	 *
379 	 * Right now, all queues of a group share the same doorbell, and the
380 	 * doorbell ID is set to group_slot + 1 when the group is assigned a
381 	 * slot. But we might decide to provide fine-grained doorbell assignment
382 	 * at some point, so we don't have to wake up all queues in a group
383 	 * every time one of them is updated.
384 	 */
385 	u8 doorbell_id;
386 
387 	/**
388 	 * @priority: Priority of the queue inside the group.
389 	 *
390 	 * Must be less than 16 (Only 4 bits available).
391 	 */
392 	u8 priority;
393 #define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
394 
395 	/** @ringbuf: Command stream ring-buffer. */
396 	struct panthor_kernel_bo *ringbuf;
397 
398 	/** @iface: Firmware interface. */
399 	struct {
400 		/** @mem: FW memory allocated for this interface. */
401 		struct panthor_kernel_bo *mem;
402 
403 		/** @input: Input interface. */
404 		struct panthor_fw_ringbuf_input_iface *input;
405 
406 		/** @output: Output interface. */
407 		const struct panthor_fw_ringbuf_output_iface *output;
408 
409 		/** @input_fw_va: FW virtual address of the input interface buffer. */
410 		u32 input_fw_va;
411 
412 		/** @output_fw_va: FW virtual address of the output interface buffer. */
413 		u32 output_fw_va;
414 	} iface;
415 
416 	/**
417 	 * @syncwait: Stores information about the synchronization object this
418 	 * queue is waiting on.
419 	 */
420 	struct {
421 		/** @gpu_va: GPU address of the synchronization object. */
422 		u64 gpu_va;
423 
424 		/** @ref: Reference value to compare against. */
425 		u64 ref;
426 
427 		/** @gt: True if this is a greater-than test. */
428 		bool gt;
429 
430 		/** @sync64: True if this is a 64-bit sync object. */
431 		bool sync64;
432 
433 		/** @obj: Buffer object holding the synchronization object. */
434 		struct drm_gem_object *obj;
435 
436 		/** @offset: Offset of the synchronization object inside @obj. */
437 		u64 offset;
438 
439 		/**
440 		 * @kmap: Kernel mapping of the buffer object holding the
441 		 * synchronization object.
442 		 */
443 		void *kmap;
444 	} syncwait;
445 
446 	/** @fence_ctx: Fence context fields. */
447 	struct {
448 		/** @lock: Used to protect access to all fences allocated by this context. */
449 		spinlock_t lock;
450 
451 		/**
452 		 * @id: Fence context ID.
453 		 *
454 		 * Allocated with dma_fence_context_alloc().
455 		 */
456 		u64 id;
457 
458 		/** @seqno: Sequence number of the last initialized fence. */
459 		atomic64_t seqno;
460 
461 		/**
462 		 * @in_flight_jobs: List containing all in-flight jobs.
463 		 *
464 		 * Used to keep track of jobs so we can signal panthor_job::done_fence
465 		 * when the synchronization object attached to the queue is signaled.
466 		 */
467 		struct list_head in_flight_jobs;
468 	} fence_ctx;
469 };
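
/*
 * Illustrative sketch (an assumption, simplified; the real evaluation lives in
 * the sync update logic further down this file): a queue blocked on
 * panthor_queue::syncwait is considered unblocked once the referenced sync
 * object satisfies the wait condition, roughly:
 *
 *	u64 val = syncwait.sync64 ? syncobj64->seqno : syncobj32->seqno;
 *	bool unblocked = syncwait.gt ? val > syncwait.ref : val >= syncwait.ref;
 *
 * (the non-GT case is assumed to map to a greater-or-equal test).
 */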
470 
471 /**
472  * enum panthor_group_state - Scheduling group state.
473  */
474 enum panthor_group_state {
475 	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
476 	PANTHOR_CS_GROUP_CREATED,
477 
478 	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
479 	PANTHOR_CS_GROUP_ACTIVE,
480 
481 	/**
482 	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
483 	 * inactive/suspended right now.
484 	 */
485 	PANTHOR_CS_GROUP_SUSPENDED,
486 
487 	/**
488 	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
489 	 *
490 	 * Can no longer be scheduled. The only allowed action is a destruction.
491 	 */
492 	PANTHOR_CS_GROUP_TERMINATED,
493 
494 	/**
495 	 * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
496 	 *
497 	 * The FW returned an inconsistent state. The group is flagged unusable
498 	 * and can no longer be scheduled. The only allowed action is a
499 	 * destruction.
500 	 *
501 	 * When that happens, we also schedule a FW reset, to start from a fresh
502 	 * state.
503 	 */
504 	PANTHOR_CS_GROUP_UNKNOWN_STATE,
505 };
506 
507 /**
508  * struct panthor_group - Scheduling group object
509  */
510 struct panthor_group {
511 	/** @refcount: Reference count */
512 	struct kref refcount;
513 
514 	/** @ptdev: Device. */
515 	struct panthor_device *ptdev;
516 
517 	/** @vm: VM bound to the group. */
518 	struct panthor_vm *vm;
519 
520 	/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
521 	u64 compute_core_mask;
522 
523 	/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
524 	u64 fragment_core_mask;
525 
526 	/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
527 	u64 tiler_core_mask;
528 
529 	/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
530 	u8 max_compute_cores;
531 
532 	/** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
533 	u8 max_fragment_cores;
534 
535 	/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
536 	u8 max_tiler_cores;
537 
538 	/** @priority: Group priority (check panthor_csg_priority). */
539 	u8 priority;
540 
541 	/** @blocked_queues: Bitmask reflecting the blocked queues. */
542 	u32 blocked_queues;
543 
544 	/** @idle_queues: Bitmask reflecting the idle queues. */
545 	u32 idle_queues;
546 
547 	/** @fatal_lock: Lock used to protect access to fatal fields. */
548 	spinlock_t fatal_lock;
549 
550 	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
551 	u32 fatal_queues;
552 
553 	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
554 	atomic_t tiler_oom;
555 
556 	/** @queue_count: Number of queues in this group. */
557 	u32 queue_count;
558 
559 	/** @queues: Queues owned by this group. */
560 	struct panthor_queue *queues[MAX_CS_PER_CSG];
561 
562 	/**
563 	 * @csg_id: ID of the FW group slot.
564 	 *
565 	 * -1 when the group is not scheduled/active.
566 	 */
567 	int csg_id;
568 
569 	/**
570 	 * @destroyed: True when the group has been destroyed.
571 	 *
572 	 * If a group is destroyed it becomes useless: no further jobs can be submitted
573 	 * to its queues. We simply wait for all references to be dropped so we can
574 	 * release the group object.
575 	 */
576 	bool destroyed;
577 
578 	/**
579 	 * @timedout: True when a timeout occurred on any of the queues owned by
580 	 * this group.
581 	 *
582 	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
583 	 * timeout situation is unrecoverable, and the group becomes useless.
584 	 * We simply wait for all references to be dropped so we can release the
585 	 * group object.
586 	 */
587 	bool timedout;
588 
589 	/**
590 	 * @syncobjs: Pool of per-queue synchronization objects.
591 	 *
592 	 * One sync object per queue. The position of the sync object is
593 	 * determined by the queue index.
594 	 */
595 	struct panthor_kernel_bo *syncobjs;
596 
597 	/** @state: Group state. */
598 	enum panthor_group_state state;
599 
600 	/**
601 	 * @suspend_buf: Suspend buffer.
602 	 *
603 	 * Stores the state of the group and its queues when a group is suspended.
604 	 * Used at resume time to restore the group in its previous state.
605 	 *
606 	 * The size of the suspend buffer is exposed through the FW interface.
607 	 */
608 	struct panthor_kernel_bo *suspend_buf;
609 
610 	/**
611 	 * @protm_suspend_buf: Protection mode suspend buffer.
612 	 *
613 	 * Stores the state of the group and its queues when a group that's in
614 	 * protection mode is suspended.
615 	 *
616 	 * Used at resume time to restore the group in its previous state.
617 	 *
618 	 * The size of the protection mode suspend buffer is exposed through the
619 	 * FW interface.
620 	 */
621 	struct panthor_kernel_bo *protm_suspend_buf;
622 
623 	/** @sync_upd_work: Work used to check/signal job fences. */
624 	struct work_struct sync_upd_work;
625 
626 	/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
627 	struct work_struct tiler_oom_work;
628 
629 	/** @term_work: Work used to finish the group termination procedure. */
630 	struct work_struct term_work;
631 
632 	/**
633 	 * @release_work: Work used to release group resources.
634 	 *
635 	 * We need to postpone the group release to avoid a deadlock when
636 	 * the last ref is released in the tick work.
637 	 */
638 	struct work_struct release_work;
639 
640 	/**
641 	 * @run_node: Node used to insert the group in the
642 	 * panthor_scheduler::groups::{runnable,idle} and
643 	 * panthor_scheduler::reset::stopped_groups lists.
644 	 */
645 	struct list_head run_node;
646 
647 	/**
648 	 * @wait_node: Node used to insert the group in the
649 	 * panthor_scheduler::groups::waiting list.
650 	 */
651 	struct list_head wait_node;
652 };
653 
654 /**
655  * group_queue_work() - Queue a group work
656  * @group: Group to queue the work for.
657  * @wname: Work name.
658  *
659  * Grabs a ref and queues a work item to the scheduler workqueue. If
660  * the work was already queued, we release the reference we grabbed.
661  *
662  * Work callbacks must release the reference we grabbed here.
663  */
664 #define group_queue_work(group, wname) \
665 	do { \
666 		group_get(group); \
667 		if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
668 			group_put(group); \
669 	} while (0)
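
/* Example usage (see csg_slot_sync_update_locked() below): group_queue_work(group, sync_upd); */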
670 
671 /**
672  * sched_queue_work() - Queue a scheduler work.
673  * @sched: Scheduler object.
674  * @wname: Work name.
675  *
676  * Conditionally queues a scheduler work if no reset is pending/in-progress.
677  */
678 #define sched_queue_work(sched, wname) \
679 	do { \
680 		if (!atomic_read(&(sched)->reset.in_progress) && \
681 		    !panthor_device_reset_is_pending((sched)->ptdev)) \
682 			queue_work((sched)->wq, &(sched)->wname ## _work); \
683 	} while (0)
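
/* Example usage (see panthor_sched_report_fw_events() below): sched_queue_work(ptdev->scheduler, fw_events); */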
684 
685 /**
686  * sched_queue_delayed_work() - Queue a scheduler delayed work.
687  * @sched: Scheduler object.
688  * @wname: Work name.
689  * @delay: Work delay in jiffies.
690  *
691  * Conditionally queues a scheduler delayed work if no reset is
692  * pending/in-progress.
693  */
694 #define sched_queue_delayed_work(sched, wname, delay) \
695 	do { \
696 		if (!atomic_read(&sched->reset.in_progress) && \
697 		    !panthor_device_reset_is_pending((sched)->ptdev)) \
698 			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
699 	} while (0)
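
/* Example usage (see the FW event handlers below): sched_queue_delayed_work(sched, tick, 0); */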
700 
701 /*
702  * We currently set the maximum number of groups per file to an arbitrarily
703  * low value. But this can be updated if we need more.
704  */
705 #define MAX_GROUPS_PER_POOL 128
706 
707 /**
708  * struct panthor_group_pool - Group pool
709  *
710  * Each file gets assigned a group pool.
711  */
712 struct panthor_group_pool {
713 	/** @xa: Xarray used to manage group handles. */
714 	struct xarray xa;
715 };
716 
717 /**
718  * struct panthor_job - Used to manage a GPU job
719  */
720 struct panthor_job {
721 	/** @base: Inherit from drm_sched_job. */
722 	struct drm_sched_job base;
723 
724 	/** @refcount: Reference count. */
725 	struct kref refcount;
726 
727 	/** @group: Group of the queue this job will be pushed to. */
728 	struct panthor_group *group;
729 
730 	/** @queue_idx: Index of the queue inside @group. */
731 	u32 queue_idx;
732 
733 	/** @call_info: Information about the userspace command stream call. */
734 	struct {
735 		/** @start: GPU address of the userspace command stream. */
736 		u64 start;
737 
738 		/** @size: Size of the userspace command stream. */
739 		u32 size;
740 
741 		/**
742 		 * @latest_flush: Flush ID at the time the userspace command
743 		 * stream was built.
744 		 *
745 		 * Needed for the flush reduction mechanism.
746 		 */
747 		u32 latest_flush;
748 	} call_info;
749 
750 	/** @ringbuf: Position of this job in the ring buffer. */
751 	struct {
752 		/** @start: Start offset. */
753 		u64 start;
754 
755 		/** @end: End offset. */
756 		u64 end;
757 	} ringbuf;
758 
759 	/**
760 	 * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs
761 	 * list.
762 	 */
763 	struct list_head node;
764 
765 	/** @done_fence: Fence signaled when the job is finished or cancelled. */
766 	struct dma_fence *done_fence;
767 };
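
/*
 * Illustrative sketch (an assumption based on the @in_flight_jobs and
 * panthor_group::syncobjs descriptions above): when the per-queue sync object
 * is updated, in-flight jobs whose sequence number has been reached get their
 * done_fence signaled, roughly (with queue->fence_ctx.lock held):
 *
 *	list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
 *		if (syncobj->seqno < job->done_fence->seqno)
 *			break;
 *		list_del_init(&job->node);
 *		dma_fence_signal(job->done_fence);
 *	}
 */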
768 
769 static void
770 panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
771 {
772 	if (queue->syncwait.kmap) {
773 		struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
774 
775 		drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
776 		queue->syncwait.kmap = NULL;
777 	}
778 
779 	drm_gem_object_put(queue->syncwait.obj);
780 	queue->syncwait.obj = NULL;
781 }
782 
783 static void *
784 panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
785 {
786 	struct panthor_device *ptdev = group->ptdev;
787 	struct panthor_gem_object *bo;
788 	struct iosys_map map;
789 	int ret;
790 
791 	if (queue->syncwait.kmap)
792 		return queue->syncwait.kmap + queue->syncwait.offset;
793 
794 	bo = panthor_vm_get_bo_for_va(group->vm,
795 				      queue->syncwait.gpu_va,
796 				      &queue->syncwait.offset);
797 	if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
798 		goto err_put_syncwait_obj;
799 
800 	queue->syncwait.obj = &bo->base.base;
801 	ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
802 	if (drm_WARN_ON(&ptdev->base, ret))
803 		goto err_put_syncwait_obj;
804 
805 	queue->syncwait.kmap = map.vaddr;
806 	if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
807 		goto err_put_syncwait_obj;
808 
809 	return queue->syncwait.kmap + queue->syncwait.offset;
810 
811 err_put_syncwait_obj:
812 	panthor_queue_put_syncwait_obj(queue);
813 	return NULL;
814 }
815 
816 static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
817 {
818 	if (IS_ERR_OR_NULL(queue))
819 		return;
820 
821 	if (queue->entity.fence_context)
822 		drm_sched_entity_destroy(&queue->entity);
823 
824 	if (queue->scheduler.ops)
825 		drm_sched_fini(&queue->scheduler);
826 
827 	panthor_queue_put_syncwait_obj(queue);
828 
829 	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
830 	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
831 
832 	kfree(queue);
833 }
834 
835 static void group_release_work(struct work_struct *work)
836 {
837 	struct panthor_group *group = container_of(work,
838 						   struct panthor_group,
839 						   release_work);
840 	struct panthor_device *ptdev = group->ptdev;
841 	u32 i;
842 
843 	for (i = 0; i < group->queue_count; i++)
844 		group_free_queue(group, group->queues[i]);
845 
846 	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
847 	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
848 	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
849 
850 	panthor_vm_put(group->vm);
851 	kfree(group);
852 }
853 
854 static void group_release(struct kref *kref)
855 {
856 	struct panthor_group *group = container_of(kref,
857 						   struct panthor_group,
858 						   refcount);
859 	struct panthor_device *ptdev = group->ptdev;
860 
861 	drm_WARN_ON(&ptdev->base, group->csg_id >= 0);
862 	drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node));
863 	drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node));
864 
865 	queue_work(panthor_cleanup_wq, &group->release_work);
866 }
867 
868 static void group_put(struct panthor_group *group)
869 {
870 	if (group)
871 		kref_put(&group->refcount, group_release);
872 }
873 
874 static struct panthor_group *
875 group_get(struct panthor_group *group)
876 {
877 	if (group)
878 		kref_get(&group->refcount);
879 
880 	return group;
881 }
882 
883 /**
884  * group_bind_locked() - Bind a group to a group slot
885  * @group: Group.
886  * @csg_id: Slot.
887  *
888  * Return: 0 on success, a negative error code otherwise.
889  */
890 static int
891 group_bind_locked(struct panthor_group *group, u32 csg_id)
892 {
893 	struct panthor_device *ptdev = group->ptdev;
894 	struct panthor_csg_slot *csg_slot;
895 	int ret;
896 
897 	lockdep_assert_held(&ptdev->scheduler->lock);
898 
899 	if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
900 			ptdev->scheduler->csg_slots[csg_id].group))
901 		return -EINVAL;
902 
903 	ret = panthor_vm_active(group->vm);
904 	if (ret)
905 		return ret;
906 
907 	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
908 	group_get(group);
909 	group->csg_id = csg_id;
910 
911 	/* Dummy doorbell allocation: doorbell is assigned to the group and
912 	 * all queues use the same doorbell.
913 	 *
914 	 * TODO: Implement LRU-based doorbell assignment, so the most often
915 	 * updated queues get their own doorbell, thus avoiding useless checks
916 	 * on queues belonging to the same group that are rarely updated.
917 	 */
918 	for (u32 i = 0; i < group->queue_count; i++)
919 		group->queues[i]->doorbell_id = csg_id + 1;
920 
921 	csg_slot->group = group;
922 
923 	return 0;
924 }
925 
926 /**
927  * group_unbind_locked() - Unbind a group from a slot.
928  * @group: Group to unbind.
929  *
930  * Return: 0 on success, a negative error code otherwise.
931  */
932 static int
933 group_unbind_locked(struct panthor_group *group)
934 {
935 	struct panthor_device *ptdev = group->ptdev;
936 	struct panthor_csg_slot *slot;
937 
938 	lockdep_assert_held(&ptdev->scheduler->lock);
939 
940 	if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
941 		return -EINVAL;
942 
943 	if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
944 		return -EINVAL;
945 
946 	slot = &ptdev->scheduler->csg_slots[group->csg_id];
947 	panthor_vm_idle(group->vm);
948 	group->csg_id = -1;
949 
950 	/* Tiler OOM events will be re-issued next time the group is scheduled. */
951 	atomic_set(&group->tiler_oom, 0);
952 	cancel_work(&group->tiler_oom_work);
953 
954 	for (u32 i = 0; i < group->queue_count; i++)
955 		group->queues[i]->doorbell_id = -1;
956 
957 	slot->group = NULL;
958 
959 	group_put(group);
960 	return 0;
961 }
962 
963 /**
964  * cs_slot_prog_locked() - Program a queue slot
965  * @ptdev: Device.
966  * @csg_id: Group slot ID.
967  * @cs_id: Queue slot ID.
968  *
969  * Program a queue slot with the queue information so things can start being
970  * executed on this queue.
971  *
972  * The group slot must have a group bound to it already (group_bind_locked()).
973  */
974 static void
975 cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
976 {
977 	struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
978 	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
979 
980 	lockdep_assert_held(&ptdev->scheduler->lock);
981 
982 	queue->iface.input->extract = queue->iface.output->extract;
983 	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
984 
985 	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
986 	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
987 	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
988 	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
989 	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
990 				  CS_CONFIG_DOORBELL(queue->doorbell_id);
991 	cs_iface->input->ack_irq_mask = ~0;
992 	panthor_fw_update_reqs(cs_iface, req,
993 			       CS_IDLE_SYNC_WAIT |
994 			       CS_IDLE_EMPTY |
995 			       CS_STATE_START |
996 			       CS_EXTRACT_EVENT,
997 			       CS_IDLE_SYNC_WAIT |
998 			       CS_IDLE_EMPTY |
999 			       CS_STATE_MASK |
1000 			       CS_EXTRACT_EVENT);
1001 	if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
1002 		drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
1003 		queue->timeout_suspended = false;
1004 	}
1005 }
1006 
1007 /**
1008  * cs_slot_reset_locked() - Reset a queue slot
1009  * @ptdev: Device.
1010  * @csg_id: Group slot.
1011  * @cs_id: Queue slot.
1012  *
1013  * Change the queue slot state to STOP and suspend the queue timeout if
1014  * the queue is not blocked.
1015  *
1016  * The group slot must have a group bound to it (group_bind_locked()).
1017  */
1018 static int
1019 cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1020 {
1021 	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1022 	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1023 	struct panthor_queue *queue = group->queues[cs_id];
1024 
1025 	lockdep_assert_held(&ptdev->scheduler->lock);
1026 
1027 	panthor_fw_update_reqs(cs_iface, req,
1028 			       CS_STATE_STOP,
1029 			       CS_STATE_MASK);
1030 
1031 	/* If the queue is blocked, we want to keep the timeout running, so
1032 	 * we can detect unbounded waits and kill the group when that happens.
1033 	 */
1034 	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
1035 		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
1036 		queue->timeout_suspended = true;
1037 		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
1038 	}
1039 
1040 	return 0;
1041 }
1042 
1043 /**
1044  * csg_slot_sync_priority_locked() - Synchronize the group slot priority
1045  * @ptdev: Device.
1046  * @csg_id: Group slot ID.
1047  *
1048  * Group slot priority updates happen asynchronously. When we receive a
1049  * %CSG_ENDPOINT_CONFIG acknowledgment, we know the update is effective,
1050  * and can reflect it to our panthor_csg_slot object.
1051  */
1052 static void
1053 csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
1054 {
1055 	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1056 	struct panthor_fw_csg_iface *csg_iface;
1057 
1058 	lockdep_assert_held(&ptdev->scheduler->lock);
1059 
1060 	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1061 	csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
1062 }
1063 
1064 /**
1065  * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
1066  * @ptdev: Device.
1067  * @csg_id: Group slot.
1068  * @cs_id: Queue slot.
1069  *
1070  * Queue state is updated on group suspend or STATUS_UPDATE event.
1071  */
1072 static void
1073 cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1074 {
1075 	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1076 	struct panthor_queue *queue = group->queues[cs_id];
1077 	struct panthor_fw_cs_iface *cs_iface =
1078 		panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
1079 
1080 	u32 status_wait_cond;
1081 
1082 	switch (cs_iface->output->status_blocked_reason) {
1083 	case CS_STATUS_BLOCKED_REASON_UNBLOCKED:
1084 		if (queue->iface.input->insert == queue->iface.output->extract &&
1085 		    cs_iface->output->status_scoreboards == 0)
1086 			group->idle_queues |= BIT(cs_id);
1087 		break;
1088 
1089 	case CS_STATUS_BLOCKED_REASON_SYNC_WAIT:
1090 		if (list_empty(&group->wait_node)) {
1091 			list_move_tail(&group->wait_node,
1092 				       &group->ptdev->scheduler->groups.waiting);
1093 		}
1094 		group->blocked_queues |= BIT(cs_id);
1095 		queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
1096 		queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
1097 		status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
1098 		queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT;
1099 		if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) {
1100 			u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi;
1101 
1102 			queue->syncwait.sync64 = true;
1103 			queue->syncwait.ref |= sync_val_hi << 32;
1104 		} else {
1105 			queue->syncwait.sync64 = false;
1106 		}
1107 		break;
1108 
1109 	default:
1110 		/* Other reasons are not blocking. Consider the queue as runnable
1111 		 * in those cases.
1112 		 */
1113 		break;
1114 	}
1115 }
1116 
1117 static void
1118 csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
1119 {
1120 	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1121 	struct panthor_group *group = csg_slot->group;
1122 	u32 i;
1123 
1124 	lockdep_assert_held(&ptdev->scheduler->lock);
1125 
1126 	group->idle_queues = 0;
1127 	group->blocked_queues = 0;
1128 
1129 	for (i = 0; i < group->queue_count; i++) {
1130 		if (group->queues[i])
1131 			cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
1132 	}
1133 }
1134 
1135 static void
1136 csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
1137 {
1138 	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1139 	struct panthor_fw_csg_iface *csg_iface;
1140 	struct panthor_group *group;
1141 	enum panthor_group_state new_state, old_state;
1142 	u32 csg_state;
1143 
1144 	lockdep_assert_held(&ptdev->scheduler->lock);
1145 
1146 	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1147 	group = csg_slot->group;
1148 
1149 	if (!group)
1150 		return;
1151 
1152 	old_state = group->state;
1153 	csg_state = csg_iface->output->ack & CSG_STATE_MASK;
1154 	switch (csg_state) {
1155 	case CSG_STATE_START:
1156 	case CSG_STATE_RESUME:
1157 		new_state = PANTHOR_CS_GROUP_ACTIVE;
1158 		break;
1159 	case CSG_STATE_TERMINATE:
1160 		new_state = PANTHOR_CS_GROUP_TERMINATED;
1161 		break;
1162 	case CSG_STATE_SUSPEND:
1163 		new_state = PANTHOR_CS_GROUP_SUSPENDED;
1164 		break;
1165 	default:
1166 		/* The unknown state might be caused by a FW state corruption,
1167 		 * which means the group metadata can't be trusted anymore, and
1168 		 * the SUSPEND operation might propagate the corruption to the
1169 		 * suspend buffers. Flag the group state as unknown to make
1170 		 * sure it's unusable after that point.
1171 		 */
1172 		drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
1173 			csg_id, csg_state);
1174 		new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
1175 		break;
1176 	}
1177 
1178 	if (old_state == new_state)
1179 		return;
1180 
1181 	/* The unknown state might be caused by a FW issue, reset the FW to
1182 	 * take a fresh start.
1183 	 */
1184 	if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
1185 		panthor_device_schedule_reset(ptdev);
1186 
1187 	if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
1188 		csg_slot_sync_queues_state_locked(ptdev, csg_id);
1189 
1190 	if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
1191 		u32 i;
1192 
1193 		/* Reset the queue slots so we start from a clean
1194 		 * state when starting/resuming a new group on this
1195 		 * CSG slot. No wait needed here, and no doorbell
1196 		 * ring either, since the CS slot will only be re-used
1197 		 * on the next CSG start operation.
1198 		 */
1199 		for (i = 0; i < group->queue_count; i++) {
1200 			if (group->queues[i])
1201 				cs_slot_reset_locked(ptdev, csg_id, i);
1202 		}
1203 	}
1204 
1205 	group->state = new_state;
1206 }
1207 
1208 static int
1209 csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
1210 {
1211 	struct panthor_fw_csg_iface *csg_iface;
1212 	struct panthor_csg_slot *csg_slot;
1213 	struct panthor_group *group;
1214 	u32 queue_mask = 0, i;
1215 
1216 	lockdep_assert_held(&ptdev->scheduler->lock);
1217 
1218 	if (priority > MAX_CSG_PRIO)
1219 		return -EINVAL;
1220 
1221 	if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
1222 		return -EINVAL;
1223 
1224 	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1225 	group = csg_slot->group;
1226 	if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE)
1227 		return 0;
1228 
1229 	csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
1230 
1231 	for (i = 0; i < group->queue_count; i++) {
1232 		if (group->queues[i]) {
1233 			cs_slot_prog_locked(ptdev, csg_id, i);
1234 			queue_mask |= BIT(i);
1235 		}
1236 	}
1237 
1238 	csg_iface->input->allow_compute = group->compute_core_mask;
1239 	csg_iface->input->allow_fragment = group->fragment_core_mask;
1240 	csg_iface->input->allow_other = group->tiler_core_mask;
1241 	csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
1242 					 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
1243 					 CSG_EP_REQ_TILER(group->max_tiler_cores) |
1244 					 CSG_EP_REQ_PRIORITY(priority);
1245 	csg_iface->input->config = panthor_vm_as(group->vm);
1246 
1247 	if (group->suspend_buf)
1248 		csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
1249 	else
1250 		csg_iface->input->suspend_buf = 0;
1251 
1252 	if (group->protm_suspend_buf) {
1253 		csg_iface->input->protm_suspend_buf =
1254 			panthor_kernel_bo_gpuva(group->protm_suspend_buf);
1255 	} else {
1256 		csg_iface->input->protm_suspend_buf = 0;
1257 	}
1258 
1259 	csg_iface->input->ack_irq_mask = ~0;
1260 	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask);
1261 	return 0;
1262 }
1263 
1264 static void
1265 cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
1266 				   u32 csg_id, u32 cs_id)
1267 {
1268 	struct panthor_scheduler *sched = ptdev->scheduler;
1269 	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1270 	struct panthor_group *group = csg_slot->group;
1271 	struct panthor_fw_cs_iface *cs_iface;
1272 	u32 fatal;
1273 	u64 info;
1274 
1275 	lockdep_assert_held(&sched->lock);
1276 
1277 	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1278 	fatal = cs_iface->output->fatal;
1279 	info = cs_iface->output->fatal_info;
1280 
1281 	if (group)
1282 		group->fatal_queues |= BIT(cs_id);
1283 
1284 	sched_queue_delayed_work(sched, tick, 0);
1285 	drm_warn(&ptdev->base,
1286 		 "CSG slot %d CS slot: %d\n"
1287 		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
1288 		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
1289 		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
1290 		 csg_id, cs_id,
1291 		 (unsigned int)CS_EXCEPTION_TYPE(fatal),
1292 		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
1293 		 (unsigned int)CS_EXCEPTION_DATA(fatal),
1294 		 info);
1295 }
1296 
1297 static void
1298 cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
1299 				   u32 csg_id, u32 cs_id)
1300 {
1301 	struct panthor_scheduler *sched = ptdev->scheduler;
1302 	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1303 	struct panthor_group *group = csg_slot->group;
1304 	struct panthor_queue *queue = group && cs_id < group->queue_count ?
1305 				      group->queues[cs_id] : NULL;
1306 	struct panthor_fw_cs_iface *cs_iface;
1307 	u32 fault;
1308 	u64 info;
1309 
1310 	lockdep_assert_held(&sched->lock);
1311 
1312 	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1313 	fault = cs_iface->output->fault;
1314 	info = cs_iface->output->fault_info;
1315 
1316 	if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
1317 		u64 cs_extract = queue->iface.output->extract;
1318 		struct panthor_job *job;
1319 
1320 		spin_lock(&queue->fence_ctx.lock);
1321 		list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) {
1322 			if (cs_extract >= job->ringbuf.end)
1323 				continue;
1324 
1325 			if (cs_extract < job->ringbuf.start)
1326 				break;
1327 
1328 			dma_fence_set_error(job->done_fence, -EINVAL);
1329 		}
1330 		spin_unlock(&queue->fence_ctx.lock);
1331 	}
1332 
1333 	drm_warn(&ptdev->base,
1334 		 "CSG slot %d CS slot: %d\n"
1335 		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
1336 		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
1337 		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
1338 		 csg_id, cs_id,
1339 		 (unsigned int)CS_EXCEPTION_TYPE(fault),
1340 		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
1341 		 (unsigned int)CS_EXCEPTION_DATA(fault),
1342 		 info);
1343 }
1344 
1345 static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
1346 {
1347 	struct panthor_device *ptdev = group->ptdev;
1348 	struct panthor_scheduler *sched = ptdev->scheduler;
1349 	u32 renderpasses_in_flight, pending_frag_count;
1350 	struct panthor_heap_pool *heaps = NULL;
1351 	u64 heap_address, new_chunk_va = 0;
1352 	u32 vt_start, vt_end, frag_end;
1353 	int ret, csg_id;
1354 
1355 	mutex_lock(&sched->lock);
1356 	csg_id = group->csg_id;
1357 	if (csg_id >= 0) {
1358 		struct panthor_fw_cs_iface *cs_iface;
1359 
1360 		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1361 		heaps = panthor_vm_get_heap_pool(group->vm, false);
1362 		heap_address = cs_iface->output->heap_address;
1363 		vt_start = cs_iface->output->heap_vt_start;
1364 		vt_end = cs_iface->output->heap_vt_end;
1365 		frag_end = cs_iface->output->heap_frag_end;
1366 		renderpasses_in_flight = vt_start - frag_end;
1367 		pending_frag_count = vt_end - frag_end;
1368 	}
1369 	mutex_unlock(&sched->lock);
1370 
1371 	/* The group got scheduled out, we stop here. We will get a new tiler OOM event
1372 	 * when it's scheduled again.
1373 	 */
1374 	if (unlikely(csg_id < 0))
1375 		return 0;
1376 
1377 	if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
1378 		ret = -EINVAL;
1379 	} else {
1380 		/* We do the allocation without holding the scheduler lock to avoid
1381 		 * blocking the scheduling.
1382 		 */
1383 		ret = panthor_heap_grow(heaps, heap_address,
1384 					renderpasses_in_flight,
1385 					pending_frag_count, &new_chunk_va);
1386 	}
1387 
1388 	if (ret && ret != -EBUSY) {
1389 		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
1390 		group->fatal_queues |= BIT(cs_id);
1391 		sched_queue_delayed_work(sched, tick, 0);
1392 		goto out_put_heap_pool;
1393 	}
1394 
1395 	mutex_lock(&sched->lock);
1396 	csg_id = group->csg_id;
1397 	if (csg_id >= 0) {
1398 		struct panthor_fw_csg_iface *csg_iface;
1399 		struct panthor_fw_cs_iface *cs_iface;
1400 
1401 		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1402 		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1403 
1404 		cs_iface->input->heap_start = new_chunk_va;
1405 		cs_iface->input->heap_end = new_chunk_va;
1406 		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
1407 		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
1408 		panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1409 	}
1410 	mutex_unlock(&sched->lock);
1411 
1412 	/* We allocated a chunk, but couldn't link it to the heap
1413 	 * context because the group was scheduled out while we were
1414 	 * allocating memory. We need to return this chunk to the heap.
1415 	 */
1416 	if (unlikely(csg_id < 0 && new_chunk_va))
1417 		panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
1418 
1419 	ret = 0;
1420 
1421 out_put_heap_pool:
1422 	panthor_heap_pool_put(heaps);
1423 	return ret;
1424 }
1425 
1426 static void group_tiler_oom_work(struct work_struct *work)
1427 {
1428 	struct panthor_group *group =
1429 		container_of(work, struct panthor_group, tiler_oom_work);
1430 	u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0);
1431 
1432 	while (tiler_oom) {
1433 		u32 cs_id = ffs(tiler_oom) - 1;
1434 
1435 		group_process_tiler_oom(group, cs_id);
1436 		tiler_oom &= ~BIT(cs_id);
1437 	}
1438 
1439 	group_put(group);
1440 }
1441 
1442 static void
1443 cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
1444 				       u32 csg_id, u32 cs_id)
1445 {
1446 	struct panthor_scheduler *sched = ptdev->scheduler;
1447 	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1448 	struct panthor_group *group = csg_slot->group;
1449 
1450 	lockdep_assert_held(&sched->lock);
1451 
1452 	if (drm_WARN_ON(&ptdev->base, !group))
1453 		return;
1454 
1455 	atomic_or(BIT(cs_id), &group->tiler_oom);
1456 
1457 	/* We don't use group_queue_work() here because we want to queue the
1458 	 * work item to the heap_alloc_wq.
1459 	 */
1460 	group_get(group);
1461 	if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
1462 		group_put(group);
1463 }
1464 
1465 static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
1466 				       u32 csg_id, u32 cs_id)
1467 {
1468 	struct panthor_fw_cs_iface *cs_iface;
1469 	u32 req, ack, events;
1470 
1471 	lockdep_assert_held(&ptdev->scheduler->lock);
1472 
1473 	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1474 	req = cs_iface->input->req;
1475 	ack = cs_iface->output->ack;
1476 	events = (req ^ ack) & CS_EVT_MASK;
1477 
1478 	if (events & CS_FATAL)
1479 		cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
1480 
1481 	if (events & CS_FAULT)
1482 		cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
1483 
1484 	if (events & CS_TILER_OOM)
1485 		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
1486 
1487 	/* We don't acknowledge the TILER_OOM event since its handling is
1488 	 * deferred to a separate work.
1489 	 */
1490 	panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
1491 
1492 	return (events & (CS_FAULT | CS_TILER_OOM)) != 0;
1493 }
1494 
1495 static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
1496 {
1497 	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1498 	struct panthor_fw_csg_iface *csg_iface;
1499 
1500 	lockdep_assert_held(&ptdev->scheduler->lock);
1501 
1502 	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1503 	csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE;
1504 }
1505 
1506 static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
1507 {
1508 	struct panthor_scheduler *sched = ptdev->scheduler;
1509 
1510 	lockdep_assert_held(&sched->lock);
1511 
1512 	sched->might_have_idle_groups = true;
1513 
1514 	/* Schedule a tick so we can evict idle groups and schedule non-idle
1515 	 * ones. This will also update runtime PM and devfreq busy/idle states,
1516 	 * so the device can lower its frequency or get suspended.
1517 	 */
1518 	sched_queue_delayed_work(sched, tick, 0);
1519 }
1520 
1521 static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
1522 					u32 csg_id)
1523 {
1524 	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1525 	struct panthor_group *group = csg_slot->group;
1526 
1527 	lockdep_assert_held(&ptdev->scheduler->lock);
1528 
1529 	if (group)
1530 		group_queue_work(group, sync_upd);
1531 
1532 	sched_queue_work(ptdev->scheduler, sync_upd);
1533 }
1534 
1535 static void
1536 csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
1537 {
1538 	struct panthor_scheduler *sched = ptdev->scheduler;
1539 	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1540 	struct panthor_group *group = csg_slot->group;
1541 
1542 	lockdep_assert_held(&sched->lock);
1543 
1544 	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
1545 
1546 	group = csg_slot->group;
1547 	if (!drm_WARN_ON(&ptdev->base, !group))
1548 		group->timedout = true;
1549 
1550 	sched_queue_delayed_work(sched, tick, 0);
1551 }
1552 
1553 static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
1554 {
1555 	u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
1556 	struct panthor_fw_csg_iface *csg_iface;
1557 	u32 ring_cs_db_mask = 0;
1558 
1559 	lockdep_assert_held(&ptdev->scheduler->lock);
1560 
1561 	if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1562 		return;
1563 
1564 	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1565 	req = READ_ONCE(csg_iface->input->req);
1566 	ack = READ_ONCE(csg_iface->output->ack);
1567 	cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
1568 	cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
1569 	csg_events = (req ^ ack) & CSG_EVT_MASK;
1570 
1571 	/* There may not be any pending CSG/CS interrupts to process */
1572 	if (req == ack && cs_irq_req == cs_irq_ack)
1573 		return;
1574 
1575 	/* Immediately set the IRQ_ACK bits to be the same as the IRQ_REQ bits
1576 	 * before examining the CS_ACK & CS_REQ bits. This ensures the host
1577 	 * doesn't miss an interrupt for a CS in the race scenario where, while
1578 	 * the host is servicing an interrupt for that CS, the firmware sends
1579 	 * another interrupt for the same CS.
1580 	 */
1581 	csg_iface->input->cs_irq_ack = cs_irq_req;
1582 
1583 	panthor_fw_update_reqs(csg_iface, req, ack,
1584 			       CSG_SYNC_UPDATE |
1585 			       CSG_IDLE |
1586 			       CSG_PROGRESS_TIMER_EVENT);
1587 
1588 	if (csg_events & CSG_IDLE)
1589 		csg_slot_process_idle_event_locked(ptdev, csg_id);
1590 
1591 	if (csg_events & CSG_PROGRESS_TIMER_EVENT)
1592 		csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
1593 
1594 	cs_irqs = cs_irq_req ^ cs_irq_ack;
1595 	while (cs_irqs) {
1596 		u32 cs_id = ffs(cs_irqs) - 1;
1597 
1598 		if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
1599 			ring_cs_db_mask |= BIT(cs_id);
1600 
1601 		cs_irqs &= ~BIT(cs_id);
1602 	}
1603 
1604 	if (csg_events & CSG_SYNC_UPDATE)
1605 		csg_slot_sync_update_locked(ptdev, csg_id);
1606 
1607 	if (ring_cs_db_mask)
1608 		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
1609 
1610 	panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1611 }
1612 
1613 static void sched_process_idle_event_locked(struct panthor_device *ptdev)
1614 {
1615 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1616 
1617 	lockdep_assert_held(&ptdev->scheduler->lock);
1618 
1619 	/* Acknowledge the idle event and schedule a tick. */
1620 	panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
1621 	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
1622 }
1623 
1624 /**
1625  * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
1626  * @ptdev: Device.
1627  */
1628 static void sched_process_global_irq_locked(struct panthor_device *ptdev)
1629 {
1630 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1631 	u32 req, ack, evts;
1632 
1633 	lockdep_assert_held(&ptdev->scheduler->lock);
1634 
1635 	req = READ_ONCE(glb_iface->input->req);
1636 	ack = READ_ONCE(glb_iface->output->ack);
1637 	evts = (req ^ ack) & GLB_EVT_MASK;
1638 
1639 	if (evts & GLB_IDLE)
1640 		sched_process_idle_event_locked(ptdev);
1641 }
1642 
1643 static void process_fw_events_work(struct work_struct *work)
1644 {
1645 	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
1646 						      fw_events_work);
1647 	u32 events = atomic_xchg(&sched->fw_events, 0);
1648 	struct panthor_device *ptdev = sched->ptdev;
1649 
1650 	mutex_lock(&sched->lock);
1651 
1652 	if (events & JOB_INT_GLOBAL_IF) {
1653 		sched_process_global_irq_locked(ptdev);
1654 		events &= ~JOB_INT_GLOBAL_IF;
1655 	}
1656 
1657 	while (events) {
1658 		u32 csg_id = ffs(events) - 1;
1659 
1660 		sched_process_csg_irq_locked(ptdev, csg_id);
1661 		events &= ~BIT(csg_id);
1662 	}
1663 
1664 	mutex_unlock(&sched->lock);
1665 }
1666 
1667 /**
1668  * panthor_sched_report_fw_events() - Report FW events to the scheduler.
 * @ptdev: Device.
 * @events: Bitmask of FW events to report.
1669  */
1670 void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
1671 {
1672 	if (!ptdev->scheduler)
1673 		return;
1674 
1675 	atomic_or(events, &ptdev->scheduler->fw_events);
1676 	sched_queue_work(ptdev->scheduler, fw_events);
1677 }
1678 
1679 static const char *fence_get_driver_name(struct dma_fence *fence)
1680 {
1681 	return "panthor";
1682 }
1683 
1684 static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
1685 {
1686 	return "queue-fence";
1687 }
1688 
1689 static const struct dma_fence_ops panthor_queue_fence_ops = {
1690 	.get_driver_name = fence_get_driver_name,
1691 	.get_timeline_name = queue_fence_get_timeline_name,
1692 };
1693 
1694 struct panthor_csg_slots_upd_ctx {
1695 	u32 update_mask;
1696 	u32 timedout_mask;
1697 	struct {
1698 		u32 value;
1699 		u32 mask;
1700 	} requests[MAX_CSGS];
1701 };
1702 
1703 static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
1704 {
1705 	memset(ctx, 0, sizeof(*ctx));
1706 }
1707 
1708 static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
1709 				    struct panthor_csg_slots_upd_ctx *ctx,
1710 				    u32 csg_id, u32 value, u32 mask)
1711 {
1712 	if (drm_WARN_ON(&ptdev->base, !mask) ||
1713 	    drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1714 		return;
1715 
1716 	ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask);
1717 	ctx->requests[csg_id].mask |= mask;
1718 	ctx->update_mask |= BIT(csg_id);
1719 }
1720 
1721 static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
1722 				     struct panthor_csg_slots_upd_ctx *ctx)
1723 {
1724 	struct panthor_scheduler *sched = ptdev->scheduler;
1725 	u32 update_slots = ctx->update_mask;
1726 
1727 	lockdep_assert_held(&sched->lock);
1728 
1729 	if (!ctx->update_mask)
1730 		return 0;
1731 
1732 	while (update_slots) {
1733 		struct panthor_fw_csg_iface *csg_iface;
1734 		u32 csg_id = ffs(update_slots) - 1;
1735 
1736 		update_slots &= ~BIT(csg_id);
1737 		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1738 		panthor_fw_update_reqs(csg_iface, req,
1739 				       ctx->requests[csg_id].value,
1740 				       ctx->requests[csg_id].mask);
1741 	}
1742 
1743 	panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);
1744 
1745 	update_slots = ctx->update_mask;
1746 	while (update_slots) {
1747 		struct panthor_fw_csg_iface *csg_iface;
1748 		u32 csg_id = ffs(update_slots) - 1;
1749 		u32 req_mask = ctx->requests[csg_id].mask, acked;
1750 		int ret;
1751 
1752 		update_slots &= ~BIT(csg_id);
1753 		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1754 
1755 		ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);
1756 
1757 		if (acked & CSG_ENDPOINT_CONFIG)
1758 			csg_slot_sync_priority_locked(ptdev, csg_id);
1759 
1760 		if (acked & CSG_STATE_MASK)
1761 			csg_slot_sync_state_locked(ptdev, csg_id);
1762 
1763 		if (acked & CSG_STATUS_UPDATE) {
1764 			csg_slot_sync_queues_state_locked(ptdev, csg_id);
1765 			csg_slot_sync_idle_state_locked(ptdev, csg_id);
1766 		}
1767 
1768 		if (ret && acked != req_mask &&
1769 		    ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
1770 			drm_err(&ptdev->base, "CSG %d update request timed out", csg_id);
1771 			ctx->timedout_mask |= BIT(csg_id);
1772 		}
1773 	}
1774 
1775 	if (ctx->timedout_mask)
1776 		return -ETIMEDOUT;
1777 
1778 	return 0;
1779 }
1780 
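/**
 * struct panthor_sched_tick_ctx - Context describing a scheduler tick
 * @old_groups: Groups currently bound to a CSG slot, per priority.
 * @groups: Groups picked for execution during this tick, per priority.
 * @idle_group_count: Number of idle groups in @groups.
 * @group_count: Total number of groups in @groups.
 * @min_priority: Lowest priority among the picked groups.
 * @vms: VMs used by the picked groups, to track address space requirements.
 * @as_count: Number of entries in @vms.
 * @immediate_tick: True if a new tick should be scheduled immediately.
 * @csg_upd_failed_mask: Bitmask of CSG slots whose update request timed out.
 */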
1781 struct panthor_sched_tick_ctx {
1782 	struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
1783 	struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
1784 	u32 idle_group_count;
1785 	u32 group_count;
1786 	enum panthor_csg_priority min_priority;
1787 	struct panthor_vm *vms[MAX_CS_PER_CSG];
1788 	u32 as_count;
1789 	bool immediate_tick;
1790 	u32 csg_upd_failed_mask;
1791 };
1792 
1793 static bool
1794 tick_ctx_is_full(const struct panthor_scheduler *sched,
1795 		 const struct panthor_sched_tick_ctx *ctx)
1796 {
1797 	return ctx->group_count == sched->csg_slot_count;
1798 }
1799 
1800 static bool
1801 group_is_idle(struct panthor_group *group)
1802 {
1803 	struct panthor_device *ptdev = group->ptdev;
1804 	u32 inactive_queues;
1805 
1806 	if (group->csg_id >= 0)
1807 		return ptdev->scheduler->csg_slots[group->csg_id].idle;
1808 
1809 	inactive_queues = group->idle_queues | group->blocked_queues;
1810 	return hweight32(inactive_queues) == group->queue_count;
1811 }
1812 
1813 static bool
1814 group_can_run(struct panthor_group *group)
1815 {
1816 	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
1817 	       group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
1818 	       !group->destroyed && group->fatal_queues == 0 &&
1819 	       !group->timedout;
1820 }
1821 
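/**
 * tick_ctx_pick_groups_from_list() - Pick groups from a list and add them to a tick context
 * @sched: Scheduler.
 * @ctx: Tick context.
 * @queue: List to pick groups from.
 * @skip_idle_groups: True to skip groups that are currently idle.
 * @owned_by_tick_ctx: True if the list is already owned by the tick context,
 * in which case no extra reference is taken on the picked groups.
 *
 * Groups that can't run, or whose VM can't be given an AS slot, are skipped.
 * Returns as soon as all CSG slots have been assigned.
 */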
1822 static void
1823 tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
1824 			       struct panthor_sched_tick_ctx *ctx,
1825 			       struct list_head *queue,
1826 			       bool skip_idle_groups,
1827 			       bool owned_by_tick_ctx)
1828 {
1829 	struct panthor_group *group, *tmp;
1830 
1831 	if (tick_ctx_is_full(sched, ctx))
1832 		return;
1833 
1834 	list_for_each_entry_safe(group, tmp, queue, run_node) {
1835 		u32 i;
1836 
1837 		if (!group_can_run(group))
1838 			continue;
1839 
1840 		if (skip_idle_groups && group_is_idle(group))
1841 			continue;
1842 
1843 		for (i = 0; i < ctx->as_count; i++) {
1844 			if (ctx->vms[i] == group->vm)
1845 				break;
1846 		}
1847 
1848 		if (i == ctx->as_count && ctx->as_count == sched->as_slot_count)
1849 			continue;
1850 
1851 		if (!owned_by_tick_ctx)
1852 			group_get(group);
1853 
1854 		list_move_tail(&group->run_node, &ctx->groups[group->priority]);
1855 		ctx->group_count++;
1856 		if (group_is_idle(group))
1857 			ctx->idle_group_count++;
1858 
1859 		if (i == ctx->as_count)
1860 			ctx->vms[ctx->as_count++] = group->vm;
1861 
1862 		if (ctx->min_priority > group->priority)
1863 			ctx->min_priority = group->priority;
1864 
1865 		if (tick_ctx_is_full(sched, ctx))
1866 			return;
1867 	}
1868 }
1869 
1870 static void
1871 tick_ctx_insert_old_group(struct panthor_scheduler *sched,
1872 			  struct panthor_sched_tick_ctx *ctx,
1873 			  struct panthor_group *group,
1874 			  bool full_tick)
1875 {
1876 	struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
1877 	struct panthor_group *other_group;
1878 
1879 	if (!full_tick) {
1880 		list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1881 		return;
1882 	}
1883 
1884 	/* Rotate to make sure groups with lower CSG slot
1885 	 * priorities have a chance to get a higher CSG slot
1886 	 * priority next time they get picked. This priority
1887 	 * has an impact on resource request ordering, so it's
1888 	 * important to make sure we don't let one group starve
1889 	 * all other groups with the same group priority.
1890 	 */
1891 	list_for_each_entry(other_group,
1892 			    &ctx->old_groups[csg_slot->group->priority],
1893 			    run_node) {
1894 		struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
1895 
1896 		if (other_csg_slot->priority > csg_slot->priority) {
1897 			list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
1898 			return;
1899 		}
1900 	}
1901 
1902 	list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1903 }
1904 
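/**
 * tick_ctx_init() - Initialize a tick context
 * @sched: Scheduler.
 * @ctx: Tick context to initialize.
 * @full_tick: Passed to tick_ctx_insert_old_group() to decide whether the
 * currently scheduled groups should be re-ordered for CSG slot priority
 * rotation.
 *
 * Collects the groups currently bound to a CSG slot in @ctx->old_groups and
 * queues a STATUS_UPDATE request on each of them so their idle/queue state
 * is refreshed before new scheduling decisions are made.
 */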
1905 static void
1906 tick_ctx_init(struct panthor_scheduler *sched,
1907 	      struct panthor_sched_tick_ctx *ctx,
1908 	      bool full_tick)
1909 {
1910 	struct panthor_device *ptdev = sched->ptdev;
1911 	struct panthor_csg_slots_upd_ctx upd_ctx;
1912 	int ret;
1913 	u32 i;
1914 
1915 	memset(ctx, 0, sizeof(*ctx));
1916 	csgs_upd_ctx_init(&upd_ctx);
1917 
1918 	ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
1919 	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
1920 		INIT_LIST_HEAD(&ctx->groups[i]);
1921 		INIT_LIST_HEAD(&ctx->old_groups[i]);
1922 	}
1923 
1924 	for (i = 0; i < sched->csg_slot_count; i++) {
1925 		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
1926 		struct panthor_group *group = csg_slot->group;
1927 		struct panthor_fw_csg_iface *csg_iface;
1928 
1929 		if (!group)
1930 			continue;
1931 
1932 		csg_iface = panthor_fw_get_csg_iface(ptdev, i);
1933 		group_get(group);
1934 
1935 		/* If there were unhandled faults on the VM, force processing of
1936 		 * CSG IRQs, so we can flag the faulty queues.
1937 		 */
1938 		if (panthor_vm_has_unhandled_faults(group->vm)) {
1939 			sched_process_csg_irq_locked(ptdev, i);
1940 
1941 			/* If no fatal fault was reported, flag all queues as faulty. */
1942 			if (!group->fatal_queues)
1943 				group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
1944 		}
1945 
1946 		tick_ctx_insert_old_group(sched, ctx, group, full_tick);
1947 		csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
1948 					csg_iface->output->ack ^ CSG_STATUS_UPDATE,
1949 					CSG_STATUS_UPDATE);
1950 	}
1951 
1952 	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
1953 	if (ret) {
1954 		panthor_device_schedule_reset(ptdev);
1955 		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
1956 	}
1957 }
1958 
1959 #define NUM_INSTRS_PER_SLOT		16
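/* Number of 64-bit instructions reserved in the ring buffer for each job. */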
1960 
1961 static void
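/**
 * group_term_post_processing() - Cancel the remaining jobs of a terminated group
 * @group: Group being terminated.
 *
 * Signals all in-flight job fences with an error matching the termination
 * reason, and manually bumps the syncobjs to unblock potential waiters.
 */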
1962 group_term_post_processing(struct panthor_group *group)
1963 {
1964 	struct panthor_job *job, *tmp;
1965 	LIST_HEAD(faulty_jobs);
1966 	bool cookie;
1967 	u32 i = 0;
1968 
1969 	if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
1970 		return;
1971 
1972 	cookie = dma_fence_begin_signalling();
1973 	for (i = 0; i < group->queue_count; i++) {
1974 		struct panthor_queue *queue = group->queues[i];
1975 		struct panthor_syncobj_64b *syncobj;
1976 		int err;
1977 
1978 		if (group->fatal_queues & BIT(i))
1979 			err = -EINVAL;
1980 		else if (group->timedout)
1981 			err = -ETIMEDOUT;
1982 		else
1983 			err = -ECANCELED;
1984 
1985 		if (!queue)
1986 			continue;
1987 
1988 		spin_lock(&queue->fence_ctx.lock);
1989 		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
1990 			list_move_tail(&job->node, &faulty_jobs);
1991 			dma_fence_set_error(job->done_fence, err);
1992 			dma_fence_signal_locked(job->done_fence);
1993 		}
1994 		spin_unlock(&queue->fence_ctx.lock);
1995 
1996 		/* Manually update the syncobj seqno to unblock waiters. */
1997 		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
1998 		syncobj->status = ~0;
1999 		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
2000 		sched_queue_work(group->ptdev->scheduler, sync_upd);
2001 	}
2002 	dma_fence_end_signalling(cookie);
2003 
2004 	list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
2005 		list_del_init(&job->node);
2006 		panthor_job_put(&job->base);
2007 	}
2008 }
2009 
2010 static void group_term_work(struct work_struct *work)
2011 {
2012 	struct panthor_group *group =
2013 		container_of(work, struct panthor_group, term_work);
2014 
2015 	group_term_post_processing(group);
2016 	group_put(group);
2017 }
2018 
2019 static void
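/**
 * tick_ctx_cleanup() - Release the resources attached to a tick context
 * @sched: Scheduler.
 * @ctx: Tick context.
 *
 * Moves groups that can still run back to the idle/runnable lists, queues
 * termination work for the ones that can't, and releases the references
 * taken when the context was populated.
 */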
2020 tick_ctx_cleanup(struct panthor_scheduler *sched,
2021 		 struct panthor_sched_tick_ctx *ctx)
2022 {
2023 	struct panthor_group *group, *tmp;
2024 	u32 i;
2025 
2026 	for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
2027 		list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
2028 			/* If everything went fine, we should only have groups
2029 			 * to be terminated in the old_groups lists.
2030 			 */
2031 			drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
2032 				    group_can_run(group));
2033 
2034 			if (!group_can_run(group)) {
2035 				list_del_init(&group->run_node);
2036 				list_del_init(&group->wait_node);
2037 				group_queue_work(group, term);
2038 			} else if (group->csg_id >= 0) {
2039 				list_del_init(&group->run_node);
2040 			} else {
2041 				list_move(&group->run_node,
2042 					  group_is_idle(group) ?
2043 					  &sched->groups.idle[group->priority] :
2044 					  &sched->groups.runnable[group->priority]);
2045 			}
2046 			group_put(group);
2047 		}
2048 	}
2049 
2050 	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
2051 		/* If everything went fine, the groups to schedule lists should
2052 		 * be empty.
2053 		 */
2054 		drm_WARN_ON(&sched->ptdev->base,
2055 			    !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
2056 
2057 		list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
2058 			if (group->csg_id >= 0) {
2059 				list_del_init(&group->run_node);
2060 			} else {
2061 				list_move(&group->run_node,
2062 					  group_is_idle(group) ?
2063 					  &sched->groups.idle[group->priority] :
2064 					  &sched->groups.runnable[group->priority]);
2065 			}
2066 			group_put(group);
2067 		}
2068 	}
2069 }
2070 
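/**
 * tick_ctx_apply() - Apply a tick context
 * @sched: Scheduler.
 * @ctx: Tick context.
 *
 * Suspends or terminates the evicted groups, updates the slot priority of
 * groups that keep their CSG slot, then binds and starts/resumes the newly
 * picked groups on the freed slots.
 */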
2071 static void
2072 tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
2073 {
2074 	struct panthor_group *group, *tmp;
2075 	struct panthor_device *ptdev = sched->ptdev;
2076 	struct panthor_csg_slot *csg_slot;
2077 	int prio, new_csg_prio = MAX_CSG_PRIO, i;
2078 	u32 free_csg_slots = 0;
2079 	struct panthor_csg_slots_upd_ctx upd_ctx;
2080 	int ret;
2081 
2082 	csgs_upd_ctx_init(&upd_ctx);
2083 
2084 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2085 		/* Suspend or terminate evicted groups. */
2086 		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2087 			bool term = !group_can_run(group);
2088 			int csg_id = group->csg_id;
2089 
2090 			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2091 				continue;
2092 
2093 			csg_slot = &sched->csg_slots[csg_id];
2094 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2095 						term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
2096 						CSG_STATE_MASK);
2097 		}
2098 
2099 		/* Update priorities on already running groups. */
2100 		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2101 			struct panthor_fw_csg_iface *csg_iface;
2102 			int csg_id = group->csg_id;
2103 
2104 			if (csg_id < 0) {
2105 				new_csg_prio--;
2106 				continue;
2107 			}
2108 
2109 			csg_slot = &sched->csg_slots[csg_id];
2110 			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2111 			if (csg_slot->priority == new_csg_prio) {
2112 				new_csg_prio--;
2113 				continue;
2114 			}
2115 
2116 			panthor_fw_update_reqs(csg_iface, endpoint_req,
2117 					       CSG_EP_REQ_PRIORITY(new_csg_prio),
2118 					       CSG_EP_REQ_PRIORITY_MASK);
2119 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2120 						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2121 						CSG_ENDPOINT_CONFIG);
2122 			new_csg_prio--;
2123 		}
2124 	}
2125 
2126 	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2127 	if (ret) {
2128 		panthor_device_schedule_reset(ptdev);
2129 		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2130 		return;
2131 	}
2132 
2133 	/* Unbind evicted groups. */
2134 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2135 		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2136 			/* This group is gone. Process any pending CSG interrupts
2137 			 * so the slot is clean before we start a new
2138 			 * group on it.
2139 			 */
2140 			if (group->csg_id >= 0)
2141 				sched_process_csg_irq_locked(ptdev, group->csg_id);
2142 
2143 			group_unbind_locked(group);
2144 		}
2145 	}
2146 
2147 	for (i = 0; i < sched->csg_slot_count; i++) {
2148 		if (!sched->csg_slots[i].group)
2149 			free_csg_slots |= BIT(i);
2150 	}
2151 
2152 	csgs_upd_ctx_init(&upd_ctx);
2153 	new_csg_prio = MAX_CSG_PRIO;
2154 
2155 	/* Start new groups. */
2156 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2157 		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2158 			int csg_id = group->csg_id;
2159 			struct panthor_fw_csg_iface *csg_iface;
2160 
2161 			if (csg_id >= 0) {
2162 				new_csg_prio--;
2163 				continue;
2164 			}
2165 
2166 			csg_id = ffs(free_csg_slots) - 1;
2167 			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2168 				break;
2169 
2170 			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2171 			csg_slot = &sched->csg_slots[csg_id];
2172 			group_bind_locked(group, csg_id);
2173 			csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
2174 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2175 						group->state == PANTHOR_CS_GROUP_SUSPENDED ?
2176 						CSG_STATE_RESUME : CSG_STATE_START,
2177 						CSG_STATE_MASK);
2178 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2179 						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2180 						CSG_ENDPOINT_CONFIG);
2181 			free_csg_slots &= ~BIT(csg_id);
2182 		}
2183 	}
2184 
2185 	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2186 	if (ret) {
2187 		panthor_device_schedule_reset(ptdev);
2188 		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2189 		return;
2190 	}
2191 
2192 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2193 		list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
2194 			list_del_init(&group->run_node);
2195 
2196 			/* If the group has been destroyed while we were
2197 			 * scheduling, ask for an immediate tick to
2198 			 * re-evaluate as soon as possible and get rid of
2199 			 * this dangling group.
2200 			 */
2201 			if (group->destroyed)
2202 				ctx->immediate_tick = true;
2203 			group_put(group);
2204 		}
2205 
2206 		/* Return evicted groups to the idle or run queues. Groups
2207 		 * that can no longer be run (because they've been destroyed
2208 		 * or experienced an unrecoverable error) will be scheduled
2209 		 * for destruction in tick_ctx_cleanup().
2210 		 */
2211 		list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
2212 			if (!group_can_run(group))
2213 				continue;
2214 
2215 			if (group_is_idle(group))
2216 				list_move_tail(&group->run_node, &sched->groups.idle[prio]);
2217 			else
2218 				list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
2219 			group_put(group);
2220 		}
2221 	}
2222 
2223 	sched->used_csg_slot_count = ctx->group_count;
2224 	sched->might_have_idle_groups = ctx->idle_group_count > 0;
2225 }
2226 
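/**
 * tick_ctx_update_resched_target() - Update the next tick deadline
 * @sched: Scheduler.
 * @ctx: Tick context that was just applied.
 *
 * Return: The delay in jiffies before the next tick, or U64_MAX if the
 * periodic tick can be disabled until some external event happens.
 */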
2227 static u64
2228 tick_ctx_update_resched_target(struct panthor_scheduler *sched,
2229 			       const struct panthor_sched_tick_ctx *ctx)
2230 {
2231 	/* We had space left, no need to reschedule until some external event happens. */
2232 	if (!tick_ctx_is_full(sched, ctx))
2233 		goto no_tick;
2234 
2235 	/* If idle groups were scheduled, no need to wake up until some external
2236 	 * event happens (group unblocked, new job submitted, ...).
2237 	 */
2238 	if (ctx->idle_group_count)
2239 		goto no_tick;
2240 
2241 	if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
2242 		goto no_tick;
2243 
2244 	/* If there are groups of the same priority waiting, we need to
2245 	 * keep the scheduler ticking. Otherwise, we just wait for
2246 	 * new groups with higher priority to be queued.
2247 	 */
2248 	if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
2249 		u64 resched_target = sched->last_tick + sched->tick_period;
2250 
2251 		if (time_before64(sched->resched_target, sched->last_tick) ||
2252 		    time_before64(resched_target, sched->resched_target))
2253 			sched->resched_target = resched_target;
2254 
2255 		return sched->resched_target - sched->last_tick;
2256 	}
2257 
2258 no_tick:
2259 	sched->resched_target = U64_MAX;
2260 	return U64_MAX;
2261 }
2262 
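/**
 * tick_work() - Scheduler tick work
 * @work: Work item.
 *
 * Picks the groups to schedule for the next timeslice based on priority and
 * idleness, applies the result to the CSG slots, and re-arms the tick timer
 * if periodic scheduling is still needed.
 */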
2263 static void tick_work(struct work_struct *work)
2264 {
2265 	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
2266 						      tick_work.work);
2267 	struct panthor_device *ptdev = sched->ptdev;
2268 	struct panthor_sched_tick_ctx ctx;
2269 	u64 remaining_jiffies = 0, resched_delay;
2270 	u64 now = get_jiffies_64();
2271 	int prio, ret, cookie;
2272 
2273 	if (!drm_dev_enter(&ptdev->base, &cookie))
2274 		return;
2275 
2276 	ret = pm_runtime_resume_and_get(ptdev->base.dev);
2277 	if (drm_WARN_ON(&ptdev->base, ret))
2278 		goto out_dev_exit;
2279 
2280 	if (time_before64(now, sched->resched_target))
2281 		remaining_jiffies = sched->resched_target - now;
2282 
2283 	mutex_lock(&sched->lock);
2284 	if (panthor_device_reset_is_pending(sched->ptdev))
2285 		goto out_unlock;
2286 
2287 	tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
2288 	if (ctx.csg_upd_failed_mask)
2289 		goto out_cleanup_ctx;
2290 
2291 	if (remaining_jiffies) {
2292 		/* Scheduling forced in the middle of a tick. Only RT groups
2293 		 * can preempt non-RT ones. Currently running RT groups can't be
2294 		 * preempted.
2295 		 */
2296 		for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2297 		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2298 		     prio--) {
2299 			tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
2300 						       true, true);
2301 			if (prio == PANTHOR_CSG_PRIORITY_RT) {
2302 				tick_ctx_pick_groups_from_list(sched, &ctx,
2303 							       &sched->groups.runnable[prio],
2304 							       true, false);
2305 			}
2306 		}
2307 	}
2308 
2309 	/* First pick non-idle groups */
2310 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2311 	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2312 	     prio--) {
2313 		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
2314 					       true, false);
2315 		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
2316 	}
2317 
2318 	/* If we have free CSG slots left, pick idle groups */
2319 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2320 	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2321 	     prio--) {
2322 		/* Check the old_group queue first to avoid reprogramming the slots */
2323 		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
2324 		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
2325 					       false, false);
2326 	}
2327 
2328 	tick_ctx_apply(sched, &ctx);
2329 	if (ctx.csg_upd_failed_mask)
2330 		goto out_cleanup_ctx;
2331 
2332 	if (ctx.idle_group_count == ctx.group_count) {
2333 		panthor_devfreq_record_idle(sched->ptdev);
2334 		if (sched->pm.has_ref) {
2335 			pm_runtime_put_autosuspend(ptdev->base.dev);
2336 			sched->pm.has_ref = false;
2337 		}
2338 	} else {
2339 		panthor_devfreq_record_busy(sched->ptdev);
2340 		if (!sched->pm.has_ref) {
2341 			pm_runtime_get(ptdev->base.dev);
2342 			sched->pm.has_ref = true;
2343 		}
2344 	}
2345 
2346 	sched->last_tick = now;
2347 	resched_delay = tick_ctx_update_resched_target(sched, &ctx);
2348 	if (ctx.immediate_tick)
2349 		resched_delay = 0;
2350 
2351 	if (resched_delay != U64_MAX)
2352 		sched_queue_delayed_work(sched, tick, resched_delay);
2353 
2354 out_cleanup_ctx:
2355 	tick_ctx_cleanup(sched, &ctx);
2356 
2357 out_unlock:
2358 	mutex_unlock(&sched->lock);
2359 	pm_runtime_mark_last_busy(ptdev->base.dev);
2360 	pm_runtime_put_autosuspend(ptdev->base.dev);
2361 
2362 out_dev_exit:
2363 	drm_dev_exit(cookie);
2364 }
2365 
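/**
 * panthor_queue_eval_syncwait() - Evaluate the SYNC_WAIT condition blocking a queue
 * @group: Group the queue belongs to.
 * @queue_idx: Index of the queue to evaluate.
 *
 * Return: 1 if the wait condition is now satisfied (the syncwait object is
 * released in that case), 0 if the queue is still blocked, a negative error
 * code if the synchronization object can't be accessed.
 */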
2366 static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
2367 {
2368 	struct panthor_queue *queue = group->queues[queue_idx];
2369 	union {
2370 		struct panthor_syncobj_64b sync64;
2371 		struct panthor_syncobj_32b sync32;
2372 	} *syncobj;
2373 	bool result;
2374 	u64 value;
2375 
2376 	syncobj = panthor_queue_get_syncwait_obj(group, queue);
2377 	if (!syncobj)
2378 		return -EINVAL;
2379 
2380 	value = queue->syncwait.sync64 ?
2381 		syncobj->sync64.seqno :
2382 		syncobj->sync32.seqno;
2383 
2384 	if (queue->syncwait.gt)
2385 		result = value > queue->syncwait.ref;
2386 	else
2387 		result = value <= queue->syncwait.ref;
2388 
2389 	if (result)
2390 		panthor_queue_put_syncwait_obj(queue);
2391 
2392 	return result;
2393 }
2394 
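/**
 * sync_upd_work() - Work item re-evaluating blocked queues after a sync object update
 * @work: Work item.
 *
 * Walks the list of waiting groups, unblocks the queues whose wait condition
 * is now satisfied, and forces an immediate tick if a real-time group became
 * runnable again.
 */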
2395 static void sync_upd_work(struct work_struct *work)
2396 {
2397 	struct panthor_scheduler *sched = container_of(work,
2398 						      struct panthor_scheduler,
2399 						      sync_upd_work);
2400 	struct panthor_group *group, *tmp;
2401 	bool immediate_tick = false;
2402 
2403 	mutex_lock(&sched->lock);
2404 	list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) {
2405 		u32 tested_queues = group->blocked_queues;
2406 		u32 unblocked_queues = 0;
2407 
2408 		while (tested_queues) {
2409 			u32 cs_id = ffs(tested_queues) - 1;
2410 			int ret;
2411 
2412 			ret = panthor_queue_eval_syncwait(group, cs_id);
2413 			drm_WARN_ON(&group->ptdev->base, ret < 0);
2414 			if (ret)
2415 				unblocked_queues |= BIT(cs_id);
2416 
2417 			tested_queues &= ~BIT(cs_id);
2418 		}
2419 
2420 		if (unblocked_queues) {
2421 			group->blocked_queues &= ~unblocked_queues;
2422 
2423 			if (group->csg_id < 0) {
2424 				list_move(&group->run_node,
2425 					  &sched->groups.runnable[group->priority]);
2426 				if (group->priority == PANTHOR_CSG_PRIORITY_RT)
2427 					immediate_tick = true;
2428 			}
2429 		}
2430 
2431 		if (!group->blocked_queues)
2432 			list_del_init(&group->wait_node);
2433 	}
2434 	mutex_unlock(&sched->lock);
2435 
2436 	if (immediate_tick)
2437 		sched_queue_delayed_work(sched, tick, 0);
2438 }
2439 
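/**
 * group_schedule_locked() - Make sure a group that received new jobs gets scheduled
 * @group: Group that just got new jobs.
 * @queue_mask: Bitmask of queues that received new jobs.
 *
 * Moves the group back to the runnable list if it was idle, and decides
 * whether an immediate or delayed tick is needed to give it a CSG slot.
 */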
2440 static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
2441 {
2442 	struct panthor_device *ptdev = group->ptdev;
2443 	struct panthor_scheduler *sched = ptdev->scheduler;
2444 	struct list_head *queue = &sched->groups.runnable[group->priority];
2445 	u64 delay_jiffies = 0;
2446 	bool was_idle;
2447 	u64 now;
2448 
2449 	if (!group_can_run(group))
2450 		return;
2451 
2452 	/* All updated queues are blocked, no need to wake up the scheduler. */
2453 	if ((queue_mask & group->blocked_queues) == queue_mask)
2454 		return;
2455 
2456 	was_idle = group_is_idle(group);
2457 	group->idle_queues &= ~queue_mask;
2458 
2459 	/* Don't mess with the lists if we're in the middle of a reset. */
2460 	if (atomic_read(&sched->reset.in_progress))
2461 		return;
2462 
2463 	if (was_idle && !group_is_idle(group))
2464 		list_move_tail(&group->run_node, queue);
2465 
2466 	/* RT groups are preemptive. */
2467 	if (group->priority == PANTHOR_CSG_PRIORITY_RT) {
2468 		sched_queue_delayed_work(sched, tick, 0);
2469 		return;
2470 	}
2471 
2472 	/* Some groups might be idle, force an immediate tick to
2473 	 * re-evaluate.
2474 	 */
2475 	if (sched->might_have_idle_groups) {
2476 		sched_queue_delayed_work(sched, tick, 0);
2477 		return;
2478 	}
2479 
2480 	/* Scheduler is ticking, nothing to do. */
2481 	if (sched->resched_target != U64_MAX) {
2482 		/* If there are free slots, force an immediate tick. */
2483 		if (sched->used_csg_slot_count < sched->csg_slot_count)
2484 			sched_queue_delayed_work(sched, tick, 0);
2485 
2486 		return;
2487 	}
2488 
2489 	/* Scheduler tick was off, recalculate the resched_target based on the
2490 	 * last tick event, and queue the scheduler work.
2491 	 */
2492 	now = get_jiffies_64();
2493 	sched->resched_target = sched->last_tick + sched->tick_period;
2494 	if (sched->used_csg_slot_count == sched->csg_slot_count &&
2495 	    time_before64(now, sched->resched_target))
2496 		delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
2497 
2498 	sched_queue_delayed_work(sched, tick, delay_jiffies);
2499 }
2500 
2501 static void queue_stop(struct panthor_queue *queue,
2502 		       struct panthor_job *bad_job)
2503 {
2504 	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
2505 }
2506 
2507 static void queue_start(struct panthor_queue *queue)
2508 {
2509 	struct panthor_job *job;
2510 
2511 	/* Re-assign the parent fences. */
2512 	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
2513 		job->base.s_fence->parent = dma_fence_get(job->done_fence);
2514 
2515 	drm_sched_start(&queue->scheduler, true);
2516 }
2517 
2518 static void panthor_group_stop(struct panthor_group *group)
2519 {
2520 	struct panthor_scheduler *sched = group->ptdev->scheduler;
2521 
2522 	lockdep_assert_held(&sched->reset.lock);
2523 
2524 	for (u32 i = 0; i < group->queue_count; i++)
2525 		queue_stop(group->queues[i], NULL);
2526 
2527 	group_get(group);
2528 	list_move_tail(&group->run_node, &sched->reset.stopped_groups);
2529 }
2530 
2531 static void panthor_group_start(struct panthor_group *group)
2532 {
2533 	struct panthor_scheduler *sched = group->ptdev->scheduler;
2534 
2535 	lockdep_assert_held(&group->ptdev->scheduler->reset.lock);
2536 
2537 	for (u32 i = 0; i < group->queue_count; i++)
2538 		queue_start(group->queues[i]);
2539 
2540 	if (group_can_run(group)) {
2541 		list_move_tail(&group->run_node,
2542 			       group_is_idle(group) ?
2543 			       &sched->groups.idle[group->priority] :
2544 			       &sched->groups.runnable[group->priority]);
2545 	} else {
2546 		list_del_init(&group->run_node);
2547 		list_del_init(&group->wait_node);
2548 		group_queue_work(group, term);
2549 	}
2550 
2551 	group_put(group);
2552 }
2553 
2554 static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
2555 {
2556 	struct panthor_scheduler *sched = ptdev->scheduler;
2557 
2558 	sched_queue_delayed_work(sched, tick, 0);
2559 }
2560 
2561 /**
2562  * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
 * @ptdev: Device.
2563  */
2564 void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
2565 {
2566 	/* Force a tick to immediately kill faulty groups. */
2567 	if (ptdev->scheduler)
2568 		panthor_sched_immediate_tick(ptdev);
2569 }
2570 
2571 void panthor_sched_resume(struct panthor_device *ptdev)
2572 {
2573 	/* Force a tick to re-evaluate after a resume. */
2574 	panthor_sched_immediate_tick(ptdev);
2575 }
2576 
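/**
 * panthor_sched_suspend() - Suspend the scheduler.
 * @ptdev: Device.
 *
 * Suspends (or terminates, if they can't run anymore) all groups currently
 * bound to a CSG slot, flushes the GPU caches so the suspend buffers are
 * up-to-date, and unbinds the groups from their slots.
 */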
2577 void panthor_sched_suspend(struct panthor_device *ptdev)
2578 {
2579 	struct panthor_scheduler *sched = ptdev->scheduler;
2580 	struct panthor_csg_slots_upd_ctx upd_ctx;
2581 	struct panthor_group *group;
2582 	u32 suspended_slots;
2583 	u32 i;
2584 
2585 	mutex_lock(&sched->lock);
2586 	csgs_upd_ctx_init(&upd_ctx);
2587 	for (i = 0; i < sched->csg_slot_count; i++) {
2588 		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2589 
2590 		if (csg_slot->group) {
2591 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2592 						group_can_run(csg_slot->group) ?
2593 						CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
2594 						CSG_STATE_MASK);
2595 		}
2596 	}
2597 
2598 	suspended_slots = upd_ctx.update_mask;
2599 
2600 	csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2601 	suspended_slots &= ~upd_ctx.timedout_mask;
2602 
2603 	if (upd_ctx.timedout_mask) {
2604 		u32 slot_mask = upd_ctx.timedout_mask;
2605 
2606 		drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
2607 		csgs_upd_ctx_init(&upd_ctx);
2608 		while (slot_mask) {
2609 			u32 csg_id = ffs(slot_mask) - 1;
2610 
2611 			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2612 						CSG_STATE_TERMINATE,
2613 						CSG_STATE_MASK);
2614 			slot_mask &= ~BIT(csg_id);
2615 		}
2616 
2617 		csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2618 
2619 		slot_mask = upd_ctx.timedout_mask;
2620 		while (slot_mask) {
2621 			u32 csg_id = ffs(slot_mask) - 1;
2622 			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2623 
2624 			/* The terminate command timed out, but the soft-reset will
2625 			 * automatically terminate all active groups, so let's
2626 			 * force the state to terminated here.
2627 			 */
2628 			if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
2629 				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2630 			slot_mask &= ~BIT(csg_id);
2631 		}
2632 	}
2633 
2634 	/* Flush L2 and LSC caches to make sure suspend state is up-to-date.
2635 	 * If the flush fails, flag all queues for termination.
2636 	 */
2637 	if (suspended_slots) {
2638 		bool flush_caches_failed = false;
2639 		u32 slot_mask = suspended_slots;
2640 
2641 		if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
2642 			flush_caches_failed = true;
2643 
2644 		while (slot_mask) {
2645 			u32 csg_id = ffs(slot_mask) - 1;
2646 			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2647 
2648 			if (flush_caches_failed)
2649 				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2650 			else
2651 				csg_slot_sync_update_locked(ptdev, csg_id);
2652 
2653 			slot_mask &= ~BIT(csg_id);
2654 		}
2655 	}
2656 
2657 	for (i = 0; i < sched->csg_slot_count; i++) {
2658 		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2659 
2660 		group = csg_slot->group;
2661 		if (!group)
2662 			continue;
2663 
2664 		group_get(group);
2665 
2666 		if (group->csg_id >= 0)
2667 			sched_process_csg_irq_locked(ptdev, group->csg_id);
2668 
2669 		group_unbind_locked(group);
2670 
2671 		drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
2672 
2673 		if (group_can_run(group)) {
2674 			list_add(&group->run_node,
2675 				 &sched->groups.idle[group->priority]);
2676 		} else {
2677 			/* We don't bother stopping the scheduler if the group is
2678 			 * faulty; the group termination work will finish the job.
2679 			 */
2680 			list_del_init(&group->wait_node);
2681 			group_queue_work(group, term);
2682 		}
2683 		group_put(group);
2684 	}
2685 	mutex_unlock(&sched->lock);
2686 }
2687 
2688 void panthor_sched_pre_reset(struct panthor_device *ptdev)
2689 {
2690 	struct panthor_scheduler *sched = ptdev->scheduler;
2691 	struct panthor_group *group, *group_tmp;
2692 	u32 i;
2693 
2694 	mutex_lock(&sched->reset.lock);
2695 	atomic_set(&sched->reset.in_progress, true);
2696 
2697 	/* Cancel all scheduler work items. Once this is done, they can't be
2698 	 * queued again until the reset operation is complete.
2699 	 */
2700 	cancel_work_sync(&sched->sync_upd_work);
2701 	cancel_delayed_work_sync(&sched->tick_work);
2702 
2703 	panthor_sched_suspend(ptdev);
2704 
2705 	/* Stop all groups that might still accept jobs, so we aren't passed
2706 	 * new jobs while we're resetting.
2707 	 */
2708 	for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) {
2709 		/* All groups should be in the idle lists. */
2710 		drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i]));
2711 		list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node)
2712 			panthor_group_stop(group);
2713 	}
2714 
2715 	for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) {
2716 		list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node)
2717 			panthor_group_stop(group);
2718 	}
2719 
2720 	mutex_unlock(&sched->reset.lock);
2721 }
2722 
2723 void panthor_sched_post_reset(struct panthor_device *ptdev)
2724 {
2725 	struct panthor_scheduler *sched = ptdev->scheduler;
2726 	struct panthor_group *group, *group_tmp;
2727 
2728 	mutex_lock(&sched->reset.lock);
2729 
2730 	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
2731 		panthor_group_start(group);
2732 
2733 	/* We're done resetting the GPU; clear the reset.in_progress bit so we can
2734 	 * kick the scheduler.
2735 	 */
2736 	atomic_set(&sched->reset.in_progress, false);
2737 	mutex_unlock(&sched->reset.lock);
2738 
2739 	sched_queue_delayed_work(sched, tick, 0);
2740 
2741 	sched_queue_work(sched, sync_upd);
2742 }
2743 
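/**
 * group_sync_upd_work() - Work item signaling job fences when syncobjs make progress
 * @work: Work item.
 *
 * Walks the in-flight jobs of each queue and signals the done fence of those
 * whose syncobj seqno has been reached.
 */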
2744 static void group_sync_upd_work(struct work_struct *work)
2745 {
2746 	struct panthor_group *group =
2747 		container_of(work, struct panthor_group, sync_upd_work);
2748 	struct panthor_job *job, *job_tmp;
2749 	LIST_HEAD(done_jobs);
2750 	u32 queue_idx;
2751 	bool cookie;
2752 
2753 	cookie = dma_fence_begin_signalling();
2754 	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
2755 		struct panthor_queue *queue = group->queues[queue_idx];
2756 		struct panthor_syncobj_64b *syncobj;
2757 
2758 		if (!queue)
2759 			continue;
2760 
2761 		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
2762 
2763 		spin_lock(&queue->fence_ctx.lock);
2764 		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
2765 			if (!job->call_info.size)
2766 				continue;
2767 
2768 			if (syncobj->seqno < job->done_fence->seqno)
2769 				break;
2770 
2771 			list_move_tail(&job->node, &done_jobs);
2772 			dma_fence_signal_locked(job->done_fence);
2773 		}
2774 		spin_unlock(&queue->fence_ctx.lock);
2775 	}
2776 	dma_fence_end_signalling(cookie);
2777 
2778 	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
2779 		list_del_init(&job->node);
2780 		panthor_job_put(&job->base);
2781 	}
2782 
2783 	group_put(group);
2784 }
2785 
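/**
 * queue_run_job() - drm_sched ->run_job() hook
 * @sched_job: Job to run.
 *
 * Copies the pre-defined call sequence pointing to the user command stream
 * into the queue ring buffer, updates the insert pointer, and either rings
 * the queue doorbell (group already on a CSG slot) or asks the scheduler to
 * give the group a slot.
 *
 * Return: The job done fence, NULL for empty jobs, or an ERR_PTR().
 */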
2786 static struct dma_fence *
2787 queue_run_job(struct drm_sched_job *sched_job)
2788 {
2789 	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2790 	struct panthor_group *group = job->group;
2791 	struct panthor_queue *queue = group->queues[job->queue_idx];
2792 	struct panthor_device *ptdev = group->ptdev;
2793 	struct panthor_scheduler *sched = ptdev->scheduler;
2794 	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
2795 	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
2796 	u64 addr_reg = ptdev->csif_info.cs_reg_count -
2797 		       ptdev->csif_info.unpreserved_cs_reg_count;
2798 	u64 val_reg = addr_reg + 2;
2799 	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
2800 			job->queue_idx * sizeof(struct panthor_syncobj_64b);
2801 	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
2802 	struct dma_fence *done_fence;
2803 	int ret;
2804 
2805 	u64 call_instrs[NUM_INSTRS_PER_SLOT] = {
2806 		/* MOV32 rX+2, cs.latest_flush */
2807 		(2ull << 56) | (val_reg << 48) | job->call_info.latest_flush,
2808 
2809 		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
2810 		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
2811 
2812 		/* MOV48 rX:rX+1, cs.start */
2813 		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
2814 
2815 		/* MOV32 rX+2, cs.size */
2816 		(2ull << 56) | (val_reg << 48) | job->call_info.size,
2817 
2818 		/* WAIT(0) => waits for FLUSH_CACHE2 instruction */
2819 		(3ull << 56) | (1 << 16),
2820 
2821 		/* CALL rX:rX+1, rX+2 */
2822 		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
2823 
2824 		/* MOV48 rX:rX+1, sync_addr */
2825 		(1ull << 56) | (addr_reg << 48) | sync_addr,
2826 
2827 		/* MOV48 rX+2, #1 */
2828 		(1ull << 56) | (val_reg << 48) | 1,
2829 
2830 		/* WAIT(all) */
2831 		(3ull << 56) | (waitall_mask << 16),
2832 
2833 		/* SYNC_ADD64.system_scope.propagate_err.nowait rX:rX+1, rX+2 */
2834 		(51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1,
2835 
2836 		/* ERROR_BARRIER, so we can recover from faults at job
2837 		 * boundaries.
2838 		 */
2839 		(47ull << 56),
2840 	};
2841 
2842 	/* Need to be cacheline aligned to please the prefetcher. */
2843 	static_assert(sizeof(call_instrs) % 64 == 0,
2844 		      "call_instrs is not aligned on a cacheline");
2845 
2846 	/* Stream size is zero, nothing to do => return a NULL fence and let
2847 	 * drm_sched signal the parent.
2848 	 */
2849 	if (!job->call_info.size)
2850 		return NULL;
2851 
2852 	ret = pm_runtime_resume_and_get(ptdev->base.dev);
2853 	if (drm_WARN_ON(&ptdev->base, ret))
2854 		return ERR_PTR(ret);
2855 
2856 	mutex_lock(&sched->lock);
2857 	if (!group_can_run(group)) {
2858 		done_fence = ERR_PTR(-ECANCELED);
2859 		goto out_unlock;
2860 	}
2861 
2862 	dma_fence_init(job->done_fence,
2863 		       &panthor_queue_fence_ops,
2864 		       &queue->fence_ctx.lock,
2865 		       queue->fence_ctx.id,
2866 		       atomic64_inc_return(&queue->fence_ctx.seqno));
2867 
2868 	memcpy(queue->ringbuf->kmap + ringbuf_insert,
2869 	       call_instrs, sizeof(call_instrs));
2870 
2871 	panthor_job_get(&job->base);
2872 	spin_lock(&queue->fence_ctx.lock);
2873 	list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
2874 	spin_unlock(&queue->fence_ctx.lock);
2875 
2876 	job->ringbuf.start = queue->iface.input->insert;
2877 	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
2878 
2879 	/* Make sure the ring buffer is updated before the INSERT
2880 	 * register.
2881 	 */
2882 	wmb();
2883 
2884 	queue->iface.input->extract = queue->iface.output->extract;
2885 	queue->iface.input->insert = job->ringbuf.end;
2886 
2887 	if (group->csg_id < 0) {
2888 		/* If the queue is blocked, we want to keep the timeout running, so we
2889 		 * can detect unbounded waits and kill the group when that happens.
2890 		 * Otherwise, we suspend the timeout so the time we spend waiting for
2891 		 * a CSG slot is not counted.
2892 		 */
2893 		if (!(group->blocked_queues & BIT(job->queue_idx)) &&
2894 		    !queue->timeout_suspended) {
2895 			queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
2896 			queue->timeout_suspended = true;
2897 		}
2898 
2899 		group_schedule_locked(group, BIT(job->queue_idx));
2900 	} else {
2901 		gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
2902 		if (!sched->pm.has_ref &&
2903 		    !(group->blocked_queues & BIT(job->queue_idx))) {
2904 			pm_runtime_get(ptdev->base.dev);
2905 			sched->pm.has_ref = true;
2906 		}
2907 	}
2908 
2909 	done_fence = dma_fence_get(job->done_fence);
2910 
2911 out_unlock:
2912 	mutex_unlock(&sched->lock);
2913 	pm_runtime_mark_last_busy(ptdev->base.dev);
2914 	pm_runtime_put_autosuspend(ptdev->base.dev);
2915 
2916 	return done_fence;
2917 }
2918 
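/**
 * queue_timedout_job() - drm_sched ->timedout_job() hook
 * @sched_job: Job that timed out.
 *
 * Flags the group as timed out so it can't be scheduled anymore, then either
 * forces a tick (if the group holds a CSG slot) or queues the group
 * termination work.
 */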
2919 static enum drm_gpu_sched_stat
2920 queue_timedout_job(struct drm_sched_job *sched_job)
2921 {
2922 	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
2923 	struct panthor_group *group = job->group;
2924 	struct panthor_device *ptdev = group->ptdev;
2925 	struct panthor_scheduler *sched = ptdev->scheduler;
2926 	struct panthor_queue *queue = group->queues[job->queue_idx];
2927 
2928 	drm_warn(&ptdev->base, "job timeout\n");
2929 
2930 	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
2931 
2932 	queue_stop(queue, job);
2933 
2934 	mutex_lock(&sched->lock);
2935 	group->timedout = true;
2936 	if (group->csg_id >= 0) {
2937 		sched_queue_delayed_work(ptdev->scheduler, tick, 0);
2938 	} else {
2939 		/* Remove from the run queues, so the scheduler can't
2940 		 * pick the group on the next tick.
2941 		 */
2942 		list_del_init(&group->run_node);
2943 		list_del_init(&group->wait_node);
2944 
2945 		group_queue_work(group, term);
2946 	}
2947 	mutex_unlock(&sched->lock);
2948 
2949 	queue_start(queue);
2950 
2951 	return DRM_GPU_SCHED_STAT_NOMINAL;
2952 }
2953 
2954 static void queue_free_job(struct drm_sched_job *sched_job)
2955 {
2956 	drm_sched_job_cleanup(sched_job);
2957 	panthor_job_put(sched_job);
2958 }
2959 
2960 static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
2961 	.run_job = queue_run_job,
2962 	.timedout_job = queue_timedout_job,
2963 	.free_job = queue_free_job,
2964 };
2965 
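/**
 * group_create_queue() - Create a queue from userspace-provided parameters
 * @group: Group to attach the queue to.
 * @args: Queue creation arguments.
 *
 * Allocates the ring buffer and the FW queue interface, and initializes the
 * drm_gpu_scheduler/entity pair backing the queue.
 *
 * Return: A valid pointer on success, an ERR_PTR() otherwise.
 */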
2966 static struct panthor_queue *
2967 group_create_queue(struct panthor_group *group,
2968 		   const struct drm_panthor_queue_create *args)
2969 {
2970 	struct drm_gpu_scheduler *drm_sched;
2971 	struct panthor_queue *queue;
2972 	int ret;
2973 
2974 	if (args->pad[0] || args->pad[1] || args->pad[2])
2975 		return ERR_PTR(-EINVAL);
2976 
2977 	if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
2978 	    !is_power_of_2(args->ringbuf_size))
2979 		return ERR_PTR(-EINVAL);
2980 
2981 	if (args->priority > CSF_MAX_QUEUE_PRIO)
2982 		return ERR_PTR(-EINVAL);
2983 
2984 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
2985 	if (!queue)
2986 		return ERR_PTR(-ENOMEM);
2987 
2988 	queue->fence_ctx.id = dma_fence_context_alloc(1);
2989 	spin_lock_init(&queue->fence_ctx.lock);
2990 	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
2991 
2992 	queue->priority = args->priority;
2993 
2994 	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
2995 						  args->ringbuf_size,
2996 						  DRM_PANTHOR_BO_NO_MMAP,
2997 						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
2998 						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
2999 						  PANTHOR_VM_KERNEL_AUTO_VA);
3000 	if (IS_ERR(queue->ringbuf)) {
3001 		ret = PTR_ERR(queue->ringbuf);
3002 		goto err_free_queue;
3003 	}
3004 
3005 	ret = panthor_kernel_bo_vmap(queue->ringbuf);
3006 	if (ret)
3007 		goto err_free_queue;
3008 
3009 	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
3010 							    &queue->iface.input,
3011 							    &queue->iface.output,
3012 							    &queue->iface.input_fw_va,
3013 							    &queue->iface.output_fw_va);
3014 	if (IS_ERR(queue->iface.mem)) {
3015 		ret = PTR_ERR(queue->iface.mem);
3016 		goto err_free_queue;
3017 	}
3018 
3019 	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
3020 			     group->ptdev->scheduler->wq, 1,
3021 			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
3022 			     0, msecs_to_jiffies(JOB_TIMEOUT_MS),
3023 			     group->ptdev->reset.wq,
3024 			     NULL, "panthor-queue", group->ptdev->base.dev);
3025 	if (ret)
3026 		goto err_free_queue;
3027 
3028 	drm_sched = &queue->scheduler;
3029 	ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
	if (ret)
		goto err_free_queue;
3030 
3031 	return queue;
3032 
3033 err_free_queue:
3034 	group_free_queue(group, queue);
3035 	return ERR_PTR(ret);
3036 }
3037 
3038 #define MAX_GROUPS_PER_POOL		128
3039 
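/**
 * panthor_group_create() - Create a scheduling group
 * @pfile: File the group is created on.
 * @group_args: Group creation arguments.
 * @queue_args: Array of queue creation arguments, one per queue.
 *
 * Return: A positive group handle on success, a negative error code otherwise.
 */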
3040 int panthor_group_create(struct panthor_file *pfile,
3041 			 const struct drm_panthor_group_create *group_args,
3042 			 const struct drm_panthor_queue_create *queue_args)
3043 {
3044 	struct panthor_device *ptdev = pfile->ptdev;
3045 	struct panthor_group_pool *gpool = pfile->groups;
3046 	struct panthor_scheduler *sched = ptdev->scheduler;
3047 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3048 	struct panthor_group *group = NULL;
3049 	u32 gid, i, suspend_size;
3050 	int ret;
3051 
3052 	if (group_args->pad)
3053 		return -EINVAL;
3054 
3055 	if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH)
3056 		return -EINVAL;
3057 
3058 	if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
3059 	    (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
3060 	    (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
3061 		return -EINVAL;
3062 
3063 	if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
3064 	    hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
3065 	    hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
3066 		return -EINVAL;
3067 
3068 	group = kzalloc(sizeof(*group), GFP_KERNEL);
3069 	if (!group)
3070 		return -ENOMEM;
3071 
3072 	spin_lock_init(&group->fatal_lock);
3073 	kref_init(&group->refcount);
3074 	group->state = PANTHOR_CS_GROUP_CREATED;
3075 	group->csg_id = -1;
3076 
3077 	group->ptdev = ptdev;
3078 	group->max_compute_cores = group_args->max_compute_cores;
3079 	group->compute_core_mask = group_args->compute_core_mask;
3080 	group->max_fragment_cores = group_args->max_fragment_cores;
3081 	group->fragment_core_mask = group_args->fragment_core_mask;
3082 	group->max_tiler_cores = group_args->max_tiler_cores;
3083 	group->tiler_core_mask = group_args->tiler_core_mask;
3084 	group->priority = group_args->priority;
3085 
3086 	INIT_LIST_HEAD(&group->wait_node);
3087 	INIT_LIST_HEAD(&group->run_node);
3088 	INIT_WORK(&group->term_work, group_term_work);
3089 	INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
3090 	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
3091 	INIT_WORK(&group->release_work, group_release_work);
3092 
3093 	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
3094 	if (!group->vm) {
3095 		ret = -EINVAL;
3096 		goto err_put_group;
3097 	}
3098 
3099 	suspend_size = csg_iface->control->suspend_size;
3100 	group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3101 	if (IS_ERR(group->suspend_buf)) {
3102 		ret = PTR_ERR(group->suspend_buf);
3103 		group->suspend_buf = NULL;
3104 		goto err_put_group;
3105 	}
3106 
3107 	suspend_size = csg_iface->control->protm_suspend_size;
3108 	group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3109 	if (IS_ERR(group->protm_suspend_buf)) {
3110 		ret = PTR_ERR(group->protm_suspend_buf);
3111 		group->protm_suspend_buf = NULL;
3112 		goto err_put_group;
3113 	}
3114 
3115 	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
3116 						   group_args->queues.count *
3117 						   sizeof(struct panthor_syncobj_64b),
3118 						   DRM_PANTHOR_BO_NO_MMAP,
3119 						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3120 						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3121 						   PANTHOR_VM_KERNEL_AUTO_VA);
3122 	if (IS_ERR(group->syncobjs)) {
3123 		ret = PTR_ERR(group->syncobjs);
3124 		goto err_put_group;
3125 	}
3126 
3127 	ret = panthor_kernel_bo_vmap(group->syncobjs);
3128 	if (ret)
3129 		goto err_put_group;
3130 
3131 	memset(group->syncobjs->kmap, 0,
3132 	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
3133 
3134 	for (i = 0; i < group_args->queues.count; i++) {
3135 		group->queues[i] = group_create_queue(group, &queue_args[i]);
3136 		if (IS_ERR(group->queues[i])) {
3137 			ret = PTR_ERR(group->queues[i]);
3138 			group->queues[i] = NULL;
3139 			goto err_put_group;
3140 		}
3141 
3142 		group->queue_count++;
3143 	}
3144 
3145 	group->idle_queues = GENMASK(group->queue_count - 1, 0);
3146 
3147 	ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
3148 	if (ret)
3149 		goto err_put_group;
3150 
3151 	mutex_lock(&sched->reset.lock);
3152 	if (atomic_read(&sched->reset.in_progress)) {
3153 		panthor_group_stop(group);
3154 	} else {
3155 		mutex_lock(&sched->lock);
3156 		list_add_tail(&group->run_node,
3157 			      &sched->groups.idle[group->priority]);
3158 		mutex_unlock(&sched->lock);
3159 	}
3160 	mutex_unlock(&sched->reset.lock);
3161 
3162 	return gid;
3163 
3164 err_put_group:
3165 	group_put(group);
3166 	return ret;
3167 }
3168 
3169 int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
3170 {
3171 	struct panthor_group_pool *gpool = pfile->groups;
3172 	struct panthor_device *ptdev = pfile->ptdev;
3173 	struct panthor_scheduler *sched = ptdev->scheduler;
3174 	struct panthor_group *group;
3175 
3176 	group = xa_erase(&gpool->xa, group_handle);
3177 	if (!group)
3178 		return -EINVAL;
3179 
3180 	for (u32 i = 0; i < group->queue_count; i++) {
3181 		if (group->queues[i])
3182 			drm_sched_entity_destroy(&group->queues[i]->entity);
3183 	}
3184 
3185 	mutex_lock(&sched->reset.lock);
3186 	mutex_lock(&sched->lock);
3187 	group->destroyed = true;
3188 	if (group->csg_id >= 0) {
3189 		sched_queue_delayed_work(sched, tick, 0);
3190 	} else if (!atomic_read(&sched->reset.in_progress)) {
3191 		/* Remove from the run queues, so the scheduler can't
3192 		 * pick the group on the next tick.
3193 		 */
3194 		list_del_init(&group->run_node);
3195 		list_del_init(&group->wait_node);
3196 		group_queue_work(group, term);
3197 	}
3198 	mutex_unlock(&sched->lock);
3199 	mutex_unlock(&sched->reset.lock);
3200 
3201 	group_put(group);
3202 	return 0;
3203 }
3204 
3205 int panthor_group_get_state(struct panthor_file *pfile,
3206 			    struct drm_panthor_group_get_state *get_state)
3207 {
3208 	struct panthor_group_pool *gpool = pfile->groups;
3209 	struct panthor_device *ptdev = pfile->ptdev;
3210 	struct panthor_scheduler *sched = ptdev->scheduler;
3211 	struct panthor_group *group;
3212 
3213 	if (get_state->pad)
3214 		return -EINVAL;
3215 
3216 	group = group_get(xa_load(&gpool->xa, get_state->group_handle));
3217 	if (!group)
3218 		return -EINVAL;
3219 
3220 	memset(get_state, 0, sizeof(*get_state));
3221 
3222 	mutex_lock(&sched->lock);
3223 	if (group->timedout)
3224 		get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;
3225 	if (group->fatal_queues) {
3226 		get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
3227 		get_state->fatal_queues = group->fatal_queues;
3228 	}
3229 	mutex_unlock(&sched->lock);
3230 
3231 	group_put(group);
3232 	return 0;
3233 }
3234 
3235 int panthor_group_pool_create(struct panthor_file *pfile)
3236 {
3237 	struct panthor_group_pool *gpool;
3238 
3239 	gpool = kzalloc(sizeof(*gpool), GFP_KERNEL);
3240 	if (!gpool)
3241 		return -ENOMEM;
3242 
3243 	xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
3244 	pfile->groups = gpool;
3245 	return 0;
3246 }
3247 
3248 void panthor_group_pool_destroy(struct panthor_file *pfile)
3249 {
3250 	struct panthor_group_pool *gpool = pfile->groups;
3251 	struct panthor_group *group;
3252 	unsigned long i;
3253 
3254 	if (IS_ERR_OR_NULL(gpool))
3255 		return;
3256 
3257 	xa_for_each(&gpool->xa, i, group)
3258 		panthor_group_destroy(pfile, i);
3259 
3260 	xa_destroy(&gpool->xa);
3261 	kfree(gpool);
3262 	pfile->groups = NULL;
3263 }
3264 
3265 static void job_release(struct kref *ref)
3266 {
3267 	struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
3268 
3269 	drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));
3270 
3271 	if (job->base.s_fence)
3272 		drm_sched_job_cleanup(&job->base);
3273 
3274 	if (job->done_fence && job->done_fence->ops)
3275 		dma_fence_put(job->done_fence);
3276 	else
3277 		dma_fence_free(job->done_fence);
3278 
3279 	group_put(job->group);
3280 
3281 	kfree(job);
3282 }
3283 
3284 struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
3285 {
3286 	if (sched_job) {
3287 		struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3288 
3289 		kref_get(&job->refcount);
3290 	}
3291 
3292 	return sched_job;
3293 }
3294 
3295 void panthor_job_put(struct drm_sched_job *sched_job)
3296 {
3297 	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3298 
3299 	if (sched_job)
3300 		kref_put(&job->refcount, job_release);
3301 }
3302 
3303 struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
3304 {
3305 	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3306 
3307 	return job->group->vm;
3308 }
3309 
3310 struct drm_sched_job *
3311 panthor_job_create(struct panthor_file *pfile,
3312 		   u16 group_handle,
3313 		   const struct drm_panthor_queue_submit *qsubmit)
3314 {
3315 	struct panthor_group_pool *gpool = pfile->groups;
3316 	struct panthor_job *job;
3317 	int ret;
3318 
3319 	if (qsubmit->pad)
3320 		return ERR_PTR(-EINVAL);
3321 
3322 	/* If stream_addr is zero, stream_size should be zero too. */
3323 	if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
3324 		return ERR_PTR(-EINVAL);
3325 
3326 	/* Make sure the address is aligned on 64-byte (cacheline) and the size is
3327 	 * aligned on 8-byte (instruction size).
3328 	 */
3329 	if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
3330 		return ERR_PTR(-EINVAL);
3331 
3332 	/* bits 24:30 must be zero. */
3333 	if (qsubmit->latest_flush & GENMASK(30, 24))
3334 		return ERR_PTR(-EINVAL);
3335 
3336 	job = kzalloc(sizeof(*job), GFP_KERNEL);
3337 	if (!job)
3338 		return ERR_PTR(-ENOMEM);
3339 
3340 	kref_init(&job->refcount);
3341 	job->queue_idx = qsubmit->queue_index;
3342 	job->call_info.size = qsubmit->stream_size;
3343 	job->call_info.start = qsubmit->stream_addr;
3344 	job->call_info.latest_flush = qsubmit->latest_flush;
3345 	INIT_LIST_HEAD(&job->node);
3346 
3347 	job->group = group_get(xa_load(&gpool->xa, group_handle));
3348 	if (!job->group) {
3349 		ret = -EINVAL;
3350 		goto err_put_job;
3351 	}
3352 
3353 	if (job->queue_idx >= job->group->queue_count ||
3354 	    !job->group->queues[job->queue_idx]) {
3355 		ret = -EINVAL;
3356 		goto err_put_job;
3357 	}
3358 
3359 	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
3360 	if (!job->done_fence) {
3361 		ret = -ENOMEM;
3362 		goto err_put_job;
3363 	}
3364 
3365 	ret = drm_sched_job_init(&job->base,
3366 				 &job->group->queues[job->queue_idx]->entity,
3367 				 1, job->group);
3368 	if (ret)
3369 		goto err_put_job;
3370 
3371 	return &job->base;
3372 
3373 err_put_job:
3374 	panthor_job_put(&job->base);
3375 	return ERR_PTR(ret);
3376 }
3377 
3378 void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
3379 {
3380 	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3381 
3382 	/* Still not sure why we want USAGE_WRITE for external objects, since I
3383 	 * was assuming this would be handled through explicit syncs being imported
3384 	 * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
3385 	 * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
3386 	 */
3387 	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
3388 				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
3389 }
3390 
3391 void panthor_sched_unplug(struct panthor_device *ptdev)
3392 {
3393 	struct panthor_scheduler *sched = ptdev->scheduler;
3394 
3395 	cancel_delayed_work_sync(&sched->tick_work);
3396 
3397 	mutex_lock(&sched->lock);
3398 	if (sched->pm.has_ref) {
3399 		pm_runtime_put(ptdev->base.dev);
3400 		sched->pm.has_ref = false;
3401 	}
3402 	mutex_unlock(&sched->lock);
3403 }
3404 
3405 static void panthor_sched_fini(struct drm_device *ddev, void *res)
3406 {
3407 	struct panthor_scheduler *sched = res;
3408 	int prio;
3409 
3410 	if (!sched || !sched->csg_slot_count)
3411 		return;
3412 
3413 	cancel_delayed_work_sync(&sched->tick_work);
3414 
3415 	if (sched->wq)
3416 		destroy_workqueue(sched->wq);
3417 
3418 	if (sched->heap_alloc_wq)
3419 		destroy_workqueue(sched->heap_alloc_wq);
3420 
3421 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3422 		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
3423 		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
3424 	}
3425 
3426 	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
3427 }
3428 
3429 int panthor_sched_init(struct panthor_device *ptdev)
3430 {
3431 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
3432 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3433 	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
3434 	struct panthor_scheduler *sched;
3435 	u32 gpu_as_count, num_groups;
3436 	int prio, ret;
3437 
3438 	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
3439 	if (!sched)
3440 		return -ENOMEM;
3441 
3442 	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
3443 	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
3444 	 */
3445 	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
3446 
3447 	/* The FW-side scheduler might deadlock if two groups with the same
3448 	 * priority try to access a set of resources that overlaps, with part
3449 	 * of the resources being allocated to one group and the other part to
3450 	 * the other group, both groups waiting for the remaining resources to
3451 	 * be allocated. To avoid that, it is recommended to assign each CSG a
3452 	 * different priority. In theory we could allow several groups to have
3453 	 * the same CSG priority if they don't request the same resources, but
3454 	 * that makes the scheduling logic more complicated, so let's clamp
3455 	 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
3456 	 */
3457 	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
3458 
3459 	/* We need at least one AS for the MCU and one for the GPU contexts. */
3460 	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
3461 	if (!gpu_as_count) {
3462 		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
3463 			gpu_as_count + 1);
3464 		return -EINVAL;
3465 	}
3466 
3467 	sched->ptdev = ptdev;
3468 	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
3469 	sched->csg_slot_count = num_groups;
3470 	sched->cs_slot_count = csg_iface->control->stream_num;
3471 	sched->as_slot_count = gpu_as_count;
3472 	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
3473 	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
3474 	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
3475 
3476 	sched->last_tick = 0;
3477 	sched->resched_target = U64_MAX;
3478 	sched->tick_period = msecs_to_jiffies(10);
3479 	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
3480 	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
3481 	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
3482 
3483 	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
3484 	if (ret)
3485 		return ret;
3486 
3487 	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3488 		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
3489 		INIT_LIST_HEAD(&sched->groups.idle[prio]);
3490 	}
3491 	INIT_LIST_HEAD(&sched->groups.waiting);
3492 
3493 	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
3494 	if (ret)
3495 		return ret;
3496 
3497 	INIT_LIST_HEAD(&sched->reset.stopped_groups);
3498 
3499 	/* sched->heap_alloc_wq will be used for heap chunk allocation on
3500 	 * tiler OOM events, which means we can't use the same workqueue for
3501 	 * the scheduler because works queued by the scheduler are in
3502 	 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
3503 	 * work around this limitation.
3504 	 *
3505 	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
3506 	 * allocation path that we can call when a heap OOM is reported. The
3507 	 * FW is smart enough to fall back on other methods if the kernel can't
3508 	 * allocate memory, and fail the tiling job if none of these
3509 	 * countermeasures worked.
3510 	 *
3511 	 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
3512 	 * system is running out of memory.
3513 	 */
3514 	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
3515 	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
3516 	if (!sched->wq || !sched->heap_alloc_wq) {
3517 		panthor_sched_fini(&ptdev->base, sched);
3518 		drm_err(&ptdev->base, "Failed to allocate the workqueues");
3519 		return -ENOMEM;
3520 	}
3521 
3522 	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
3523 	if (ret)
3524 		return ret;
3525 
3526 	ptdev->scheduler = sched;
3527 	return 0;
3528 }
3529