xref: /linux/drivers/gpu/drm/panthor/panthor_fw.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7 
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 
16 #include <drm/drm_drv.h>
17 #include <drm/drm_managed.h>
18 
19 #include "panthor_device.h"
20 #include "panthor_fw.h"
21 #include "panthor_gem.h"
22 #include "panthor_gpu.h"
23 #include "panthor_mmu.h"
24 #include "panthor_regs.h"
25 #include "panthor_sched.h"
26 
/* File name of the CSF firmware image; panthor_fw_load() appends it to the per-arch directory. */
#define CSF_FW_NAME "mali_csffw.bin"

/* Delay, in milliseconds, used when arming the watchdog ping work. */
#define PING_INTERVAL_MS			12000
/* Progress timeout in cycles, before scaling (see PROGRESS_TIMEOUT_SCALE_SHIFT). */
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
/* Right shift applied to PROGRESS_TIMEOUT_CYCLES before it is written to the progress_timer field. */
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
/* Timeout, in microseconds, converted by panthor_fw_conv_timeout() for the idle_timer field. */
#define IDLE_HYSTERESIS_US			800
/* Timeout, in microseconds, converted by panthor_fw_conv_timeout() for the poweroff_timer field. */
#define PWROFF_HYSTERESIS_US			10000
34 
/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 *
 * Read from the very start of the firmware image by panthor_fw_load(),
 * which checks @magic, @major and @size before walking the entries that
 * follow the header.
 */
struct panthor_fw_binary_hdr {
	/**
	 * @magic: Magic value to check binary validity.
	 *
	 * Must be equal to CSF_FW_BINARY_HEADER_MAGIC.
	 */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. Only reported in error messages. */
	u8 minor;

	/**
	 * @major: Major FW version.
	 *
	 * Must be equal to CSF_FW_BINARY_HEADER_MAJOR_MAX.
	 */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/**
	 * @size: FW binary size.
	 *
	 * Must not exceed the size of the loaded file. Used as the upper
	 * bound when iterating over the entries.
	 */
	u32 size;
};
62 
/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 *
 * Extracted from an entry header with CSF_FW_BINARY_ENTRY_TYPE(). Only
 * CSF_FW_BINARY_ENTRY_TYPE_IFACE entries are parsed by
 * panthor_fw_load_entry(); the other known types are accepted but ignored.
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
};
82 
/*
 * Accessors and flags for the 32-bit header word preceding each FW binary
 * entry: the type lives in bits 7:0 and the entry size, in bytes and
 * including the header word itself, in bits 15:8.
 */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
/* NOTE(review): not used by the code visible in this file section. */
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
/* Entries of unknown type are skipped instead of rejected when this bit is set. */
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

/*
 * Flags found in panthor_fw_binary_section_entry_hdr::flags, as used by
 * panthor_fw_load_section_entry():
 * - RD_WR clear: the section is mapped read-only
 * - RD_EX clear: the section is mapped non-executable
 * - cache mode other than CACHED: the section is mapped uncached
 * - RD_PROT: the entry is ignored (with a warning)
 * - RD_SHARED: the section memory is kept CPU-mapped; required for the
 *   section placed at CSF_MCU_SHARED_REGION_START
 * - RD_ZERO: the part of the section not covered by init data is zeroed
 */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)

/* Any section flag outside of this mask makes the firmware image invalid. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED  |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
108 
/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 *
 * Read from the payload of a CSF_FW_BINARY_ENTRY_TYPE_IFACE entry. Any bytes
 * following this header in the entry are treated as the section name.
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags (CSF_FW_BINARY_IFACE_ENTRY_RD_xxx). */
	u32 flags;

	/**
	 * @va: MCU virtual range to map this binary section to.
	 *
	 * Both bounds must be page aligned, and @va.end must not be
	 * smaller than @va.start.
	 */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/**
	 * @data: Data to initialize the FW section with.
	 *
	 * @data.end must not be smaller than @data.start, and must not
	 * exceed the firmware file size.
	 */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};
134 
/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary. Reads are bounds-checked against @size
 * and advance @offset (see panthor_fw_binary_iter_read()).
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. The iterator does not own this memory. */
	const void *data;

	/** @size: Size in bytes of the region pointed to by @data. */
	size_t size;

	/** @offset: Iterator offset, in bytes, relative to @data. */
	size_t offset;
};
150 
/**
 * struct panthor_fw_section - FW section
 *
 * One instance is created per interface entry found in the FW binary and
 * linked into panthor_fw::sections.
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/**
	 * @mem: Section memory.
	 *
	 * Only allocated when the section VA range is not empty.
	 */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. NULL when @size is zero. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
184 
/*
 * MCU VA of the shared region. The FW section starting at this address must
 * carry the SHARED flag and is recorded as panthor_fw::shared_section.
 */
#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

/* Lower bounds enforced on the stream/group counts advertised by the FW. */
#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3
#define MAX_CSG_PRIO				0xf

/* Helpers to build/decompose the FW interface version: major[31:24], minor[23:16], patch[15:0]. */
#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

/*
 * Offsets used to locate control interfaces in the shared section: group
 * control blocks start at CSF_GROUP_CONTROL_OFFSET, and stream control
 * blocks start CSF_STREAM_CONTROL_OFFSET bytes into their group block.
 */
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
/* Value stored in csif_info.unpreserved_cs_reg_count. */
#define CSF_UNPRESERVED_REG_COUNT		4
201 
/**
 * struct panthor_fw_iface - FW interfaces
 *
 * Host-side descriptors for the interfaces exposed by the FW in the shared
 * section. They are filled by panthor_fw_init_ifaces().
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces, indexed by group slot. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces, indexed by group slot then stream slot. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};
215 
/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/**
	 * @shared_section: The section containing the FW interfaces.
	 *
	 * This is the section mapped at CSF_MCU_SHARED_REGION_START.
	 */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/**
	 * @booted: True if the FW is booted.
	 *
	 * Cleared by panthor_fw_start() and set by the job IRQ handler when
	 * the global interface interrupt is reported.
	 */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
266 
267 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
268 {
269 	return ptdev->fw->vm;
270 }
271 
272 /**
273  * panthor_fw_get_glb_iface() - Get the global interface
274  * @ptdev: Device.
275  *
276  * Return: The global interface.
277  */
278 struct panthor_fw_global_iface *
279 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
280 {
281 	return &ptdev->fw->iface.global;
282 }
283 
284 /**
285  * panthor_fw_get_csg_iface() - Get a command stream group slot interface
286  * @ptdev: Device.
287  * @csg_slot: Index of the command stream group slot.
288  *
289  * Return: The command stream group slot interface.
290  */
291 struct panthor_fw_csg_iface *
292 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
293 {
294 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
295 		return NULL;
296 
297 	return &ptdev->fw->iface.groups[csg_slot];
298 }
299 
300 /**
301  * panthor_fw_get_cs_iface() - Get a command stream slot interface
302  * @ptdev: Device.
303  * @csg_slot: Index of the command stream group slot.
304  * @cs_slot: Index of the command stream slot.
305  *
306  * Return: The command stream slot interface.
307  */
308 struct panthor_fw_cs_iface *
309 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
310 {
311 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
312 		return NULL;
313 
314 	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
315 }
316 
317 /**
318  * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
319  * @ptdev: Device.
320  * @timeout_us: Timeout expressed in micro-seconds.
321  *
322  * The FW has two timer sources: the GPU counter or arch-timer. We need
323  * to express timeouts in term of number of cycles and specify which
324  * timer source should be used.
325  *
326  * Return: A value suitable for timeout fields in the global interface.
327  */
328 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
329 {
330 	bool use_cycle_counter = false;
331 	u32 timer_rate = 0;
332 	u64 mod_cycles;
333 
334 #ifdef CONFIG_ARM_ARCH_TIMER
335 	timer_rate = arch_timer_get_cntfrq();
336 #endif
337 
338 	if (!timer_rate) {
339 		use_cycle_counter = true;
340 		timer_rate = clk_get_rate(ptdev->clks.core);
341 	}
342 
343 	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
344 		/* We couldn't get a valid clock rate, let's just pick the
345 		 * maximum value so the FW still handles the core
346 		 * power on/off requests.
347 		 */
348 		return GLB_TIMER_VAL(~0) |
349 		       GLB_TIMER_SOURCE_GPU_COUNTER;
350 	}
351 
352 	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
353 				      1000000ull << 10);
354 	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
355 		mod_cycles = GLB_TIMER_VAL(~0);
356 
357 	return GLB_TIMER_VAL(mod_cycles) |
358 	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
359 }
360 
361 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
362 				       struct panthor_fw_binary_iter *iter,
363 				       void *out, size_t size)
364 {
365 	size_t new_offset = iter->offset + size;
366 
367 	if (new_offset > iter->size || new_offset < iter->offset) {
368 		drm_err(&ptdev->base, "Firmware too small\n");
369 		return -EINVAL;
370 	}
371 
372 	memcpy(out, iter->data + iter->offset, size);
373 	iter->offset = new_offset;
374 	return 0;
375 }
376 
377 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
378 					   struct panthor_fw_binary_iter *iter,
379 					   struct panthor_fw_binary_iter *sub_iter,
380 					   size_t size)
381 {
382 	size_t new_offset = iter->offset + size;
383 
384 	if (new_offset > iter->size || new_offset < iter->offset) {
385 		drm_err(&ptdev->base, "Firmware entry too long\n");
386 		return -EINVAL;
387 	}
388 
389 	sub_iter->offset = 0;
390 	sub_iter->data = iter->data + iter->offset;
391 	sub_iter->size = size;
392 	iter->offset = new_offset;
393 	return 0;
394 }
395 
396 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
397 					struct panthor_fw_section *section)
398 {
399 	bool was_mapped = !!section->mem->kmap;
400 	int ret;
401 
402 	if (!section->data.size &&
403 	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
404 		return;
405 
406 	ret = panthor_kernel_bo_vmap(section->mem);
407 	if (drm_WARN_ON(&ptdev->base, ret))
408 		return;
409 
410 	memcpy(section->mem->kmap, section->data.buf, section->data.size);
411 	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
412 		memset(section->mem->kmap + section->data.size, 0,
413 		       panthor_kernel_bo_size(section->mem) - section->data.size);
414 	}
415 
416 	if (!was_mapped)
417 		panthor_kernel_bo_vunmap(section->mem);
418 }
419 
420 /**
421  * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
422  * @ptdev: Device.
423  * @input: Pointer holding the input interface on success.
424  * Should be ignored on failure.
425  * @output: Pointer holding the output interface on success.
426  * Should be ignored on failure.
427  * @input_fw_va: Pointer holding the input interface FW VA on success.
428  * Should be ignored on failure.
429  * @output_fw_va: Pointer holding the output interface FW VA on success.
430  * Should be ignored on failure.
431  *
432  * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
433  * interface is at offset 0, and the output interface at offset 4096.
434  *
435  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
436  */
437 struct panthor_kernel_bo *
438 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
439 				 struct panthor_fw_ringbuf_input_iface **input,
440 				 const struct panthor_fw_ringbuf_output_iface **output,
441 				 u32 *input_fw_va, u32 *output_fw_va)
442 {
443 	struct panthor_kernel_bo *mem;
444 	int ret;
445 
446 	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
447 				       DRM_PANTHOR_BO_NO_MMAP,
448 				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
449 				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
450 				       PANTHOR_VM_KERNEL_AUTO_VA);
451 	if (IS_ERR(mem))
452 		return mem;
453 
454 	ret = panthor_kernel_bo_vmap(mem);
455 	if (ret) {
456 		panthor_kernel_bo_destroy(mem);
457 		return ERR_PTR(ret);
458 	}
459 
460 	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
461 	*input = mem->kmap;
462 	*output = mem->kmap + SZ_4K;
463 	*input_fw_va = panthor_kernel_bo_gpuva(mem);
464 	*output_fw_va = *input_fw_va + SZ_4K;
465 
466 	return mem;
467 }
468 
469 /**
470  * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
471  * @ptdev: Device.
472  * @size: Size of the suspend buffer.
473  *
474  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
475  */
476 struct panthor_kernel_bo *
477 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
478 {
479 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
480 					DRM_PANTHOR_BO_NO_MMAP,
481 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
482 					PANTHOR_VM_KERNEL_AUTO_VA);
483 }
484 
485 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
486 					 const struct firmware *fw,
487 					 struct panthor_fw_binary_iter *iter,
488 					 u32 ehdr)
489 {
490 	struct panthor_fw_binary_section_entry_hdr hdr;
491 	struct panthor_fw_section *section;
492 	u32 section_size;
493 	u32 name_len;
494 	int ret;
495 
496 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
497 	if (ret)
498 		return ret;
499 
500 	if (hdr.data.end < hdr.data.start) {
501 		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
502 			hdr.data.end, hdr.data.start);
503 		return -EINVAL;
504 	}
505 
506 	if (hdr.va.end < hdr.va.start) {
507 		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
508 			hdr.va.end, hdr.va.start);
509 		return -EINVAL;
510 	}
511 
512 	if (hdr.data.end > fw->size) {
513 		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
514 			hdr.data.end, fw->size);
515 		return -EINVAL;
516 	}
517 
518 	if ((hdr.va.start & ~PAGE_MASK) != 0 ||
519 	    (hdr.va.end & ~PAGE_MASK) != 0) {
520 		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
521 			hdr.va.start, hdr.va.end);
522 		return -EINVAL;
523 	}
524 
525 	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
526 		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
527 			hdr.flags);
528 		return -EINVAL;
529 	}
530 
531 	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
532 		drm_warn(&ptdev->base,
533 			 "Firmware protected mode entry not be supported, ignoring");
534 		return 0;
535 	}
536 
537 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
538 	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
539 		drm_err(&ptdev->base,
540 			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
541 		return -EINVAL;
542 	}
543 
544 	name_len = iter->size - iter->offset;
545 
546 	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
547 	if (!section)
548 		return -ENOMEM;
549 
550 	list_add_tail(&section->node, &ptdev->fw->sections);
551 	section->flags = hdr.flags;
552 	section->data.size = hdr.data.end - hdr.data.start;
553 
554 	if (section->data.size > 0) {
555 		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
556 
557 		if (!data)
558 			return -ENOMEM;
559 
560 		memcpy(data, fw->data + hdr.data.start, section->data.size);
561 		section->data.buf = data;
562 	}
563 
564 	if (name_len > 0) {
565 		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
566 
567 		if (!name)
568 			return -ENOMEM;
569 
570 		memcpy(name, iter->data + iter->offset, name_len);
571 		name[name_len] = '\0';
572 		section->name = name;
573 	}
574 
575 	section_size = hdr.va.end - hdr.va.start;
576 	if (section_size) {
577 		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
578 		struct panthor_gem_object *bo;
579 		u32 vm_map_flags = 0;
580 		struct sg_table *sgt;
581 		u64 va = hdr.va.start;
582 
583 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
584 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
585 
586 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
587 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
588 
589 		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
590 		 * non-cacheable for now. We might want to introduce a new
591 		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
592 		 * memory and is currently not used by our driver) for
593 		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
594 		 * of IO-coherent systems.
595 		 */
596 		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
597 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
598 
599 		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
600 							section_size,
601 							DRM_PANTHOR_BO_NO_MMAP,
602 							vm_map_flags, va);
603 		if (IS_ERR(section->mem))
604 			return PTR_ERR(section->mem);
605 
606 		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
607 			return -EINVAL;
608 
609 		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
610 			ret = panthor_kernel_bo_vmap(section->mem);
611 			if (ret)
612 				return ret;
613 		}
614 
615 		panthor_fw_init_section_mem(ptdev, section);
616 
617 		bo = to_panthor_bo(section->mem->obj);
618 		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
619 		if (IS_ERR(sgt))
620 			return PTR_ERR(sgt);
621 
622 		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
623 	}
624 
625 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
626 		ptdev->fw->shared_section = section;
627 
628 	return 0;
629 }
630 
631 static void
632 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
633 {
634 	struct panthor_fw_section *section;
635 
636 	list_for_each_entry(section, &ptdev->fw->sections, node) {
637 		struct sg_table *sgt;
638 
639 		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
640 			continue;
641 
642 		panthor_fw_init_section_mem(ptdev, section);
643 		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
644 		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
645 			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
646 	}
647 }
648 
649 static int panthor_fw_load_entry(struct panthor_device *ptdev,
650 				 const struct firmware *fw,
651 				 struct panthor_fw_binary_iter *iter)
652 {
653 	struct panthor_fw_binary_iter eiter;
654 	u32 ehdr;
655 	int ret;
656 
657 	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
658 	if (ret)
659 		return ret;
660 
661 	if ((iter->offset % sizeof(u32)) ||
662 	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
663 		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
664 			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
665 		return -EINVAL;
666 	}
667 
668 	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
669 					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
670 		return -EINVAL;
671 
672 	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
673 	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
674 		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
675 
676 	/* FIXME: handle those entry types? */
677 	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
678 	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
679 	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
680 	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
681 		return 0;
682 	default:
683 		break;
684 	}
685 
686 	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
687 		return 0;
688 
689 	drm_err(&ptdev->base,
690 		"Unsupported non-optional entry type %u in firmware\n",
691 		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
692 	return -EINVAL;
693 }
694 
695 static int panthor_fw_load(struct panthor_device *ptdev)
696 {
697 	const struct firmware *fw = NULL;
698 	struct panthor_fw_binary_iter iter = {};
699 	struct panthor_fw_binary_hdr hdr;
700 	char fw_path[128];
701 	int ret;
702 
703 	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
704 		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
705 		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
706 		 CSF_FW_NAME);
707 
708 	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
709 	if (ret) {
710 		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
711 			CSF_FW_NAME);
712 		return ret;
713 	}
714 
715 	iter.data = fw->data;
716 	iter.size = fw->size;
717 	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
718 	if (ret)
719 		goto out;
720 
721 	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
722 		ret = -EINVAL;
723 		drm_err(&ptdev->base, "Invalid firmware magic\n");
724 		goto out;
725 	}
726 
727 	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
728 		ret = -EINVAL;
729 		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
730 			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
731 		goto out;
732 	}
733 
734 	if (hdr.size > iter.size) {
735 		drm_err(&ptdev->base, "Firmware image is truncated\n");
736 		goto out;
737 	}
738 
739 	iter.size = hdr.size;
740 
741 	while (iter.offset < hdr.size) {
742 		ret = panthor_fw_load_entry(ptdev, fw, &iter);
743 		if (ret)
744 			goto out;
745 	}
746 
747 	if (!ptdev->fw->shared_section) {
748 		drm_err(&ptdev->base, "Shared interface region not found\n");
749 		ret = -EINVAL;
750 		goto out;
751 	}
752 
753 out:
754 	release_firmware(fw);
755 	return ret;
756 }
757 
758 /**
759  * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
760  * @ptdev: Device.
761  * @mcu_va: MCU address.
762  *
763  * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
764  */
765 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
766 {
767 	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
768 	u64 shared_mem_end = shared_mem_start +
769 			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
770 	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
771 		return NULL;
772 
773 	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
774 }
775 
776 static int panthor_init_cs_iface(struct panthor_device *ptdev,
777 				 unsigned int csg_idx, unsigned int cs_idx)
778 {
779 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
780 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
781 	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
782 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
783 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
784 			   (csg_idx * glb_iface->control->group_stride) +
785 			   CSF_STREAM_CONTROL_OFFSET +
786 			   (cs_idx * csg_iface->control->stream_stride);
787 	struct panthor_fw_cs_iface *first_cs_iface =
788 		panthor_fw_get_cs_iface(ptdev, 0, 0);
789 
790 	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
791 		return -EINVAL;
792 
793 	spin_lock_init(&cs_iface->lock);
794 	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
795 	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
796 	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
797 
798 	if (!cs_iface->input || !cs_iface->output) {
799 		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
800 		return -EINVAL;
801 	}
802 
803 	if (cs_iface != first_cs_iface) {
804 		if (cs_iface->control->features != first_cs_iface->control->features) {
805 			drm_err(&ptdev->base, "Expecting identical CS slots");
806 			return -EINVAL;
807 		}
808 	} else {
809 		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
810 
811 		ptdev->csif_info.cs_reg_count = reg_count;
812 		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
813 	}
814 
815 	return 0;
816 }
817 
818 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
819 			const struct panthor_fw_csg_control_iface *b)
820 {
821 	if (a->features != b->features)
822 		return false;
823 	if (a->suspend_size != b->suspend_size)
824 		return false;
825 	if (a->protm_suspend_size != b->protm_suspend_size)
826 		return false;
827 	if (a->stream_num != b->stream_num)
828 		return false;
829 	return true;
830 }
831 
832 static int panthor_init_csg_iface(struct panthor_device *ptdev,
833 				  unsigned int csg_idx)
834 {
835 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
836 	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
837 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
838 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
839 	unsigned int i;
840 
841 	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
842 		return -EINVAL;
843 
844 	spin_lock_init(&csg_iface->lock);
845 	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
846 	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
847 	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
848 
849 	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
850 	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
851 		return -EINVAL;
852 
853 	if (!csg_iface->input || !csg_iface->output) {
854 		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
855 		return -EINVAL;
856 	}
857 
858 	if (csg_idx > 0) {
859 		struct panthor_fw_csg_iface *first_csg_iface =
860 			panthor_fw_get_csg_iface(ptdev, 0);
861 
862 		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
863 			drm_err(&ptdev->base, "Expecting identical CSG slots");
864 			return -EINVAL;
865 		}
866 	}
867 
868 	for (i = 0; i < csg_iface->control->stream_num; i++) {
869 		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
870 
871 		if (ret)
872 			return ret;
873 	}
874 
875 	return 0;
876 }
877 
878 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
879 {
880 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
881 
882 	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
883 		return 0;
884 
885 	return glb_iface->control->instr_features;
886 }
887 
888 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
889 {
890 	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
891 	unsigned int i;
892 
893 	if (!ptdev->fw->shared_section->mem->kmap)
894 		return -EINVAL;
895 
896 	spin_lock_init(&glb_iface->lock);
897 	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
898 
899 	if (!glb_iface->control->version) {
900 		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
901 		return -EINVAL;
902 	}
903 
904 	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
905 	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
906 	if (!glb_iface->input || !glb_iface->output) {
907 		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
908 		return -EINVAL;
909 	}
910 
911 	if (glb_iface->control->group_num > MAX_CSGS ||
912 	    glb_iface->control->group_num < MIN_CSGS) {
913 		drm_err(&ptdev->base, "Invalid number of control groups");
914 		return -EINVAL;
915 	}
916 
917 	for (i = 0; i < glb_iface->control->group_num; i++) {
918 		int ret = panthor_init_csg_iface(ptdev, i);
919 
920 		if (ret)
921 			return ret;
922 	}
923 
924 	drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
925 		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
926 		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
927 		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
928 		 glb_iface->control->features,
929 		 panthor_get_instr_features(ptdev));
930 	return 0;
931 }
932 
/**
 * panthor_fw_init_global_iface() - Program the global interface input block
 * @ptdev: Device.
 *
 * Fills the global input interface (core mask, timers, ack IRQ mask),
 * issues the matching global requests, writes to the global doorbell and
 * arms the watchdog ping work.
 */
static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	/* NOTE(review): the update/toggle helpers are defined outside this
	 * file; presumably they set GLB_IDLE_EN in the request word and flip
	 * the CFG bits relative to the ack word - confirm against panthor_fw.h.
	 */
	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	/* Write to the global doorbell once the requests have been posted. */
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}
965 
966 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
967 {
968 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
969 		ptdev->fw->booted = true;
970 
971 	wake_up_all(&ptdev->fw->req_waitqueue);
972 
973 	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
974 	if (!ptdev->fw->booted)
975 		return;
976 
977 	panthor_sched_report_fw_events(ptdev, status);
978 }
979 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
980 
981 static int panthor_fw_start(struct panthor_device *ptdev)
982 {
983 	bool timedout = false;
984 
985 	ptdev->fw->booted = false;
986 	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
987 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
988 
989 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
990 				ptdev->fw->booted,
991 				msecs_to_jiffies(1000))) {
992 		if (!ptdev->fw->booted &&
993 		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
994 			timedout = true;
995 	}
996 
997 	if (timedout) {
998 		static const char * const status_str[] = {
999 			[MCU_STATUS_DISABLED] = "disabled",
1000 			[MCU_STATUS_ENABLED] = "enabled",
1001 			[MCU_STATUS_HALT] = "halt",
1002 			[MCU_STATUS_FATAL] = "fatal",
1003 		};
1004 		u32 status = gpu_read(ptdev, MCU_STATUS);
1005 
1006 		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1007 			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1008 		return -ETIMEDOUT;
1009 	}
1010 
1011 	return 0;
1012 }
1013 
1014 static void panthor_fw_stop(struct panthor_device *ptdev)
1015 {
1016 	u32 status;
1017 
1018 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1019 	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1020 			       status == MCU_STATUS_DISABLED, 10, 100000))
1021 		drm_err(&ptdev->base, "Failed to stop MCU");
1022 }
1023 
1024 /**
1025  * panthor_fw_pre_reset() - Call before a reset.
1026  * @ptdev: Device.
1027  * @on_hang: true if the reset was triggered on a GPU hang.
1028  *
1029  * If the reset is not triggered on a hang, we try to gracefully halt the
1030  * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1031  */
1032 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1033 {
1034 	/* Make sure we won't be woken up by a ping. */
1035 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1036 
1037 	ptdev->fw->fast_reset = false;
1038 
1039 	if (!on_hang) {
1040 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1041 		u32 status;
1042 
1043 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1044 		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1045 		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1046 					status == MCU_STATUS_HALT, 10, 100000) &&
1047 		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
1048 			ptdev->fw->fast_reset = true;
1049 		} else {
1050 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1051 		}
1052 
1053 		/* The FW detects 0 -> 1 transitions. Make sure we reset
1054 		 * the HALT bit before the FW is rebooted.
1055 		 */
1056 		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1057 	}
1058 
1059 	panthor_job_irq_suspend(&ptdev->fw->irq);
1060 }
1061 
1062 /**
1063  * panthor_fw_post_reset() - Call after a reset.
1064  * @ptdev: Device.
1065  *
1066  * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1067  * make sure we can recover from a memory corruption.
1068  */
1069 int panthor_fw_post_reset(struct panthor_device *ptdev)
1070 {
1071 	int ret;
1072 
1073 	/* Make the MCU VM active. */
1074 	ret = panthor_vm_active(ptdev->fw->vm);
1075 	if (ret)
1076 		return ret;
1077 
1078 	/* If this is a fast reset, try to start the MCU without reloading
1079 	 * the FW sections. If it fails, go for a full reset.
1080 	 */
1081 	if (ptdev->fw->fast_reset) {
1082 		ret = panthor_fw_start(ptdev);
1083 		if (!ret)
1084 			goto out;
1085 
1086 		/* Forcibly reset the MCU and force a slow reset, so we get a
1087 		 * fresh boot on the next panthor_fw_start() call.
1088 		 */
1089 		panthor_fw_stop(ptdev);
1090 		ptdev->fw->fast_reset = false;
1091 		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
1092 	}
1093 
1094 	/* Reload all sections, including RO ones. We're not supposed
1095 	 * to end up here anyway, let's just assume the overhead of
1096 	 * reloading everything is acceptable.
1097 	 */
1098 	panthor_reload_fw_sections(ptdev, true);
1099 
1100 	ret = panthor_fw_start(ptdev);
1101 	if (ret) {
1102 		drm_err(&ptdev->base, "FW slow reset failed");
1103 		return ret;
1104 	}
1105 
1106 out:
1107 	/* We must re-initialize the global interface even on fast-reset. */
1108 	panthor_fw_init_global_iface(ptdev);
1109 	return 0;
1110 }
1111 
1112 /**
1113  * panthor_fw_unplug() - Called when the device is unplugged.
1114  * @ptdev: Device.
1115  *
1116  * This function must make sure all pending operations are flushed before
1117  * will release device resources, thus preventing any interaction with
1118  * the HW.
1119  *
1120  * If there is still FW-related work running after this function returns,
1121  * they must use drm_dev_{enter,exit}() and skip any HW access when
1122  * drm_dev_enter() returns false.
1123  */
1124 void panthor_fw_unplug(struct panthor_device *ptdev)
1125 {
1126 	struct panthor_fw_section *section;
1127 
1128 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1129 
1130 	/* Make sure the IRQ handler can be called after that point. */
1131 	if (ptdev->fw->irq.irq)
1132 		panthor_job_irq_suspend(&ptdev->fw->irq);
1133 
1134 	panthor_fw_stop(ptdev);
1135 
1136 	list_for_each_entry(section, &ptdev->fw->sections, node)
1137 		panthor_kernel_bo_destroy(section->mem);
1138 
1139 	/* We intentionally don't call panthor_vm_idle() and let
1140 	 * panthor_mmu_unplug() release the AS we acquired with
1141 	 * panthor_vm_active() so we don't have to track the VM active/idle
1142 	 * state to keep the active_refcnt balanced.
1143 	 */
1144 	panthor_vm_put(ptdev->fw->vm);
1145 	ptdev->fw->vm = NULL;
1146 
1147 	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1148 }
1149 
1150 /**
1151  * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1152  * @req_ptr: Pointer to the req register.
1153  * @ack_ptr: Pointer to the ack register.
1154  * @wq: Wait queue to use for the sleeping wait.
1155  * @req_mask: Mask of requests to wait for.
1156  * @acked: Pointer to field that's updated with the acked requests.
1157  * If the function returns 0, *acked == req_mask.
1158  * @timeout_ms: Timeout expressed in milliseconds.
1159  *
1160  * Return: 0 on success, -ETIMEDOUT otherwise.
1161  */
1162 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1163 				wait_queue_head_t *wq,
1164 				u32 req_mask, u32 *acked,
1165 				u32 timeout_ms)
1166 {
1167 	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1168 	int ret;
1169 
1170 	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
1171 	*acked = req_mask;
1172 	ret = read_poll_timeout_atomic(READ_ONCE, ack,
1173 				       (ack & req_mask) == req,
1174 				       0, 10, 0,
1175 				       *ack_ptr);
1176 	if (!ret)
1177 		return 0;
1178 
1179 	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1180 			       msecs_to_jiffies(timeout_ms)))
1181 		return 0;
1182 
1183 	/* Check one last time, in case we were not woken up for some reason. */
1184 	ack = READ_ONCE(*ack_ptr);
1185 	if ((ack & req_mask) == req)
1186 		return 0;
1187 
1188 	*acked = ~(req ^ ack) & req_mask;
1189 	return -ETIMEDOUT;
1190 }
1191 
1192 /**
1193  * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1194  * @ptdev: Device.
1195  * @req_mask: Mask of requests to wait for.
1196  * @acked: Pointer to field that's updated with the acked requests.
1197  * If the function returns 0, *acked == req_mask.
1198  * @timeout_ms: Timeout expressed in milliseconds.
1199  *
1200  * Return: 0 on success, -ETIMEDOUT otherwise.
1201  */
1202 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1203 			     u32 req_mask, u32 *acked,
1204 			     u32 timeout_ms)
1205 {
1206 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1207 
1208 	/* GLB_HALT doesn't get acked through the FW interface. */
1209 	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1210 		return -EINVAL;
1211 
1212 	return panthor_fw_wait_acks(&glb_iface->input->req,
1213 				    &glb_iface->output->ack,
1214 				    &ptdev->fw->req_waitqueue,
1215 				    req_mask, acked, timeout_ms);
1216 }
1217 
1218 /**
1219  * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1220  * @ptdev: Device.
1221  * @csg_slot: CSG slot ID.
1222  * @req_mask: Mask of requests to wait for.
1223  * @acked: Pointer to field that's updated with the acked requests.
1224  * If the function returns 0, *acked == req_mask.
1225  * @timeout_ms: Timeout expressed in milliseconds.
1226  *
1227  * Return: 0 on success, -ETIMEDOUT otherwise.
1228  */
1229 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1230 			     u32 req_mask, u32 *acked, u32 timeout_ms)
1231 {
1232 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1233 	int ret;
1234 
1235 	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1236 		return -EINVAL;
1237 
1238 	ret = panthor_fw_wait_acks(&csg_iface->input->req,
1239 				   &csg_iface->output->ack,
1240 				   &ptdev->fw->req_waitqueue,
1241 				   req_mask, acked, timeout_ms);
1242 
1243 	/*
1244 	 * Check that all bits in the state field were updated, if any mismatch
1245 	 * then clear all bits in the state field. This allows code to do
1246 	 * (acked & CSG_STATE_MASK) and get the right value.
1247 	 */
1248 
1249 	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1250 		*acked &= ~CSG_STATE_MASK;
1251 
1252 	return ret;
1253 }
1254 
1255 /**
1256  * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1257  * @ptdev: Device.
1258  * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1259  *
1260  * This function is toggling bits in the doorbell_req and ringing the
1261  * global doorbell. It doesn't require a user doorbell to be attached to
1262  * the group.
1263  */
1264 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1265 {
1266 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1267 
1268 	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1269 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1270 }
1271 
1272 static void panthor_fw_ping_work(struct work_struct *work)
1273 {
1274 	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1275 	struct panthor_device *ptdev = fw->irq.ptdev;
1276 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1277 	u32 acked;
1278 	int ret;
1279 
1280 	if (panthor_device_reset_is_pending(ptdev))
1281 		return;
1282 
1283 	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1284 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1285 
1286 	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1287 	if (ret) {
1288 		panthor_device_schedule_reset(ptdev);
1289 		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1290 	} else {
1291 		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1292 				 msecs_to_jiffies(PING_INTERVAL_MS));
1293 	}
1294 }
1295 
1296 /**
1297  * panthor_fw_init() - Initialize FW related data.
1298  * @ptdev: Device.
1299  *
1300  * Return: 0 on success, a negative error code otherwise.
1301  */
1302 int panthor_fw_init(struct panthor_device *ptdev)
1303 {
1304 	struct panthor_fw *fw;
1305 	int ret, irq;
1306 
1307 	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1308 	if (!fw)
1309 		return -ENOMEM;
1310 
1311 	ptdev->fw = fw;
1312 	init_waitqueue_head(&fw->req_waitqueue);
1313 	INIT_LIST_HEAD(&fw->sections);
1314 	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1315 
1316 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1317 	if (irq <= 0)
1318 		return -ENODEV;
1319 
1320 	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1321 	if (ret) {
1322 		drm_err(&ptdev->base, "failed to request job irq");
1323 		return ret;
1324 	}
1325 
1326 	ret = panthor_gpu_l2_power_on(ptdev);
1327 	if (ret)
1328 		return ret;
1329 
1330 	fw->vm = panthor_vm_create(ptdev, true,
1331 				   0, SZ_4G,
1332 				   CSF_MCU_SHARED_REGION_START,
1333 				   CSF_MCU_SHARED_REGION_SIZE);
1334 	if (IS_ERR(fw->vm)) {
1335 		ret = PTR_ERR(fw->vm);
1336 		fw->vm = NULL;
1337 		goto err_unplug_fw;
1338 	}
1339 
1340 	ret = panthor_fw_load(ptdev);
1341 	if (ret)
1342 		goto err_unplug_fw;
1343 
1344 	ret = panthor_vm_active(fw->vm);
1345 	if (ret)
1346 		goto err_unplug_fw;
1347 
1348 	ret = panthor_fw_start(ptdev);
1349 	if (ret)
1350 		goto err_unplug_fw;
1351 
1352 	ret = panthor_fw_init_ifaces(ptdev);
1353 	if (ret)
1354 		goto err_unplug_fw;
1355 
1356 	panthor_fw_init_global_iface(ptdev);
1357 	return 0;
1358 
1359 err_unplug_fw:
1360 	panthor_fw_unplug(ptdev);
1361 	return ret;
1362 }
1363 
1364 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1365