xref: /linux/drivers/gpu/drm/panthor/panthor_fw.c (revision 74ba587f402d5501af2c85e50cf1e4044263b6ca)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7 
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 #include <linux/pm_runtime.h>
16 
17 #include <drm/drm_drv.h>
18 #include <drm/drm_managed.h>
19 #include <drm/drm_print.h>
20 
21 #include "panthor_device.h"
22 #include "panthor_fw.h"
23 #include "panthor_gem.h"
24 #include "panthor_gpu.h"
25 #include "panthor_mmu.h"
26 #include "panthor_regs.h"
27 #include "panthor_sched.h"
28 
29 #define CSF_FW_NAME "mali_csffw.bin"
30 
31 #define PING_INTERVAL_MS			12000
32 #define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
33 #define PROGRESS_TIMEOUT_SCALE_SHIFT		10
34 #define IDLE_HYSTERESIS_US			800
35 #define PWROFF_HYSTERESIS_US			10000
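
/*
 * Note: PROGRESS_TIMEOUT_CYCLES is a GPU cycle count (5 * 500 * 1024 * 1024,
 * i.e. roughly five seconds on a ~500 MHz GPU). It is programmed in units of
 * 1 << PROGRESS_TIMEOUT_SCALE_SHIFT cycles, see panthor_fw_init_global_iface().
 */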
36 
37 /**
38  * struct panthor_fw_binary_hdr - Firmware binary header.
39  */
40 struct panthor_fw_binary_hdr {
41 	/** @magic: Magic value to check binary validity. */
42 	u32 magic;
43 #define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e
44 
45 	/** @minor: Minor FW version. */
46 	u8 minor;
47 
48 	/** @major: Major FW version. */
49 	u8 major;
50 #define CSF_FW_BINARY_HEADER_MAJOR_MAX		0
51 
52 	/** @padding1: MBZ. */
53 	u16 padding1;
54 
55 	/** @version_hash: FW version hash. */
56 	u32 version_hash;
57 
58 	/** @padding2: MBZ. */
59 	u32 padding2;
60 
61 	/** @size: FW binary size. */
62 	u32 size;
63 };
64 
65 /**
66  * enum panthor_fw_binary_entry_type - Firmware binary entry type
67  */
68 enum panthor_fw_binary_entry_type {
69 	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
70 	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,
71 
72 	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
73 	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,
74 
75 	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
76 	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,
77 
78 	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
79 	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,
80 
81 	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
82 	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
83 
84 	/**
85 	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
86 	 * the FW binary was built.
87 	 */
88 	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
89 };
90 
91 #define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
92 #define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
93 #define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
94 #define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)
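
/*
 * Entry header layout, as implied by the accessors above (illustrative):
 *
 *   bits [7:0]   entry type
 *   bits [15:8]  entry size, in bytes, including the 32-bit header itself
 *   bit  30      update flag
 *   bit  31      optional flag
 */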
95 
96 #define CSF_FW_BINARY_IFACE_ENTRY_RD					BIT(0)
97 #define CSF_FW_BINARY_IFACE_ENTRY_WR					BIT(1)
98 #define CSF_FW_BINARY_IFACE_ENTRY_EX					BIT(2)
99 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE			(0 << 3)
100 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED			(1 << 3)
101 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT		(2 << 3)
102 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT		(3 << 3)
103 #define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK			GENMASK(4, 3)
104 #define CSF_FW_BINARY_IFACE_ENTRY_PROT					BIT(5)
105 #define CSF_FW_BINARY_IFACE_ENTRY_SHARED				BIT(30)
106 #define CSF_FW_BINARY_IFACE_ENTRY_ZERO					BIT(31)
107 
108 #define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS			\
109 	(CSF_FW_BINARY_IFACE_ENTRY_RD |					\
110 	 CSF_FW_BINARY_IFACE_ENTRY_WR |					\
111 	 CSF_FW_BINARY_IFACE_ENTRY_EX |					\
112 	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK |			\
113 	 CSF_FW_BINARY_IFACE_ENTRY_PROT |				\
 114 	 CSF_FW_BINARY_IFACE_ENTRY_SHARED |				\
115 	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
116 
117 /**
 118  * struct panthor_fw_binary_section_entry_hdr - Describes a section of the FW binary
119  */
120 struct panthor_fw_binary_section_entry_hdr {
121 	/** @flags: Section flags. */
122 	u32 flags;
123 
124 	/** @va: MCU virtual range to map this binary section to. */
125 	struct {
126 		/** @start: Start address. */
127 		u32 start;
128 
129 		/** @end: End address. */
130 		u32 end;
131 	} va;
132 
133 	/** @data: Data to initialize the FW section with. */
134 	struct {
135 		/** @start: Start offset in the FW binary. */
136 		u32 start;
137 
138 		/** @end: End offset in the FW binary. */
139 		u32 end;
140 	} data;
141 };
142 
/**
 * struct panthor_fw_build_info_hdr - Firmware build info metadata header
 */
 143 struct panthor_fw_build_info_hdr {
 144 	/** @meta_start: Offset of the build info data in the FW binary. */
 145 	u32 meta_start;
 146 	/** @meta_size: Size of the build info data in the FW binary. */
 147 	u32 meta_size;
148 };
149 
150 /**
151  * struct panthor_fw_binary_iter - Firmware binary iterator
152  *
153  * Used to parse a firmware binary.
154  */
155 struct panthor_fw_binary_iter {
156 	/** @data: FW binary data. */
157 	const void *data;
158 
159 	/** @size: FW binary size. */
160 	size_t size;
161 
162 	/** @offset: Iterator offset. */
163 	size_t offset;
164 };
165 
166 /**
167  * struct panthor_fw_section - FW section
168  */
169 struct panthor_fw_section {
170 	/** @node: Used to keep track of FW sections. */
171 	struct list_head node;
172 
173 	/** @flags: Section flags, as encoded in the FW binary. */
174 	u32 flags;
175 
176 	/** @mem: Section memory. */
177 	struct panthor_kernel_bo *mem;
178 
179 	/**
180 	 * @name: Name of the section, as specified in the binary.
181 	 *
182 	 * Can be NULL.
183 	 */
184 	const char *name;
185 
186 	/**
187 	 * @data: Initial data copied to the FW memory.
188 	 *
189 	 * We keep data around so we can reload sections after a reset.
190 	 */
191 	struct {
 192 		/** @buf: Buffer used to store init data. */
193 		const void *buf;
194 
195 		/** @size: Size of @buf in bytes. */
196 		size_t size;
197 	} data;
198 };
199 
200 #define CSF_MCU_SHARED_REGION_START		0x04000000ULL
201 #define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL
202 
203 #define MIN_CS_PER_CSG				8
204 #define MIN_CSGS				3
205 
206 #define CSF_IFACE_VERSION(major, minor, patch)	\
207 	(((major) << 24) | ((minor) << 16) | (patch))
208 #define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
209 #define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
210 #define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
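
/*
 * Example (illustrative): CSF_IFACE_VERSION(1, 1, 0) packs to 0x01010000,
 * the value panthor_get_instr_features() compares the FW-reported version
 * against before reading instr_features.
 */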
211 
212 #define CSF_GROUP_CONTROL_OFFSET		0x1000
213 #define CSF_STREAM_CONTROL_OFFSET		0x40
214 #define CSF_UNPRESERVED_REG_COUNT		4
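
/*
 * Shared section layout sketch (group_stride and stream_stride are
 * reported by the FW through the control interfaces):
 *
 *   0x0000                                    global control interface
 *   0x1000 + csg_idx * group_stride           CSG control interface
 *     + 0x40 + cs_idx * stream_stride         CS control interface
 */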
215 
216 /**
217  * struct panthor_fw_iface - FW interfaces
218  */
219 struct panthor_fw_iface {
220 	/** @global: Global interface. */
221 	struct panthor_fw_global_iface global;
222 
223 	/** @groups: Group slot interfaces. */
224 	struct panthor_fw_csg_iface groups[MAX_CSGS];
225 
226 	/** @streams: Command stream slot interfaces. */
227 	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
228 };
229 
230 /**
231  * struct panthor_fw - Firmware management
232  */
233 struct panthor_fw {
234 	/** @vm: MCU VM. */
235 	struct panthor_vm *vm;
236 
237 	/** @sections: List of FW sections. */
238 	struct list_head sections;
239 
240 	/** @shared_section: The section containing the FW interfaces. */
241 	struct panthor_fw_section *shared_section;
242 
243 	/** @iface: FW interfaces. */
244 	struct panthor_fw_iface iface;
245 
246 	/** @watchdog: Collection of fields relating to the FW watchdog. */
247 	struct {
248 		/** @ping_work: Delayed work used to ping the FW. */
249 		struct delayed_work ping_work;
250 	} watchdog;
251 
252 	/**
253 	 * @req_waitqueue: FW request waitqueue.
254 	 *
 255 	 * Every time a request is sent to a command stream group or the global
 256 	 * interface, the caller first busy waits for the request to be
 257 	 * acknowledged, and then falls back to a sleeping wait.
258 	 *
259 	 * This wait queue is here to support the sleeping wait flavor.
260 	 */
261 	wait_queue_head_t req_waitqueue;
262 
 263 	/** @booted: True if the FW is booted. */
264 	bool booted;
265 
266 	/** @irq: Job irq data. */
267 	struct panthor_irq irq;
268 };
269 
270 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
271 {
272 	return ptdev->fw->vm;
273 }
274 
275 /**
276  * panthor_fw_get_glb_iface() - Get the global interface
277  * @ptdev: Device.
278  *
279  * Return: The global interface.
280  */
281 struct panthor_fw_global_iface *
282 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
283 {
284 	return &ptdev->fw->iface.global;
285 }
286 
287 /**
288  * panthor_fw_get_csg_iface() - Get a command stream group slot interface
289  * @ptdev: Device.
290  * @csg_slot: Index of the command stream group slot.
291  *
292  * Return: The command stream group slot interface.
293  */
294 struct panthor_fw_csg_iface *
295 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
296 {
297 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
298 		return NULL;
299 
300 	return &ptdev->fw->iface.groups[csg_slot];
301 }
302 
303 /**
304  * panthor_fw_get_cs_iface() - Get a command stream slot interface
305  * @ptdev: Device.
306  * @csg_slot: Index of the command stream group slot.
307  * @cs_slot: Index of the command stream slot.
308  *
309  * Return: The command stream slot interface.
310  */
311 struct panthor_fw_cs_iface *
312 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
313 {
314 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
315 		return NULL;
316 
317 	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
318 }
319 
320 /**
321  * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
322  * @ptdev: Device.
323  * @timeout_us: Timeout expressed in micro-seconds.
324  *
 325  * The FW has two timer sources: the GPU cycle counter or the arch timer.
 326  * We need to express timeouts in terms of a number of cycles and specify
 327  * which timer source should be used.
328  *
329  * Return: A value suitable for timeout fields in the global interface.
330  */
331 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
332 {
333 	bool use_cycle_counter = false;
334 	u32 timer_rate = 0;
335 	u64 mod_cycles;
336 
337 #ifdef CONFIG_ARM_ARCH_TIMER
338 	timer_rate = arch_timer_get_cntfrq();
339 #endif
340 
341 	if (!timer_rate) {
342 		use_cycle_counter = true;
343 		timer_rate = clk_get_rate(ptdev->clks.core);
344 	}
345 
346 	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
347 		/* We couldn't get a valid clock rate, let's just pick the
348 		 * maximum value so the FW still handles the core
349 		 * power on/off requests.
350 		 */
351 		return GLB_TIMER_VAL(~0) |
352 		       GLB_TIMER_SOURCE_GPU_COUNTER;
353 	}
354 
355 	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
356 				      1000000ull << 10);
357 	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
358 		mod_cycles = GLB_TIMER_VAL(~0);
359 
360 	return GLB_TIMER_VAL(mod_cycles) |
361 	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
362 }
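
/*
 * Worked example (illustrative, assuming a 192 MHz arch timer): converting
 * PWROFF_HYSTERESIS_US (10000us) gives
 *
 *   mod_cycles = DIV_ROUND_UP(10000 * 192000000, 1000000 << 10) = 1875
 *
 * so the FW counts 1875 << 10 = 1920000 timer cycles (10ms) before powering
 * off.
 */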
363 
364 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
365 				       struct panthor_fw_binary_iter *iter,
366 				       void *out, size_t size)
367 {
368 	size_t new_offset = iter->offset + size;
369 
370 	if (new_offset > iter->size || new_offset < iter->offset) {
371 		drm_err(&ptdev->base, "Firmware too small\n");
372 		return -EINVAL;
373 	}
374 
375 	memcpy(out, iter->data + iter->offset, size);
376 	iter->offset = new_offset;
377 	return 0;
378 }
379 
380 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
381 					   struct panthor_fw_binary_iter *iter,
382 					   struct panthor_fw_binary_iter *sub_iter,
383 					   size_t size)
384 {
385 	size_t new_offset = iter->offset + size;
386 
387 	if (new_offset > iter->size || new_offset < iter->offset) {
388 		drm_err(&ptdev->base, "Firmware entry too long\n");
389 		return -EINVAL;
390 	}
391 
392 	sub_iter->offset = 0;
393 	sub_iter->data = iter->data + iter->offset;
394 	sub_iter->size = size;
395 	iter->offset = new_offset;
396 	return 0;
397 }
398 
399 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
400 					struct panthor_fw_section *section)
401 {
402 	bool was_mapped = !!section->mem->kmap;
403 	int ret;
404 
405 	if (!section->data.size &&
406 	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
407 		return;
408 
409 	ret = panthor_kernel_bo_vmap(section->mem);
410 	if (drm_WARN_ON(&ptdev->base, ret))
411 		return;
412 
413 	memcpy(section->mem->kmap, section->data.buf, section->data.size);
414 	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
415 		memset(section->mem->kmap + section->data.size, 0,
416 		       panthor_kernel_bo_size(section->mem) - section->data.size);
417 	}
418 
419 	if (!was_mapped)
420 		panthor_kernel_bo_vunmap(section->mem);
421 }
422 
 423  * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
424  * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
425  * @ptdev: Device.
426  * @input: Pointer holding the input interface on success.
427  * Should be ignored on failure.
428  * @output: Pointer holding the output interface on success.
429  * Should be ignored on failure.
430  * @input_fw_va: Pointer holding the input interface FW VA on success.
431  * Should be ignored on failure.
432  * @output_fw_va: Pointer holding the output interface FW VA on success.
433  * Should be ignored on failure.
434  *
 435  * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
436  * interface is at offset 0, and the output interface at offset 4096.
437  *
438  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
439  */
440 struct panthor_kernel_bo *
441 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
442 				 struct panthor_fw_ringbuf_input_iface **input,
443 				 const struct panthor_fw_ringbuf_output_iface **output,
444 				 u32 *input_fw_va, u32 *output_fw_va)
445 {
446 	struct panthor_kernel_bo *mem;
447 	int ret;
448 
449 	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
450 				       DRM_PANTHOR_BO_NO_MMAP,
451 				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
452 				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
453 				       PANTHOR_VM_KERNEL_AUTO_VA,
454 				       "Queue FW interface");
455 	if (IS_ERR(mem))
456 		return mem;
457 
458 	ret = panthor_kernel_bo_vmap(mem);
459 	if (ret) {
460 		panthor_kernel_bo_destroy(mem);
461 		return ERR_PTR(ret);
462 	}
463 
464 	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
465 	*input = mem->kmap;
466 	*output = mem->kmap + SZ_4K;
467 	*input_fw_va = panthor_kernel_bo_gpuva(mem);
468 	*output_fw_va = *input_fw_va + SZ_4K;
469 
470 	return mem;
471 }
472 
473 /**
474  * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
475  * @ptdev: Device.
476  * @size: Size of the suspend buffer.
477  *
478  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
479  */
480 struct panthor_kernel_bo *
481 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
482 {
483 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
484 					DRM_PANTHOR_BO_NO_MMAP,
485 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
486 					PANTHOR_VM_KERNEL_AUTO_VA,
487 					"FW suspend buffer");
488 }
489 
490 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
491 					 const struct firmware *fw,
492 					 struct panthor_fw_binary_iter *iter,
493 					 u32 ehdr)
494 {
495 	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
496 	struct panthor_fw_binary_section_entry_hdr hdr;
497 	struct panthor_fw_section *section;
498 	u32 section_size;
499 	u32 name_len;
500 	int ret;
501 
502 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
503 	if (ret)
504 		return ret;
505 
506 	if (hdr.data.end < hdr.data.start) {
507 		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
508 			hdr.data.end, hdr.data.start);
509 		return -EINVAL;
510 	}
511 
512 	if (hdr.va.end < hdr.va.start) {
513 		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
514 			hdr.va.end, hdr.va.start);
515 		return -EINVAL;
516 	}
517 
518 	if (hdr.data.end > fw->size) {
519 		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
520 			hdr.data.end, fw->size);
521 		return -EINVAL;
522 	}
523 
524 	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
525 		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
526 			hdr.va.start, hdr.va.end);
527 		return -EINVAL;
528 	}
529 
530 	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
531 		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
532 			hdr.flags);
533 		return -EINVAL;
534 	}
535 
536 	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
537 		drm_warn(&ptdev->base,
 538 			 "Firmware protected mode entry not supported, ignoring");
539 		return 0;
540 	}
541 
542 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
543 	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
544 		drm_err(&ptdev->base,
545 			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
546 		return -EINVAL;
547 	}
548 
549 	name_len = iter->size - iter->offset;
550 
551 	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
552 	if (!section)
553 		return -ENOMEM;
554 
555 	list_add_tail(&section->node, &ptdev->fw->sections);
556 	section->flags = hdr.flags;
557 	section->data.size = hdr.data.end - hdr.data.start;
558 
559 	if (section->data.size > 0) {
560 		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
561 
562 		if (!data)
563 			return -ENOMEM;
564 
565 		memcpy(data, fw->data + hdr.data.start, section->data.size);
566 		section->data.buf = data;
567 	}
568 
569 	if (name_len > 0) {
570 		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
571 
572 		if (!name)
573 			return -ENOMEM;
574 
575 		memcpy(name, iter->data + iter->offset, name_len);
576 		name[name_len] = '\0';
577 		section->name = name;
578 	}
579 
580 	section_size = hdr.va.end - hdr.va.start;
581 	if (section_size) {
582 		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
583 		struct panthor_gem_object *bo;
584 		u32 vm_map_flags = 0;
585 		struct sg_table *sgt;
586 		u64 va = hdr.va.start;
587 
588 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
589 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
590 
591 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
592 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
593 
594 		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
595 		 * non-cacheable for now. We might want to introduce a new
596 		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
597 		 * memory and is currently not used by our driver) for
598 		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
599 		 * of IO-coherent systems.
600 		 */
601 		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
602 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
603 
604 		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
605 							section_size,
606 							DRM_PANTHOR_BO_NO_MMAP,
607 							vm_map_flags, va, "FW section");
608 		if (IS_ERR(section->mem))
609 			return PTR_ERR(section->mem);
610 
611 		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
612 			return -EINVAL;
613 
614 		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
615 			ret = panthor_kernel_bo_vmap(section->mem);
616 			if (ret)
617 				return ret;
618 		}
619 
620 		panthor_fw_init_section_mem(ptdev, section);
621 
622 		bo = to_panthor_bo(section->mem->obj);
623 		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
624 		if (IS_ERR(sgt))
625 			return PTR_ERR(sgt);
626 
627 		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
628 	}
629 
630 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
631 		ptdev->fw->shared_section = section;
632 
633 	return 0;
634 }
635 
636 static int panthor_fw_read_build_info(struct panthor_device *ptdev,
637 				      const struct firmware *fw,
638 				      struct panthor_fw_binary_iter *iter,
639 				      u32 ehdr)
640 {
641 	struct panthor_fw_build_info_hdr hdr;
642 	static const char git_sha_header[] = "git_sha: ";
643 	const int header_len = sizeof(git_sha_header) - 1;
644 	int ret;
645 
646 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
647 	if (ret)
648 		return ret;
649 
650 	if (hdr.meta_start > fw->size ||
651 	    hdr.meta_start + hdr.meta_size > fw->size) {
652 		drm_err(&ptdev->base, "Firmware build info corrupt\n");
653 		/* We don't need the build info, so continue */
654 		return 0;
655 	}
656 
657 	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
658 		/* Not the expected header, this isn't metadata we understand */
659 		return 0;
660 	}
661 
662 	/* Check that the git SHA is NULL terminated as expected */
663 	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
664 		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
665 		/* Don't treat as fatal */
666 		return 0;
667 	}
668 
669 	drm_info(&ptdev->base, "Firmware git sha: %s\n",
670 		 fw->data + hdr.meta_start + header_len);
671 
672 	return 0;
673 }
674 
675 static void
676 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
677 {
678 	struct panthor_fw_section *section;
679 
680 	list_for_each_entry(section, &ptdev->fw->sections, node) {
681 		struct sg_table *sgt;
682 
683 		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
684 			continue;
685 
686 		panthor_fw_init_section_mem(ptdev, section);
687 		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
688 		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
689 			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
690 	}
691 }
692 
693 static int panthor_fw_load_entry(struct panthor_device *ptdev,
694 				 const struct firmware *fw,
695 				 struct panthor_fw_binary_iter *iter)
696 {
697 	struct panthor_fw_binary_iter eiter;
698 	u32 ehdr;
699 	int ret;
700 
701 	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
702 	if (ret)
703 		return ret;
704 
705 	if ((iter->offset % sizeof(u32)) ||
706 	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
707 		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
708 			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
709 		return -EINVAL;
710 	}
711 
712 	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
713 					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
714 		return -EINVAL;
715 
716 	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
717 	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
718 		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
719 	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
720 		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
721 
722 	/* FIXME: handle those entry types? */
723 	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
724 	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
725 	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
726 	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
727 		return 0;
728 	default:
729 		break;
730 	}
731 
732 	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
733 		return 0;
734 
735 	drm_err(&ptdev->base,
736 		"Unsupported non-optional entry type %u in firmware\n",
737 		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
738 	return -EINVAL;
739 }
740 
741 static int panthor_fw_load(struct panthor_device *ptdev)
742 {
743 	const struct firmware *fw = NULL;
744 	struct panthor_fw_binary_iter iter = {};
745 	struct panthor_fw_binary_hdr hdr;
746 	char fw_path[128];
747 	int ret;
748 
749 	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
750 		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
751 		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
752 		 CSF_FW_NAME);
753 
754 	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
755 	if (ret) {
756 		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
757 			CSF_FW_NAME);
758 		return ret;
759 	}
760 
761 	iter.data = fw->data;
762 	iter.size = fw->size;
763 	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
764 	if (ret)
765 		goto out;
766 
767 	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
768 		ret = -EINVAL;
769 		drm_err(&ptdev->base, "Invalid firmware magic\n");
770 		goto out;
771 	}
772 
773 	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
774 		ret = -EINVAL;
775 		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
776 			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
777 		goto out;
778 	}
779 
 780 	if (hdr.size > iter.size) {
 781 		drm_err(&ptdev->base, "Firmware image is truncated\n");
		ret = -EINVAL;
 782 		goto out;
 783 	}
784 
785 	iter.size = hdr.size;
786 
787 	while (iter.offset < hdr.size) {
788 		ret = panthor_fw_load_entry(ptdev, fw, &iter);
789 		if (ret)
790 			goto out;
791 	}
792 
793 	if (!ptdev->fw->shared_section) {
794 		drm_err(&ptdev->base, "Shared interface region not found\n");
795 		ret = -EINVAL;
796 		goto out;
797 	}
798 
799 out:
800 	release_firmware(fw);
801 	return ret;
802 }
803 
804 /**
805  * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
806  * @ptdev: Device.
807  * @mcu_va: MCU address.
808  *
809  * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
810  */
811 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
812 {
813 	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
814 	u64 shared_mem_end = shared_mem_start +
815 			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
816 	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
817 		return NULL;
818 
819 	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
820 }
821 
822 static int panthor_init_cs_iface(struct panthor_device *ptdev,
823 				 unsigned int csg_idx, unsigned int cs_idx)
824 {
825 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
826 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
827 	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
828 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
829 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
830 			   (csg_idx * glb_iface->control->group_stride) +
831 			   CSF_STREAM_CONTROL_OFFSET +
832 			   (cs_idx * csg_iface->control->stream_stride);
833 	struct panthor_fw_cs_iface *first_cs_iface =
834 		panthor_fw_get_cs_iface(ptdev, 0, 0);
835 
836 	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
837 		return -EINVAL;
838 
839 	spin_lock_init(&cs_iface->lock);
840 	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
841 	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
842 	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
843 
844 	if (!cs_iface->input || !cs_iface->output) {
845 		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
846 		return -EINVAL;
847 	}
848 
849 	if (cs_iface != first_cs_iface) {
850 		if (cs_iface->control->features != first_cs_iface->control->features) {
851 			drm_err(&ptdev->base, "Expecting identical CS slots");
852 			return -EINVAL;
853 		}
854 	} else {
855 		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
856 
857 		ptdev->csif_info.cs_reg_count = reg_count;
858 		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
859 	}
860 
861 	return 0;
862 }
863 
864 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
865 			const struct panthor_fw_csg_control_iface *b)
866 {
867 	if (a->features != b->features)
868 		return false;
869 	if (a->suspend_size != b->suspend_size)
870 		return false;
871 	if (a->protm_suspend_size != b->protm_suspend_size)
872 		return false;
873 	if (a->stream_num != b->stream_num)
874 		return false;
875 	return true;
876 }
877 
878 static int panthor_init_csg_iface(struct panthor_device *ptdev,
879 				  unsigned int csg_idx)
880 {
881 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
882 	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
883 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
884 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
885 	unsigned int i;
886 
887 	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
888 		return -EINVAL;
889 
890 	spin_lock_init(&csg_iface->lock);
891 	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
892 	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
893 	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
894 
895 	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
896 	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
897 		return -EINVAL;
898 
899 	if (!csg_iface->input || !csg_iface->output) {
900 		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
901 		return -EINVAL;
902 	}
903 
904 	if (csg_idx > 0) {
905 		struct panthor_fw_csg_iface *first_csg_iface =
906 			panthor_fw_get_csg_iface(ptdev, 0);
907 
908 		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
909 			drm_err(&ptdev->base, "Expecting identical CSG slots");
910 			return -EINVAL;
911 		}
912 	}
913 
914 	for (i = 0; i < csg_iface->control->stream_num; i++) {
915 		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
916 
917 		if (ret)
918 			return ret;
919 	}
920 
921 	return 0;
922 }
923 
924 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
925 {
926 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
927 
928 	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
929 		return 0;
930 
931 	return glb_iface->control->instr_features;
932 }
933 
934 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
935 {
936 	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
937 	unsigned int i;
938 
939 	if (!ptdev->fw->shared_section->mem->kmap)
940 		return -EINVAL;
941 
942 	spin_lock_init(&glb_iface->lock);
943 	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
944 
945 	if (!glb_iface->control->version) {
946 		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
947 		return -EINVAL;
948 	}
949 
950 	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
951 	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
952 	if (!glb_iface->input || !glb_iface->output) {
953 		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
954 		return -EINVAL;
955 	}
956 
957 	if (glb_iface->control->group_num > MAX_CSGS ||
958 	    glb_iface->control->group_num < MIN_CSGS) {
959 		drm_err(&ptdev->base, "Invalid number of control groups");
960 		return -EINVAL;
961 	}
962 
963 	for (i = 0; i < glb_iface->control->group_num; i++) {
964 		int ret = panthor_init_csg_iface(ptdev, i);
965 
966 		if (ret)
967 			return ret;
968 	}
969 
970 	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
971 		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
972 		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
973 		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
974 		 glb_iface->control->features,
975 		 panthor_get_instr_features(ptdev));
976 	return 0;
977 }
978 
979 static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
980 {
981 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
982 
983 	/* Enable all cores. */
984 	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
985 
986 	/* Setup timers. */
987 	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
988 	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
989 	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
990 
991 	/* Enable interrupts we care about. */
992 	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
993 					 GLB_PING |
994 					 GLB_CFG_PROGRESS_TIMER |
995 					 GLB_CFG_POWEROFF_TIMER |
996 					 GLB_IDLE_EN |
997 					 GLB_IDLE;
998 
999 	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
1000 	panthor_fw_toggle_reqs(glb_iface, req, ack,
1001 			       GLB_CFG_ALLOC_EN |
1002 			       GLB_CFG_POWEROFF_TIMER |
1003 			       GLB_CFG_PROGRESS_TIMER);
1004 
1005 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1006 
1007 	/* Kick the watchdog. */
1008 	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
1009 			 msecs_to_jiffies(PING_INTERVAL_MS));
1010 }
1011 
1012 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
1013 {
1014 	gpu_write(ptdev, JOB_INT_CLEAR, status);
1015 
1016 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
1017 		ptdev->fw->booted = true;
1018 
1019 	wake_up_all(&ptdev->fw->req_waitqueue);
1020 
1021 	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
1022 	if (!ptdev->fw->booted)
1023 		return;
1024 
1025 	panthor_sched_report_fw_events(ptdev, status);
1026 }
1027 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
1028 
1029 static int panthor_fw_start(struct panthor_device *ptdev)
1030 {
1031 	bool timedout = false;
1032 
1033 	ptdev->fw->booted = false;
1034 	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
1035 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
1036 
1037 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1038 				ptdev->fw->booted,
1039 				msecs_to_jiffies(1000))) {
1040 		if (!ptdev->fw->booted &&
1041 		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
1042 			timedout = true;
1043 	}
1044 
1045 	if (timedout) {
1046 		static const char * const status_str[] = {
1047 			[MCU_STATUS_DISABLED] = "disabled",
1048 			[MCU_STATUS_ENABLED] = "enabled",
1049 			[MCU_STATUS_HALT] = "halt",
1050 			[MCU_STATUS_FATAL] = "fatal",
1051 		};
1052 		u32 status = gpu_read(ptdev, MCU_STATUS);
1053 
1054 		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1055 			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1056 		return -ETIMEDOUT;
1057 	}
1058 
1059 	return 0;
1060 }
1061 
1062 static void panthor_fw_stop(struct panthor_device *ptdev)
1063 {
1064 	u32 status;
1065 
1066 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1067 	if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
1068 				  status == MCU_STATUS_DISABLED, 10, 100000))
1069 		drm_err(&ptdev->base, "Failed to stop MCU");
1070 }
1071 
1072 /**
1073  * panthor_fw_pre_reset() - Call before a reset.
1074  * @ptdev: Device.
1075  * @on_hang: true if the reset was triggered on a GPU hang.
1076  *
1077  * If the reset is not triggered on a hang, we try to gracefully halt the
1078  * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1079  */
1080 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1081 {
1082 	/* Make sure we won't be woken up by a ping. */
1083 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1084 
1085 	ptdev->reset.fast = false;
1086 
1087 	if (!on_hang) {
1088 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1089 		u32 status;
1090 
1091 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1092 		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1093 		if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
1094 					   status == MCU_STATUS_HALT, 10,
1095 					   100000)) {
1096 			ptdev->reset.fast = true;
1097 		} else {
1098 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1099 		}
1100 	}
1101 
1102 	panthor_job_irq_suspend(&ptdev->fw->irq);
1103 	panthor_fw_stop(ptdev);
1104 }
1105 
1106 /**
1107  * panthor_fw_post_reset() - Call after a reset.
1108  * @ptdev: Device.
1109  *
1110  * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1111  * make sure we can recover from a memory corruption.
1112  */
1113 int panthor_fw_post_reset(struct panthor_device *ptdev)
1114 {
1115 	int ret;
1116 
1117 	/* Make the MCU VM active. */
1118 	ret = panthor_vm_active(ptdev->fw->vm);
1119 	if (ret)
1120 		return ret;
1121 
1122 	if (!ptdev->reset.fast) {
1123 		/* On a slow reset, reload all sections, including RO ones.
1124 		 * We're not supposed to end up here anyway, let's just assume
1125 		 * the overhead of reloading everything is acceptable.
1126 		 */
1127 		panthor_reload_fw_sections(ptdev, true);
1128 	} else {
1129 		/* The FW detects 0 -> 1 transitions. Make sure we reset
1130 		 * the HALT bit before the FW is rebooted.
1131 		 * This is not needed on a slow reset because FW sections are
1132 		 * re-initialized.
1133 		 */
1134 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1135 
1136 		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1137 	}
1138 
1139 	ret = panthor_fw_start(ptdev);
1140 	if (ret) {
1141 		drm_err(&ptdev->base, "FW %s reset failed",
 1142 			ptdev->reset.fast ? "fast" : "slow");
1143 		return ret;
1144 	}
1145 
1146 	/* We must re-initialize the global interface even on fast-reset. */
1147 	panthor_fw_init_global_iface(ptdev);
1148 	return 0;
1149 }
1150 
1151 /**
1152  * panthor_fw_unplug() - Called when the device is unplugged.
1153  * @ptdev: Device.
1154  *
1155  * This function must make sure all pending operations are flushed before
 1156  * it releases device resources, thus preventing any further interaction
 1157  * with the HW.
1158  *
 1159  * Any FW-related work still running after this function returns must use
 1160  * drm_dev_{enter,exit}() and skip any HW access when drm_dev_enter()
 1161  * returns false.
1162  */
1163 void panthor_fw_unplug(struct panthor_device *ptdev)
1164 {
1165 	struct panthor_fw_section *section;
1166 
1167 	disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1168 
1169 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
1170 		/* Make sure the IRQ handler cannot be called after that point. */
1171 		if (ptdev->fw->irq.irq)
1172 			panthor_job_irq_suspend(&ptdev->fw->irq);
1173 
1174 		panthor_fw_stop(ptdev);
1175 	}
1176 
1177 	list_for_each_entry(section, &ptdev->fw->sections, node)
1178 		panthor_kernel_bo_destroy(section->mem);
1179 
1180 	/* We intentionally don't call panthor_vm_idle() and let
1181 	 * panthor_mmu_unplug() release the AS we acquired with
1182 	 * panthor_vm_active() so we don't have to track the VM active/idle
1183 	 * state to keep the active_refcnt balanced.
1184 	 */
1185 	panthor_vm_put(ptdev->fw->vm);
1186 	ptdev->fw->vm = NULL;
1187 
1188 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
1189 		panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1190 }
1191 
1192 /**
1193  * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1194  * @req_ptr: Pointer to the req register.
1195  * @ack_ptr: Pointer to the ack register.
1196  * @wq: Wait queue to use for the sleeping wait.
1197  * @req_mask: Mask of requests to wait for.
1198  * @acked: Pointer to field that's updated with the acked requests.
1199  * If the function returns 0, *acked == req_mask.
1200  * @timeout_ms: Timeout expressed in milliseconds.
1201  *
1202  * Return: 0 on success, -ETIMEDOUT otherwise.
1203  */
1204 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1205 				wait_queue_head_t *wq,
1206 				u32 req_mask, u32 *acked,
1207 				u32 timeout_ms)
1208 {
1209 	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1210 	int ret;
1211 
1212 	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
1213 	*acked = req_mask;
1214 	ret = read_poll_timeout_atomic(READ_ONCE, ack,
1215 				       (ack & req_mask) == req,
1216 				       0, 10, 0,
1217 				       *ack_ptr);
1218 	if (!ret)
1219 		return 0;
1220 
1221 	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1222 			       msecs_to_jiffies(timeout_ms)))
1223 		return 0;
1224 
1225 	/* Check one last time, in case we were not woken up for some reason. */
1226 	ack = READ_ONCE(*ack_ptr);
1227 	if ((ack & req_mask) == req)
1228 		return 0;
1229 
1230 	*acked = ~(req ^ ack) & req_mask;
1231 	return -ETIMEDOUT;
1232 }
1233 
1234 /**
1235  * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1236  * @ptdev: Device.
1237  * @req_mask: Mask of requests to wait for.
1238  * @acked: Pointer to field that's updated with the acked requests.
1239  * If the function returns 0, *acked == req_mask.
1240  * @timeout_ms: Timeout expressed in milliseconds.
1241  *
1242  * Return: 0 on success, -ETIMEDOUT otherwise.
1243  */
1244 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1245 			     u32 req_mask, u32 *acked,
1246 			     u32 timeout_ms)
1247 {
1248 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1249 
1250 	/* GLB_HALT doesn't get acked through the FW interface. */
1251 	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1252 		return -EINVAL;
1253 
1254 	return panthor_fw_wait_acks(&glb_iface->input->req,
1255 				    &glb_iface->output->ack,
1256 				    &ptdev->fw->req_waitqueue,
1257 				    req_mask, acked, timeout_ms);
1258 }
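
/*
 * Typical usage sketch (this mirrors panthor_fw_ping_work()): toggle the
 * request bit, ring the global doorbell, then wait for the FW to mirror
 * the bit in the ack word:
 *
 *	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
 *	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
 *	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
 */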
1259 
1260 /**
1261  * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1262  * @ptdev: Device.
1263  * @csg_slot: CSG slot ID.
1264  * @req_mask: Mask of requests to wait for.
1265  * @acked: Pointer to field that's updated with the acked requests.
1266  * If the function returns 0, *acked == req_mask.
1267  * @timeout_ms: Timeout expressed in milliseconds.
1268  *
1269  * Return: 0 on success, -ETIMEDOUT otherwise.
1270  */
1271 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1272 			     u32 req_mask, u32 *acked, u32 timeout_ms)
1273 {
1274 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1275 	int ret;
1276 
1277 	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1278 		return -EINVAL;
1279 
1280 	ret = panthor_fw_wait_acks(&csg_iface->input->req,
1281 				   &csg_iface->output->ack,
1282 				   &ptdev->fw->req_waitqueue,
1283 				   req_mask, acked, timeout_ms);
1284 
1285 	/*
1286 	 * Check that all bits in the state field were updated, if any mismatch
1287 	 * then clear all bits in the state field. This allows code to do
1288 	 * (acked & CSG_STATE_MASK) and get the right value.
1289 	 */
1290 
1291 	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1292 		*acked &= ~CSG_STATE_MASK;
1293 
1294 	return ret;
1295 }
1296 
1297 /**
1298  * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1299  * @ptdev: Device.
1300  * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1301  *
 1302  * This function toggles bits in doorbell_req and rings the global
 1303  * doorbell. It doesn't require a user doorbell to be attached to
1304  * the group.
1305  */
1306 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1307 {
1308 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1309 
1310 	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1311 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1312 }
1313 
1314 static void panthor_fw_ping_work(struct work_struct *work)
1315 {
1316 	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1317 	struct panthor_device *ptdev = fw->irq.ptdev;
1318 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1319 	u32 acked;
1320 	int ret;
1321 
1322 	if (panthor_device_reset_is_pending(ptdev))
1323 		return;
1324 
1325 	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1326 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1327 
1328 	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1329 	if (ret) {
1330 		panthor_device_schedule_reset(ptdev);
1331 		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1332 	} else {
1333 		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1334 				 msecs_to_jiffies(PING_INTERVAL_MS));
1335 	}
1336 }
1337 
1338 /**
1339  * panthor_fw_init() - Initialize FW related data.
1340  * @ptdev: Device.
1341  *
1342  * Return: 0 on success, a negative error code otherwise.
1343  */
1344 int panthor_fw_init(struct panthor_device *ptdev)
1345 {
1346 	struct panthor_fw *fw;
1347 	int ret, irq;
1348 
1349 	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1350 	if (!fw)
1351 		return -ENOMEM;
1352 
1353 	ptdev->fw = fw;
1354 	init_waitqueue_head(&fw->req_waitqueue);
1355 	INIT_LIST_HEAD(&fw->sections);
1356 	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1357 
1358 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1359 	if (irq <= 0)
1360 		return -ENODEV;
1361 
1362 	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1363 	if (ret) {
1364 		drm_err(&ptdev->base, "failed to request job irq");
1365 		return ret;
1366 	}
1367 
1368 	ret = panthor_gpu_l2_power_on(ptdev);
1369 	if (ret)
1370 		return ret;
1371 
1372 	fw->vm = panthor_vm_create(ptdev, true,
1373 				   0, SZ_4G,
1374 				   CSF_MCU_SHARED_REGION_START,
1375 				   CSF_MCU_SHARED_REGION_SIZE);
1376 	if (IS_ERR(fw->vm)) {
1377 		ret = PTR_ERR(fw->vm);
1378 		fw->vm = NULL;
1379 		goto err_unplug_fw;
1380 	}
1381 
1382 	ret = panthor_fw_load(ptdev);
1383 	if (ret)
1384 		goto err_unplug_fw;
1385 
1386 	ret = panthor_vm_active(fw->vm);
1387 	if (ret)
1388 		goto err_unplug_fw;
1389 
1390 	ret = panthor_fw_start(ptdev);
1391 	if (ret)
1392 		goto err_unplug_fw;
1393 
1394 	ret = panthor_fw_init_ifaces(ptdev);
1395 	if (ret)
1396 		goto err_unplug_fw;
1397 
1398 	panthor_fw_init_global_iface(ptdev);
1399 	return 0;
1400 
1401 err_unplug_fw:
1402 	panthor_fw_unplug(ptdev);
1403 	return ret;
1404 }
1405 
1406 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1407 MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin");
1408 MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
1409 MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
1410 MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
1411 MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
1412