xref: /linux/drivers/gpu/drm/panthor/panthor_fw.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7 
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 #include <linux/pm_runtime.h>
16 
17 #include <drm/drm_drv.h>
18 #include <drm/drm_managed.h>
19 #include <drm/drm_print.h>
20 
21 #include "panthor_device.h"
22 #include "panthor_fw.h"
23 #include "panthor_fw_regs.h"
24 #include "panthor_gem.h"
25 #include "panthor_gpu.h"
26 #include "panthor_hw.h"
27 #include "panthor_mmu.h"
28 #include "panthor_sched.h"
29 #include "panthor_trace.h"
30 
/*
 * File name of the CSF firmware image. panthor_fw_load() appends it to a
 * per-architecture directory ("arm/mali/arch<major>.<minor>/").
 */
#define CSF_FW_NAME "mali_csffw.bin"

/*
 * Timing parameters. The unit of each value is given by its name suffix
 * (MS: milliseconds, US: micro-seconds, CYCLES: cycles).
 *
 * PWROFF_HYSTERESIS_US is converted with panthor_fw_conv_timeout() before
 * being programmed as the global interface poweroff timer.
 */
#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000
#define MCU_HALT_TIMEOUT_US			(1ULL * USEC_PER_SEC)
39 
/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 *
 * Read from the very beginning of the firmware image by panthor_fw_load(),
 * which validates @magic, @major and @size before walking the entries that
 * follow the header.
 */
struct panthor_fw_binary_hdr {
	/**
	 * @magic: Magic value to check binary validity.
	 *
	 * Must be equal to CSF_FW_BINARY_HEADER_MAGIC.
	 */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. Only reported in error messages. */
	u8 minor;

	/**
	 * @major: Major FW version.
	 *
	 * panthor_fw_load() rejects any value other than
	 * CSF_FW_BINARY_HEADER_MAJOR_MAX.
	 */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/**
	 * @size: FW binary size.
	 *
	 * Must not exceed the size of the loaded file. Entries are parsed
	 * up to this offset.
	 */
	u32 size;
};
67 
/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 *
 * The type is encoded in the low byte of each entry header, see
 * CSF_FW_BINARY_ENTRY_TYPE(). Only the IFACE and BUILD_INFO_METADATA entries
 * are parsed by panthor_fw_load_entry(); the CONFIG, FUTF_TEST, TRACE_BUFFER
 * and TIMELINE_METADATA entries are accepted but skipped. Value 5 is not
 * defined here.
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
93 
/*
 * Accessors and flags for the 32-bit header found at the start of each
 * firmware entry: bits 7:0 hold the entry type and bits 15:8 hold the entry
 * size in bytes, entry header included.
 *
 * Entries of an unknown type are silently skipped by panthor_fw_load_entry()
 * when CSF_FW_BINARY_ENTRY_OPTIONAL is set, and rejected otherwise.
 */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

/*
 * Flags found in panthor_fw_binary_section_entry_hdr::flags.
 *
 * When loading a section, the absence of WR maps it read-only, the absence
 * of EX maps it non-executable, and any cache mode other than CACHED maps it
 * uncached. PROT sections are ignored with a warning, SHARED sections are
 * kept CPU-mapped, and ZERO sections get the memory following their init
 * data zero-filled.
 */
#define CSF_FW_BINARY_IFACE_ENTRY_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT		(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT					BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO					BIT(31)

/* Any section flag outside of this mask makes the firmware load fail. */
#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD |					\
	 CSF_FW_BINARY_IFACE_ENTRY_WR |					\
	 CSF_FW_BINARY_IFACE_ENTRY_EX |					\
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED  |				\
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
119 
/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 *
 * This header is the payload of a CSF_FW_BINARY_ENTRY_TYPE_IFACE entry. Any
 * byte following it in the entry is treated as the section name.
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags (CSF_FW_BINARY_IFACE_ENTRY_xxx). */
	u32 flags;

	/**
	 * @va: MCU virtual range to map this binary section to.
	 *
	 * Both bounds must be aligned on the MCU VM page size.
	 */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. Must not be lower than @start. */
		u32 end;
	} va;

	/**
	 * @data: Data to initialize the FW section with.
	 *
	 * Offsets are relative to the beginning of the firmware file.
	 */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/**
		 * @end: End offset in the FW binary.
		 *
		 * Must not be lower than @start, and must not exceed the
		 * firmware file size.
		 */
		u32 end;
	} data;
};
145 
/**
 * struct panthor_fw_build_info_hdr - Build info metadata entry header
 *
 * Payload of a CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA entry. It points
 * to a blob located elsewhere in the firmware file.
 */
struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};
152 
/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary. Reads are bounds-checked against @size
 * and advance @offset, see panthor_fw_binary_iter_read().
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. Not owned by the iterator. */
	const void *data;

	/** @size: Size in bytes of the region covered by @data. */
	size_t size;

	/** @offset: Iterator offset, in bytes, relative to @data. */
	size_t offset;
};
168 
/**
 * struct panthor_fw_section - FW section
 *
 * One instance is created per interface entry accepted by
 * panthor_fw_load_section_entry().
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/**
	 * @mem: Section memory.
	 *
	 * Left NULL when the section has an empty VA range.
	 */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
202 
/*
 * MCU VA range of the shared region. The section mapped at
 * CSF_MCU_SHARED_REGION_START must carry the SHARED flag and becomes
 * panthor_fw::shared_section.
 */
#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

/*
 * Lower bounds used to validate the number of streams per group and the
 * number of groups advertised by the FW.
 */
#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3

/* Helpers to build and decode the FW interface version (major.minor.patch). */
#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

/*
 * CSF_GROUP_CONTROL_OFFSET is the offset of the first group control
 * interface in the shared section, and CSF_STREAM_CONTROL_OFFSET the offset
 * of the first stream control interface relative to its group control
 * interface. CSF_UNPRESERVED_REG_COUNT is the value reported as
 * csif_info.unpreserved_cs_reg_count.
 */
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4
218 
/**
 * struct panthor_fw_iface - FW interfaces
 *
 * Host-side view of the interfaces exposed by the FW. Entries are filled by
 * panthor_fw_init_ifaces() for the groups and streams advertised by the FW.
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces, indexed by group slot. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/**
	 * @streams: Command stream slot interfaces, indexed by group slot
	 * first and stream slot second.
	 */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};
232 
/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @iomem: CPU mapping of MCU_CONTROL iomem region */
	void __iomem *iomem;

	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections (struct panthor_fw_section). */
	struct list_head sections;

	/**
	 * @shared_section: The section containing the FW interfaces.
	 *
	 * This is the section mapped at CSF_MCU_SHARED_REGION_START.
	 */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
275 
276 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
277 {
278 	return ptdev->fw->vm;
279 }
280 
281 /**
282  * panthor_fw_get_glb_iface() - Get the global interface
283  * @ptdev: Device.
284  *
285  * Return: The global interface.
286  */
287 struct panthor_fw_global_iface *
288 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
289 {
290 	return &ptdev->fw->iface.global;
291 }
292 
293 /**
294  * panthor_fw_get_csg_iface() - Get a command stream group slot interface
295  * @ptdev: Device.
296  * @csg_slot: Index of the command stream group slot.
297  *
298  * Return: The command stream group slot interface.
299  */
300 struct panthor_fw_csg_iface *
301 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
302 {
303 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
304 		return NULL;
305 
306 	return &ptdev->fw->iface.groups[csg_slot];
307 }
308 
309 /**
310  * panthor_fw_get_cs_iface() - Get a command stream slot interface
311  * @ptdev: Device.
312  * @csg_slot: Index of the command stream group slot.
313  * @cs_slot: Index of the command stream slot.
314  *
315  * Return: The command stream slot interface.
316  */
317 struct panthor_fw_cs_iface *
318 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
319 {
320 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
321 		return NULL;
322 
323 	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
324 }
325 
326 static bool panthor_fw_has_glb_state(struct panthor_device *ptdev)
327 {
328 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
329 
330 	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0);
331 }
332 
333 static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev)
334 {
335 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
336 
337 	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0);
338 }
339 
340 u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
341 				    struct panthor_fw_csg_iface *csg_iface)
342 {
343 	if (panthor_fw_has_64bit_ep_req(ptdev))
344 		return csg_iface->input->endpoint_req2;
345 	else
346 		return csg_iface->input->endpoint_req;
347 }
348 
349 void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
350 				     struct panthor_fw_csg_iface *csg_iface, u64 value)
351 {
352 	if (panthor_fw_has_64bit_ep_req(ptdev))
353 		csg_iface->input->endpoint_req2 = value;
354 	else
355 		csg_iface->input->endpoint_req = lower_32_bits(value);
356 }
357 
358 void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
359 					struct panthor_fw_csg_iface *csg_iface, u64 value,
360 					u64 mask)
361 {
362 	if (panthor_fw_has_64bit_ep_req(ptdev))
363 		panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask);
364 	else
365 		panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value),
366 				       lower_32_bits(mask));
367 }
368 
369 /**
370  * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
371  * @ptdev: Device.
372  * @timeout_us: Timeout expressed in micro-seconds.
373  *
374  * The FW has two timer sources: the GPU counter or arch-timer. We need
375  * to express timeouts in term of number of cycles and specify which
376  * timer source should be used.
377  *
378  * Return: A value suitable for timeout fields in the global interface.
379  */
380 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
381 {
382 	bool use_cycle_counter = false;
383 	u32 timer_rate = 0;
384 	u64 mod_cycles;
385 
386 #ifdef CONFIG_ARM_ARCH_TIMER
387 	timer_rate = arch_timer_get_cntfrq();
388 #endif
389 
390 	if (!timer_rate) {
391 		use_cycle_counter = true;
392 		timer_rate = clk_get_rate(ptdev->clks.core);
393 	}
394 
395 	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
396 		/* We couldn't get a valid clock rate, let's just pick the
397 		 * maximum value so the FW still handles the core
398 		 * power on/off requests.
399 		 */
400 		return GLB_TIMER_VAL(~0) |
401 		       GLB_TIMER_SOURCE_GPU_COUNTER;
402 	}
403 
404 	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
405 				      1000000ull << 10);
406 	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
407 		mod_cycles = GLB_TIMER_VAL(~0);
408 
409 	return GLB_TIMER_VAL(mod_cycles) |
410 	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
411 }
412 
413 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
414 				       struct panthor_fw_binary_iter *iter,
415 				       void *out, size_t size)
416 {
417 	size_t new_offset = iter->offset + size;
418 
419 	if (new_offset > iter->size || new_offset < iter->offset) {
420 		drm_err(&ptdev->base, "Firmware too small\n");
421 		return -EINVAL;
422 	}
423 
424 	memcpy(out, iter->data + iter->offset, size);
425 	iter->offset = new_offset;
426 	return 0;
427 }
428 
429 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
430 					   struct panthor_fw_binary_iter *iter,
431 					   struct panthor_fw_binary_iter *sub_iter,
432 					   size_t size)
433 {
434 	size_t new_offset = iter->offset + size;
435 
436 	if (new_offset > iter->size || new_offset < iter->offset) {
437 		drm_err(&ptdev->base, "Firmware entry too long\n");
438 		return -EINVAL;
439 	}
440 
441 	sub_iter->offset = 0;
442 	sub_iter->data = iter->data + iter->offset;
443 	sub_iter->size = size;
444 	iter->offset = new_offset;
445 	return 0;
446 }
447 
448 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
449 					struct panthor_fw_section *section)
450 {
451 	bool was_mapped = !!section->mem->kmap;
452 	int ret;
453 
454 	if (!section->data.size &&
455 	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
456 		return;
457 
458 	ret = panthor_kernel_bo_vmap(section->mem);
459 	if (drm_WARN_ON(&ptdev->base, ret))
460 		return;
461 
462 	memcpy(section->mem->kmap, section->data.buf, section->data.size);
463 	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
464 		memset(section->mem->kmap + section->data.size, 0,
465 		       panthor_kernel_bo_size(section->mem) - section->data.size);
466 	}
467 
468 	if (!was_mapped)
469 		panthor_kernel_bo_vunmap(section->mem);
470 }
471 
472 /**
473  * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
474  * @ptdev: Device.
475  * @input: Pointer holding the input interface on success.
476  * Should be ignored on failure.
477  * @output: Pointer holding the output interface on success.
478  * Should be ignored on failure.
479  * @input_fw_va: Pointer holding the input interface FW VA on success.
480  * Should be ignored on failure.
481  * @output_fw_va: Pointer holding the output interface FW VA on success.
482  * Should be ignored on failure.
483  *
484  * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
485  * interface is at offset 0, and the output interface at offset 4096.
486  *
487  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
488  */
489 struct panthor_kernel_bo *
490 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
491 				 struct panthor_fw_ringbuf_input_iface **input,
492 				 const struct panthor_fw_ringbuf_output_iface **output,
493 				 u32 *input_fw_va, u32 *output_fw_va)
494 {
495 	struct panthor_kernel_bo *mem;
496 	int ret;
497 
498 	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
499 				       DRM_PANTHOR_BO_NO_MMAP,
500 				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
501 				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
502 				       PANTHOR_VM_KERNEL_AUTO_VA,
503 				       "Queue FW interface");
504 	if (IS_ERR(mem))
505 		return mem;
506 
507 	ret = panthor_kernel_bo_vmap(mem);
508 	if (ret) {
509 		panthor_kernel_bo_destroy(mem);
510 		return ERR_PTR(ret);
511 	}
512 
513 	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
514 	*input = mem->kmap;
515 	*output = mem->kmap + SZ_4K;
516 	*input_fw_va = panthor_kernel_bo_gpuva(mem);
517 	*output_fw_va = *input_fw_va + SZ_4K;
518 
519 	return mem;
520 }
521 
522 /**
523  * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
524  * @ptdev: Device.
525  * @size: Size of the suspend buffer.
526  *
527  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
528  */
529 struct panthor_kernel_bo *
530 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
531 {
532 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
533 					DRM_PANTHOR_BO_NO_MMAP,
534 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
535 					PANTHOR_VM_KERNEL_AUTO_VA,
536 					"FW suspend buffer");
537 }
538 
539 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
540 					 const struct firmware *fw,
541 					 struct panthor_fw_binary_iter *iter,
542 					 u32 ehdr)
543 {
544 	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
545 	struct panthor_fw_binary_section_entry_hdr hdr;
546 	struct panthor_fw_section *section;
547 	u32 section_size;
548 	u32 name_len;
549 	int ret;
550 
551 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
552 	if (ret)
553 		return ret;
554 
555 	if (hdr.data.end < hdr.data.start) {
556 		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
557 			hdr.data.end, hdr.data.start);
558 		return -EINVAL;
559 	}
560 
561 	if (hdr.va.end < hdr.va.start) {
562 		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
563 			hdr.va.end, hdr.va.start);
564 		return -EINVAL;
565 	}
566 
567 	if (hdr.data.end > fw->size) {
568 		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
569 			hdr.data.end, fw->size);
570 		return -EINVAL;
571 	}
572 
573 	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
574 		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
575 			hdr.va.start, hdr.va.end);
576 		return -EINVAL;
577 	}
578 
579 	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
580 		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
581 			hdr.flags);
582 		return -EINVAL;
583 	}
584 
585 	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
586 		drm_warn(&ptdev->base,
587 			 "Firmware protected mode entry is not supported, ignoring");
588 		return 0;
589 	}
590 
591 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
592 	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
593 		drm_err(&ptdev->base,
594 			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
595 		return -EINVAL;
596 	}
597 
598 	name_len = iter->size - iter->offset;
599 
600 	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
601 	if (!section)
602 		return -ENOMEM;
603 
604 	list_add_tail(&section->node, &ptdev->fw->sections);
605 	section->flags = hdr.flags;
606 	section->data.size = hdr.data.end - hdr.data.start;
607 
608 	if (section->data.size > 0) {
609 		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
610 
611 		if (!data)
612 			return -ENOMEM;
613 
614 		memcpy(data, fw->data + hdr.data.start, section->data.size);
615 		section->data.buf = data;
616 	}
617 
618 	if (name_len > 0) {
619 		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
620 
621 		if (!name)
622 			return -ENOMEM;
623 
624 		memcpy(name, iter->data + iter->offset, name_len);
625 		name[name_len] = '\0';
626 		section->name = name;
627 	}
628 
629 	section_size = hdr.va.end - hdr.va.start;
630 	if (section_size) {
631 		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
632 		struct panthor_gem_object *bo;
633 		u32 vm_map_flags = 0;
634 		u64 va = hdr.va.start;
635 
636 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
637 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
638 
639 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
640 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
641 
642 		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
643 		 * non-cacheable for now. We might want to introduce a new
644 		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
645 		 * memory and is currently not used by our driver) for
646 		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
647 		 * of IO-coherent systems.
648 		 */
649 		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
650 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
651 
652 		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
653 							section_size,
654 							DRM_PANTHOR_BO_NO_MMAP,
655 							vm_map_flags, va, "FW section");
656 		if (IS_ERR(section->mem))
657 			return PTR_ERR(section->mem);
658 
659 		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
660 			return -EINVAL;
661 
662 		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
663 			ret = panthor_kernel_bo_vmap(section->mem);
664 			if (ret)
665 				return ret;
666 		}
667 
668 		panthor_fw_init_section_mem(ptdev, section);
669 
670 		bo = to_panthor_bo(section->mem->obj);
671 
672 		/* An sgt should have been requested when the kernel BO was GPU-mapped. */
673 		if (drm_WARN_ON_ONCE(&ptdev->base, !bo->dmap.sgt))
674 			return -EINVAL;
675 
676 		dma_sync_sgtable_for_device(ptdev->base.dev, bo->dmap.sgt, DMA_TO_DEVICE);
677 	}
678 
679 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
680 		ptdev->fw->shared_section = section;
681 
682 	return 0;
683 }
684 
685 static int panthor_fw_read_build_info(struct panthor_device *ptdev,
686 				      const struct firmware *fw,
687 				      struct panthor_fw_binary_iter *iter,
688 				      u32 ehdr)
689 {
690 	struct panthor_fw_build_info_hdr hdr;
691 	static const char git_sha_header[] = "git_sha: ";
692 	const int header_len = sizeof(git_sha_header) - 1;
693 	int ret;
694 
695 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
696 	if (ret)
697 		return ret;
698 
699 	if (hdr.meta_start > fw->size ||
700 	    hdr.meta_start + hdr.meta_size > fw->size) {
701 		drm_err(&ptdev->base, "Firmware build info corrupt\n");
702 		/* We don't need the build info, so continue */
703 		return 0;
704 	}
705 
706 	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
707 		/* Not the expected header, this isn't metadata we understand */
708 		return 0;
709 	}
710 
711 	/* Check that the git SHA is NULL terminated as expected */
712 	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
713 		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
714 		/* Don't treat as fatal */
715 		return 0;
716 	}
717 
718 	drm_info(&ptdev->base, "Firmware git sha: %s\n",
719 		 fw->data + hdr.meta_start + header_len);
720 
721 	return 0;
722 }
723 
724 static void
725 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
726 {
727 	struct panthor_fw_section *section;
728 
729 	list_for_each_entry(section, &ptdev->fw->sections, node) {
730 		struct sg_table *sgt;
731 
732 		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
733 			continue;
734 
735 		panthor_fw_init_section_mem(ptdev, section);
736 
737 		/* An sgt should have been requested when the kernel BO was GPU-mapped. */
738 		sgt = to_panthor_bo(section->mem->obj)->dmap.sgt;
739 		if (!drm_WARN_ON_ONCE(&ptdev->base, !sgt))
740 			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
741 	}
742 }
743 
744 static int panthor_fw_load_entry(struct panthor_device *ptdev,
745 				 const struct firmware *fw,
746 				 struct panthor_fw_binary_iter *iter)
747 {
748 	struct panthor_fw_binary_iter eiter;
749 	u32 ehdr;
750 	int ret;
751 
752 	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
753 	if (ret)
754 		return ret;
755 
756 	if ((iter->offset % sizeof(u32)) ||
757 	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
758 		drm_err(&ptdev->base, "Firmware entry is not 32-bit aligned, offset=0x%x size=0x%x\n",
759 			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
760 		return -EINVAL;
761 	}
762 
763 	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
764 					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
765 		return -EINVAL;
766 
767 	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
768 	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
769 		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
770 	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
771 		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
772 
773 	/* FIXME: handle those entry types? */
774 	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
775 	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
776 	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
777 	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
778 		return 0;
779 	default:
780 		break;
781 	}
782 
783 	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
784 		return 0;
785 
786 	drm_err(&ptdev->base,
787 		"Unsupported non-optional entry type %u in firmware\n",
788 		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
789 	return -EINVAL;
790 }
791 
792 static int panthor_fw_load(struct panthor_device *ptdev)
793 {
794 	const struct firmware *fw = NULL;
795 	struct panthor_fw_binary_iter iter = {};
796 	struct panthor_fw_binary_hdr hdr;
797 	char fw_path[128];
798 	int ret;
799 
800 	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
801 		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
802 		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
803 		 CSF_FW_NAME);
804 
805 	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
806 	if (ret) {
807 		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
808 			CSF_FW_NAME);
809 		return ret;
810 	}
811 
812 	iter.data = fw->data;
813 	iter.size = fw->size;
814 	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
815 	if (ret)
816 		goto out;
817 
818 	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
819 		ret = -EINVAL;
820 		drm_err(&ptdev->base, "Invalid firmware magic\n");
821 		goto out;
822 	}
823 
824 	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
825 		ret = -EINVAL;
826 		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
827 			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
828 		goto out;
829 	}
830 
831 	if (hdr.size > iter.size) {
832 		drm_err(&ptdev->base, "Firmware image is truncated\n");
833 		goto out;
834 	}
835 
836 	iter.size = hdr.size;
837 
838 	while (iter.offset < hdr.size) {
839 		ret = panthor_fw_load_entry(ptdev, fw, &iter);
840 		if (ret)
841 			goto out;
842 	}
843 
844 	if (!ptdev->fw->shared_section) {
845 		drm_err(&ptdev->base, "Shared interface region not found\n");
846 		ret = -EINVAL;
847 		goto out;
848 	}
849 
850 out:
851 	release_firmware(fw);
852 	return ret;
853 }
854 
855 /**
856  * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
857  * @ptdev: Device.
858  * @mcu_va: MCU address.
859  *
860  * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
861  */
862 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
863 {
864 	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
865 	u64 shared_mem_end = shared_mem_start +
866 			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
867 	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
868 		return NULL;
869 
870 	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
871 }
872 
873 static int panthor_init_cs_iface(struct panthor_device *ptdev,
874 				 unsigned int csg_idx, unsigned int cs_idx)
875 {
876 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
877 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
878 	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
879 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
880 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
881 			   (csg_idx * glb_iface->control->group_stride) +
882 			   CSF_STREAM_CONTROL_OFFSET +
883 			   (cs_idx * csg_iface->control->stream_stride);
884 	struct panthor_fw_cs_iface *first_cs_iface =
885 		panthor_fw_get_cs_iface(ptdev, 0, 0);
886 
887 	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
888 		return -EINVAL;
889 
890 	spin_lock_init(&cs_iface->lock);
891 	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
892 	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
893 	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
894 
895 	if (!cs_iface->input || !cs_iface->output) {
896 		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
897 		return -EINVAL;
898 	}
899 
900 	if (cs_iface != first_cs_iface) {
901 		if (cs_iface->control->features != first_cs_iface->control->features) {
902 			drm_err(&ptdev->base, "Expecting identical CS slots");
903 			return -EINVAL;
904 		}
905 	} else {
906 		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
907 
908 		ptdev->csif_info.cs_reg_count = reg_count;
909 		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
910 	}
911 
912 	return 0;
913 }
914 
915 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
916 			const struct panthor_fw_csg_control_iface *b)
917 {
918 	if (a->features != b->features)
919 		return false;
920 	if (a->suspend_size != b->suspend_size)
921 		return false;
922 	if (a->protm_suspend_size != b->protm_suspend_size)
923 		return false;
924 	if (a->stream_num != b->stream_num)
925 		return false;
926 	return true;
927 }
928 
929 static int panthor_init_csg_iface(struct panthor_device *ptdev,
930 				  unsigned int csg_idx)
931 {
932 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
933 	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
934 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
935 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
936 	unsigned int i;
937 
938 	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
939 		return -EINVAL;
940 
941 	spin_lock_init(&csg_iface->lock);
942 	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
943 	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
944 	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
945 
946 	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
947 	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
948 		return -EINVAL;
949 
950 	if (!csg_iface->input || !csg_iface->output) {
951 		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
952 		return -EINVAL;
953 	}
954 
955 	if (csg_idx > 0) {
956 		struct panthor_fw_csg_iface *first_csg_iface =
957 			panthor_fw_get_csg_iface(ptdev, 0);
958 
959 		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
960 			drm_err(&ptdev->base, "Expecting identical CSG slots");
961 			return -EINVAL;
962 		}
963 	}
964 
965 	for (i = 0; i < csg_iface->control->stream_num; i++) {
966 		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
967 
968 		if (ret)
969 			return ret;
970 	}
971 
972 	return 0;
973 }
974 
975 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
976 {
977 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
978 
979 	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
980 		return 0;
981 
982 	return glb_iface->control->instr_features;
983 }
984 
985 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
986 {
987 	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
988 	unsigned int i;
989 
990 	if (!ptdev->fw->shared_section->mem->kmap)
991 		return -EINVAL;
992 
993 	spin_lock_init(&glb_iface->lock);
994 	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
995 
996 	if (!glb_iface->control->version) {
997 		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
998 		return -EINVAL;
999 	}
1000 
1001 	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
1002 	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
1003 	if (!glb_iface->input || !glb_iface->output) {
1004 		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
1005 		return -EINVAL;
1006 	}
1007 
1008 	if (glb_iface->control->group_num > MAX_CSGS ||
1009 	    glb_iface->control->group_num < MIN_CSGS) {
1010 		drm_err(&ptdev->base, "Invalid number of control groups");
1011 		return -EINVAL;
1012 	}
1013 
1014 	for (i = 0; i < glb_iface->control->group_num; i++) {
1015 		int ret = panthor_init_csg_iface(ptdev, i);
1016 
1017 		if (ret)
1018 			return ret;
1019 	}
1020 
1021 	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
1022 		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
1023 		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
1024 		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
1025 		 glb_iface->control->features,
1026 		 panthor_get_instr_features(ptdev));
1027 	return 0;
1028 }
1029 
1030 static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
1031 {
1032 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1033 
1034 	/* Enable all cores. */
1035 	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
1036 
1037 	/* Setup timers. */
1038 	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
1039 	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
1040 	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
1041 
1042 	/* Enable interrupts we care about. */
1043 	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
1044 					 GLB_PING |
1045 					 GLB_CFG_PROGRESS_TIMER |
1046 					 GLB_CFG_POWEROFF_TIMER |
1047 					 GLB_IDLE_EN |
1048 					 GLB_IDLE;
1049 
1050 	if (panthor_fw_has_glb_state(ptdev))
1051 		glb_iface->input->ack_irq_mask |= GLB_STATE_MASK;
1052 
1053 	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN | GLB_COUNTER_EN,
1054 			       GLB_IDLE_EN | GLB_COUNTER_EN);
1055 	panthor_fw_toggle_reqs(glb_iface, req, ack,
1056 			       GLB_CFG_ALLOC_EN |
1057 			       GLB_CFG_POWEROFF_TIMER |
1058 			       GLB_CFG_PROGRESS_TIMER);
1059 
1060 	panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID);
1061 
1062 	/* Kick the watchdog. */
1063 	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
1064 			 msecs_to_jiffies(PING_INTERVAL_MS));
1065 }
1066 
1067 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
1068 {
1069 	u32 duration;
1070 	u64 start = 0;
1071 
1072 	if (tracepoint_enabled(gpu_job_irq))
1073 		start = ktime_get_ns();
1074 
1075 	gpu_write(ptdev->fw->irq.iomem, INT_CLEAR, status);
1076 
1077 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
1078 		ptdev->fw->booted = true;
1079 
1080 	wake_up_all(&ptdev->fw->req_waitqueue);
1081 
1082 	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
1083 	if (!ptdev->fw->booted)
1084 		return;
1085 
1086 	panthor_sched_report_fw_events(ptdev, status);
1087 
1088 	if (tracepoint_enabled(gpu_job_irq) && start) {
1089 		if (check_sub_overflow(ktime_get_ns(), start, &duration))
1090 			duration = U32_MAX;
1091 		trace_gpu_job_irq(ptdev->base.dev, status, duration);
1092 	}
1093 }
1094 PANTHOR_IRQ_HANDLER(job, panthor_job_irq_handler);
1095 
1096 static int panthor_fw_start(struct panthor_device *ptdev)
1097 {
1098 	struct panthor_fw *fw = ptdev->fw;
1099 	bool timedout = false;
1100 
1101 	ptdev->fw->booted = false;
1102 	panthor_job_irq_enable_events(&ptdev->fw->irq, ~0);
1103 	panthor_job_irq_resume(&ptdev->fw->irq);
1104 	gpu_write(fw->iomem, MCU_CONTROL, MCU_CONTROL_AUTO);
1105 
1106 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1107 				ptdev->fw->booted,
1108 				msecs_to_jiffies(1000))) {
1109 		if (!ptdev->fw->booted &&
1110 		    !(gpu_read(fw->irq.iomem, INT_STAT) & JOB_INT_GLOBAL_IF))
1111 			timedout = true;
1112 	}
1113 
1114 	if (timedout) {
1115 		static const char * const status_str[] = {
1116 			[MCU_STATUS_DISABLED] = "disabled",
1117 			[MCU_STATUS_ENABLED] = "enabled",
1118 			[MCU_STATUS_HALT] = "halt",
1119 			[MCU_STATUS_FATAL] = "fatal",
1120 		};
1121 		u32 status = gpu_read(fw->iomem, MCU_STATUS);
1122 
1123 		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1124 			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1125 		return -ETIMEDOUT;
1126 	}
1127 
1128 	return 0;
1129 }
1130 
1131 static void panthor_fw_stop(struct panthor_device *ptdev)
1132 {
1133 	struct panthor_fw *fw = ptdev->fw;
1134 	u32 status;
1135 
1136 	gpu_write(fw->iomem, MCU_CONTROL, MCU_CONTROL_DISABLE);
1137 	if (gpu_read_poll_timeout(fw->iomem, MCU_STATUS, status,
1138 				  status == MCU_STATUS_DISABLED, 10, 100000))
1139 		drm_err(&ptdev->base, "Failed to stop MCU");
1140 }
1141 
1142 static bool panthor_fw_mcu_halted(struct panthor_device *ptdev)
1143 {
1144 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1145 	bool halted;
1146 
1147 	halted = gpu_read(ptdev->fw->iomem, MCU_STATUS) == MCU_STATUS_HALT;
1148 
1149 	if (panthor_fw_has_glb_state(ptdev))
1150 		halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT);
1151 
1152 	return halted;
1153 }
1154 
1155 static void panthor_fw_halt_mcu(struct panthor_device *ptdev)
1156 {
1157 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1158 
1159 	if (panthor_fw_has_glb_state(ptdev))
1160 		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK);
1161 	else
1162 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1163 
1164 	panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID);
1165 }
1166 
1167 static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev)
1168 {
1169 	bool halted = false;
1170 
1171 	if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10,
1172 				     MCU_HALT_TIMEOUT_US, 0, ptdev)) {
1173 		drm_warn(&ptdev->base, "Timed out waiting for MCU to halt");
1174 		return false;
1175 	}
1176 
1177 	return true;
1178 }
1179 
1180 static void panthor_fw_mcu_set_active(struct panthor_device *ptdev)
1181 {
1182 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1183 
1184 	if (panthor_fw_has_glb_state(ptdev))
1185 		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK);
1186 	else
1187 		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1188 }
1189 
1190 /**
1191  * panthor_fw_pre_reset() - Call before a reset.
1192  * @ptdev: Device.
1193  * @on_hang: true if the reset was triggered on a GPU hang.
1194  *
1195  * If the reset is not triggered on a hang, we try to gracefully halt the
1196  * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1197  */
1198 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1199 {
1200 	/* Make sure we won't be woken up by a ping. */
1201 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1202 
1203 	ptdev->reset.fast = false;
1204 
1205 	if (!on_hang) {
1206 		panthor_fw_halt_mcu(ptdev);
1207 		if (!panthor_fw_wait_mcu_halted(ptdev))
1208 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1209 		else
1210 			ptdev->reset.fast = true;
1211 	}
1212 
1213 	panthor_job_irq_suspend(&ptdev->fw->irq);
1214 	panthor_fw_stop(ptdev);
1215 }
1216 
1217 /**
1218  * panthor_fw_post_reset() - Call after a reset.
1219  * @ptdev: Device.
1220  *
1221  * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1222  * make sure we can recover from a memory corruption.
1223  */
1224 int panthor_fw_post_reset(struct panthor_device *ptdev)
1225 {
1226 	int ret;
1227 
1228 	/* Make the MCU VM active. */
1229 	ret = panthor_vm_active(ptdev->fw->vm);
1230 	if (ret)
1231 		return ret;
1232 
1233 	if (!ptdev->reset.fast) {
1234 		/* On a slow reset, reload all sections, including RO ones.
1235 		 * We're not supposed to end up here anyway, let's just assume
1236 		 * the overhead of reloading everything is acceptable.
1237 		 */
1238 		panthor_reload_fw_sections(ptdev, true);
1239 	} else {
1240 		/*
1241 		 * If the FW was previously successfully halted in the pre-reset
1242 		 * operation, we need to transition it to active again before
1243 		 * the FW is rebooted.
1244 		 * This is not needed on a slow reset because FW sections are
1245 		 * re-initialized.
1246 		 */
1247 		panthor_fw_mcu_set_active(ptdev);
1248 	}
1249 
1250 	ret = panthor_fw_start(ptdev);
1251 	if (ret) {
1252 		drm_err(&ptdev->base, "FW %s reset failed",
1253 			ptdev->reset.fast ?  "fast" : "slow");
1254 		return ret;
1255 	}
1256 
1257 	/* We must re-initialize the global interface even on fast-reset. */
1258 	panthor_fw_init_global_iface(ptdev);
1259 	return 0;
1260 }
1261 
1262 /**
1263  * panthor_fw_unplug() - Called when the device is unplugged.
1264  * @ptdev: Device.
1265  *
1266  * This function must make sure all pending operations are flushed before
1267  * will release device resources, thus preventing any interaction with
1268  * the HW.
1269  *
1270  * If there is still FW-related work running after this function returns,
1271  * they must use drm_dev_{enter,exit}() and skip any HW access when
1272  * drm_dev_enter() returns false.
1273  */
1274 void panthor_fw_unplug(struct panthor_device *ptdev)
1275 {
1276 	struct panthor_fw_section *section;
1277 
1278 	disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1279 
1280 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
1281 		/* Make sure the IRQ handler cannot be called after that point. */
1282 		if (ptdev->fw->irq.irq)
1283 			panthor_job_irq_suspend(&ptdev->fw->irq);
1284 
1285 		panthor_fw_stop(ptdev);
1286 	}
1287 
1288 	list_for_each_entry(section, &ptdev->fw->sections, node)
1289 		panthor_kernel_bo_destroy(section->mem);
1290 
1291 	/* We intentionally don't call panthor_vm_idle() and let
1292 	 * panthor_mmu_unplug() release the AS we acquired with
1293 	 * panthor_vm_active() so we don't have to track the VM active/idle
1294 	 * state to keep the active_refcnt balanced.
1295 	 */
1296 	panthor_vm_put(ptdev->fw->vm);
1297 	ptdev->fw->vm = NULL;
1298 
1299 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
1300 		panthor_hw_l2_power_off(ptdev);
1301 }
1302 
1303 /**
1304  * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1305  * @req_ptr: Pointer to the req register.
1306  * @ack_ptr: Pointer to the ack register.
1307  * @wq: Wait queue to use for the sleeping wait.
1308  * @req_mask: Mask of requests to wait for.
1309  * @acked: Pointer to field that's updated with the acked requests.
1310  * If the function returns 0, *acked == req_mask.
1311  * @timeout_ms: Timeout expressed in milliseconds.
1312  *
1313  * Return: 0 on success, -ETIMEDOUT otherwise.
1314  */
1315 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1316 				wait_queue_head_t *wq,
1317 				u32 req_mask, u32 *acked,
1318 				u32 timeout_ms)
1319 {
1320 	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1321 	int ret;
1322 
1323 	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
1324 	*acked = req_mask;
1325 	ret = read_poll_timeout_atomic(READ_ONCE, ack,
1326 				       (ack & req_mask) == req,
1327 				       0, 10, 0,
1328 				       *ack_ptr);
1329 	if (!ret)
1330 		return 0;
1331 
1332 	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1333 			       msecs_to_jiffies(timeout_ms)))
1334 		return 0;
1335 
1336 	/* Check one last time, in case we were not woken up for some reason. */
1337 	ack = READ_ONCE(*ack_ptr);
1338 	if ((ack & req_mask) == req)
1339 		return 0;
1340 
1341 	*acked = ~(req ^ ack) & req_mask;
1342 	return -ETIMEDOUT;
1343 }
1344 
1345 /**
1346  * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1347  * @ptdev: Device.
1348  * @req_mask: Mask of requests to wait for.
1349  * @acked: Pointer to field that's updated with the acked requests.
1350  * If the function returns 0, *acked == req_mask.
1351  * @timeout_ms: Timeout expressed in milliseconds.
1352  *
1353  * Return: 0 on success, -ETIMEDOUT otherwise.
1354  */
1355 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1356 			     u32 req_mask, u32 *acked,
1357 			     u32 timeout_ms)
1358 {
1359 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1360 
1361 	/* GLB_HALT doesn't get acked through the FW interface. */
1362 	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1363 		return -EINVAL;
1364 
1365 	return panthor_fw_wait_acks(&glb_iface->input->req,
1366 				    &glb_iface->output->ack,
1367 				    &ptdev->fw->req_waitqueue,
1368 				    req_mask, acked, timeout_ms);
1369 }
1370 
1371 /**
1372  * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1373  * @ptdev: Device.
1374  * @csg_slot: CSG slot ID.
1375  * @req_mask: Mask of requests to wait for.
1376  * @acked: Pointer to field that's updated with the acked requests.
1377  * If the function returns 0, *acked == req_mask.
1378  * @timeout_ms: Timeout expressed in milliseconds.
1379  *
1380  * Return: 0 on success, -ETIMEDOUT otherwise.
1381  */
1382 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1383 			     u32 req_mask, u32 *acked, u32 timeout_ms)
1384 {
1385 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1386 	int ret;
1387 
1388 	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1389 		return -EINVAL;
1390 
1391 	ret = panthor_fw_wait_acks(&csg_iface->input->req,
1392 				   &csg_iface->output->ack,
1393 				   &ptdev->fw->req_waitqueue,
1394 				   req_mask, acked, timeout_ms);
1395 
1396 	/*
1397 	 * Check that all bits in the state field were updated, if any mismatch
1398 	 * then clear all bits in the state field. This allows code to do
1399 	 * (acked & CSG_STATE_MASK) and get the right value.
1400 	 */
1401 
1402 	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1403 		*acked &= ~CSG_STATE_MASK;
1404 
1405 	return ret;
1406 }
1407 
1408 void panthor_fw_ring_doorbell(struct panthor_device *ptdev, u32 doorbell_id)
1409 {
1410 	gpu_write(ptdev->iomem, CSF_DOORBELL(doorbell_id), 1);
1411 }
1412 
1413 /**
1414  * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1415  * @ptdev: Device.
1416  * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1417  *
1418  * This function is toggling bits in the doorbell_req and ringing the
1419  * global doorbell. It doesn't require a user doorbell to be attached to
1420  * the group.
1421  */
1422 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1423 {
1424 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1425 
1426 	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1427 	panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID);
1428 }
1429 
1430 static void panthor_fw_ping_work(struct work_struct *work)
1431 {
1432 	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1433 	struct panthor_device *ptdev = fw->irq.ptdev;
1434 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1435 	u32 acked;
1436 	int ret;
1437 
1438 	if (panthor_device_reset_is_pending(ptdev))
1439 		return;
1440 
1441 	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1442 	panthor_fw_ring_doorbell(ptdev, CSF_GLB_DOORBELL_ID);
1443 
1444 	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1445 	if (ret) {
1446 		panthor_device_schedule_reset(ptdev);
1447 		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1448 	} else {
1449 		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1450 				 msecs_to_jiffies(PING_INTERVAL_MS));
1451 	}
1452 }
1453 
1454 /**
1455  * panthor_fw_init() - Initialize FW related data.
1456  * @ptdev: Device.
1457  *
1458  * Return: 0 on success, a negative error code otherwise.
1459  */
1460 int panthor_fw_init(struct panthor_device *ptdev)
1461 {
1462 	struct panthor_fw *fw;
1463 	int ret, irq;
1464 
1465 	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1466 	if (!fw)
1467 		return -ENOMEM;
1468 
1469 	fw->iomem = ptdev->iomem + MCU_CONTROL_BASE;
1470 	ptdev->fw = fw;
1471 	init_waitqueue_head(&fw->req_waitqueue);
1472 	INIT_LIST_HEAD(&fw->sections);
1473 	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1474 
1475 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1476 	if (irq <= 0)
1477 		return -ENODEV;
1478 
1479 	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0,
1480 				      ptdev->iomem + JOB_INT_BASE);
1481 	if (ret) {
1482 		drm_err(&ptdev->base, "failed to request job irq");
1483 		return ret;
1484 	}
1485 
1486 	ret = panthor_hw_l2_power_on(ptdev);
1487 	if (ret)
1488 		return ret;
1489 
1490 	fw->vm = panthor_vm_create(ptdev, true,
1491 				   0, SZ_4G,
1492 				   CSF_MCU_SHARED_REGION_START,
1493 				   CSF_MCU_SHARED_REGION_SIZE);
1494 	if (IS_ERR(fw->vm)) {
1495 		ret = PTR_ERR(fw->vm);
1496 		fw->vm = NULL;
1497 		goto err_unplug_fw;
1498 	}
1499 
1500 	ret = panthor_fw_load(ptdev);
1501 	if (ret)
1502 		goto err_unplug_fw;
1503 
1504 	ret = panthor_vm_active(fw->vm);
1505 	if (ret)
1506 		goto err_unplug_fw;
1507 
1508 	ret = panthor_fw_start(ptdev);
1509 	if (ret)
1510 		goto err_unplug_fw;
1511 
1512 	ret = panthor_fw_init_ifaces(ptdev);
1513 	if (ret)
1514 		goto err_unplug_fw;
1515 
1516 	panthor_fw_init_global_iface(ptdev);
1517 	return 0;
1518 
1519 err_unplug_fw:
1520 	panthor_fw_unplug(ptdev);
1521 	return ret;
1522 }
1523 
/* Firmware files declared by this module, one per listed arch directory. */
MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");
1531