xref: /linux/drivers/gpu/drm/panthor/panthor_fw.c (revision e0c0ab04f6785abaa71b9b8dc252cb1a2072c225)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7 
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 #include <linux/pm_runtime.h>
16 
17 #include <drm/drm_drv.h>
18 #include <drm/drm_managed.h>
19 
20 #include "panthor_device.h"
21 #include "panthor_fw.h"
22 #include "panthor_gem.h"
23 #include "panthor_gpu.h"
24 #include "panthor_mmu.h"
25 #include "panthor_regs.h"
26 #include "panthor_sched.h"
27 
/* File name of the firmware image; appended to the per-arch directory in panthor_fw_load(). */
#define CSF_FW_NAME "mali_csffw.bin"

/* Delay, in milliseconds, used when arming the FW watchdog ping work. */
#define PING_INTERVAL_MS			12000
/*
 * Progress timeout, in cycles. The value written to the global interface
 * is this constant shifted right by PROGRESS_TIMEOUT_SCALE_SHIFT.
 */
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
/* Idle hysteresis, in microseconds; converted with panthor_fw_conv_timeout(). */
#define IDLE_HYSTERESIS_US			800
/* Power-off hysteresis, in microseconds; converted with panthor_fw_conv_timeout(). */
#define PWROFF_HYSTERESIS_US			10000
35 
/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 *
 * Read from the very start of the firmware image by panthor_fw_load(),
 * which rejects the image when @magic or @major don't have the expected
 * value.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/**
	 * @size: FW binary size.
	 *
	 * panthor_fw_load() uses this value as the upper bound when iterating
	 * over the entries that follow the header.
	 */
	u32 size;
};
63 
/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 *
 * The type is encoded in the low 8 bits of each entry header (see
 * CSF_FW_BINARY_ENTRY_TYPE()). Note that value 5 is not defined here.
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
89 
/*
 * Entry header decoding. Bits 0-7 hold the entry type and bits 8-15 the
 * entry size in bytes; panthor_fw_load_entry() subtracts the size of the
 * header word from that size to get the payload length.
 */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
/* Entries of an unknown type are skipped without error when this bit is set. */
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

/*
 * Flags found in the section entry header of an IFACE entry. See
 * panthor_fw_load_section_entry() for how they translate into mapping
 * attributes.
 */
#define CSF_FW_BINARY_IFACE_ENTRY_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT		(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT					BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO					BIT(31)

/* Any section flag outside of this mask makes the firmware load fail. */
#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD |					\
	 CSF_FW_BINARY_IFACE_ENTRY_WR |					\
	 CSF_FW_BINARY_IFACE_ENTRY_EX |					\
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED  |				\
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
115 
/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 *
 * Read from the payload of an IFACE entry by
 * panthor_fw_load_section_entry(). Whatever follows this header in the
 * entry payload is used as the section name.
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/**
	 * @va: MCU virtual range to map this binary section to.
	 *
	 * Both bounds must be aligned on the FW VM page size.
	 */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};
141 
/**
 * struct panthor_fw_build_info_hdr - Header of a build info metadata entry
 *
 * Read from the payload of a BUILD_INFO_METADATA entry by
 * panthor_fw_read_build_info().
 */
struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};
148 
/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary. The iterator is advanced by
 * panthor_fw_binary_iter_read() and panthor_fw_binary_sub_iter_init(),
 * both of which refuse to move @offset past @size.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};
164 
/**
 * struct panthor_fw_section - FW section
 *
 * One instance is allocated per IFACE entry that gets loaded, and linked
 * into the panthor_fw::sections list.
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};
198 
/*
 * MCU VA range of the shared region. The section starting at
 * CSF_MCU_SHARED_REGION_START must carry the SHARED flag and is the one
 * recorded as panthor_fw::shared_section.
 */
#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

/* Lower bounds on what the FW must expose for the interface to be accepted. */
#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3

/* Interface version encoding: major in bits 31:24, minor in 23:16, patch in 15:0. */
#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

/*
 * Offsets used to locate the group and stream control interfaces inside
 * the shared section (see panthor_init_csg_iface() and
 * panthor_init_cs_iface()).
 */
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
/* Value reported as csif_info.unpreserved_cs_reg_count. */
#define CSF_UNPRESERVED_REG_COUNT		4
214 
/**
 * struct panthor_fw_iface - FW interfaces
 *
 * CPU-side bookkeeping for the interfaces exposed by the FW. The entries
 * are filled by panthor_fw_init_ifaces() and its helpers.
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};
228 
/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/**
	 * @shared_section: The section containing the FW interfaces.
	 *
	 * Set while loading the FW binary, when the section starting at
	 * CSF_MCU_SHARED_REGION_START is encountered.
	 */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/**
	 * @booted: True if the FW is booted.
	 *
	 * Set by the job IRQ handler on the first global interface interrupt.
	 */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};
268 
269 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
270 {
271 	return ptdev->fw->vm;
272 }
273 
274 /**
275  * panthor_fw_get_glb_iface() - Get the global interface
276  * @ptdev: Device.
277  *
278  * Return: The global interface.
279  */
280 struct panthor_fw_global_iface *
281 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
282 {
283 	return &ptdev->fw->iface.global;
284 }
285 
286 /**
287  * panthor_fw_get_csg_iface() - Get a command stream group slot interface
288  * @ptdev: Device.
289  * @csg_slot: Index of the command stream group slot.
290  *
291  * Return: The command stream group slot interface.
292  */
293 struct panthor_fw_csg_iface *
294 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
295 {
296 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
297 		return NULL;
298 
299 	return &ptdev->fw->iface.groups[csg_slot];
300 }
301 
302 /**
303  * panthor_fw_get_cs_iface() - Get a command stream slot interface
304  * @ptdev: Device.
305  * @csg_slot: Index of the command stream group slot.
306  * @cs_slot: Index of the command stream slot.
307  *
308  * Return: The command stream slot interface.
309  */
310 struct panthor_fw_cs_iface *
311 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
312 {
313 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
314 		return NULL;
315 
316 	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
317 }
318 
319 /**
320  * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
321  * @ptdev: Device.
322  * @timeout_us: Timeout expressed in micro-seconds.
323  *
324  * The FW has two timer sources: the GPU counter or arch-timer. We need
325  * to express timeouts in term of number of cycles and specify which
326  * timer source should be used.
327  *
328  * Return: A value suitable for timeout fields in the global interface.
329  */
330 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
331 {
332 	bool use_cycle_counter = false;
333 	u32 timer_rate = 0;
334 	u64 mod_cycles;
335 
336 #ifdef CONFIG_ARM_ARCH_TIMER
337 	timer_rate = arch_timer_get_cntfrq();
338 #endif
339 
340 	if (!timer_rate) {
341 		use_cycle_counter = true;
342 		timer_rate = clk_get_rate(ptdev->clks.core);
343 	}
344 
345 	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
346 		/* We couldn't get a valid clock rate, let's just pick the
347 		 * maximum value so the FW still handles the core
348 		 * power on/off requests.
349 		 */
350 		return GLB_TIMER_VAL(~0) |
351 		       GLB_TIMER_SOURCE_GPU_COUNTER;
352 	}
353 
354 	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
355 				      1000000ull << 10);
356 	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
357 		mod_cycles = GLB_TIMER_VAL(~0);
358 
359 	return GLB_TIMER_VAL(mod_cycles) |
360 	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
361 }
362 
363 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
364 				       struct panthor_fw_binary_iter *iter,
365 				       void *out, size_t size)
366 {
367 	size_t new_offset = iter->offset + size;
368 
369 	if (new_offset > iter->size || new_offset < iter->offset) {
370 		drm_err(&ptdev->base, "Firmware too small\n");
371 		return -EINVAL;
372 	}
373 
374 	memcpy(out, iter->data + iter->offset, size);
375 	iter->offset = new_offset;
376 	return 0;
377 }
378 
379 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
380 					   struct panthor_fw_binary_iter *iter,
381 					   struct panthor_fw_binary_iter *sub_iter,
382 					   size_t size)
383 {
384 	size_t new_offset = iter->offset + size;
385 
386 	if (new_offset > iter->size || new_offset < iter->offset) {
387 		drm_err(&ptdev->base, "Firmware entry too long\n");
388 		return -EINVAL;
389 	}
390 
391 	sub_iter->offset = 0;
392 	sub_iter->data = iter->data + iter->offset;
393 	sub_iter->size = size;
394 	iter->offset = new_offset;
395 	return 0;
396 }
397 
398 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
399 					struct panthor_fw_section *section)
400 {
401 	bool was_mapped = !!section->mem->kmap;
402 	int ret;
403 
404 	if (!section->data.size &&
405 	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
406 		return;
407 
408 	ret = panthor_kernel_bo_vmap(section->mem);
409 	if (drm_WARN_ON(&ptdev->base, ret))
410 		return;
411 
412 	memcpy(section->mem->kmap, section->data.buf, section->data.size);
413 	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
414 		memset(section->mem->kmap + section->data.size, 0,
415 		       panthor_kernel_bo_size(section->mem) - section->data.size);
416 	}
417 
418 	if (!was_mapped)
419 		panthor_kernel_bo_vunmap(section->mem);
420 }
421 
422 /**
423  * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
424  * @ptdev: Device.
425  * @input: Pointer holding the input interface on success.
426  * Should be ignored on failure.
427  * @output: Pointer holding the output interface on success.
428  * Should be ignored on failure.
429  * @input_fw_va: Pointer holding the input interface FW VA on success.
430  * Should be ignored on failure.
431  * @output_fw_va: Pointer holding the output interface FW VA on success.
432  * Should be ignored on failure.
433  *
434  * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
435  * interface is at offset 0, and the output interface at offset 4096.
436  *
437  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
438  */
439 struct panthor_kernel_bo *
440 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
441 				 struct panthor_fw_ringbuf_input_iface **input,
442 				 const struct panthor_fw_ringbuf_output_iface **output,
443 				 u32 *input_fw_va, u32 *output_fw_va)
444 {
445 	struct panthor_kernel_bo *mem;
446 	int ret;
447 
448 	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
449 				       DRM_PANTHOR_BO_NO_MMAP,
450 				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
451 				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
452 				       PANTHOR_VM_KERNEL_AUTO_VA,
453 				       "Queue FW interface");
454 	if (IS_ERR(mem))
455 		return mem;
456 
457 	ret = panthor_kernel_bo_vmap(mem);
458 	if (ret) {
459 		panthor_kernel_bo_destroy(mem);
460 		return ERR_PTR(ret);
461 	}
462 
463 	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
464 	*input = mem->kmap;
465 	*output = mem->kmap + SZ_4K;
466 	*input_fw_va = panthor_kernel_bo_gpuva(mem);
467 	*output_fw_va = *input_fw_va + SZ_4K;
468 
469 	return mem;
470 }
471 
472 /**
473  * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
474  * @ptdev: Device.
475  * @size: Size of the suspend buffer.
476  *
477  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
478  */
479 struct panthor_kernel_bo *
480 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
481 {
482 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
483 					DRM_PANTHOR_BO_NO_MMAP,
484 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
485 					PANTHOR_VM_KERNEL_AUTO_VA,
486 					"FW suspend buffer");
487 }
488 
489 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
490 					 const struct firmware *fw,
491 					 struct panthor_fw_binary_iter *iter,
492 					 u32 ehdr)
493 {
494 	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
495 	struct panthor_fw_binary_section_entry_hdr hdr;
496 	struct panthor_fw_section *section;
497 	u32 section_size;
498 	u32 name_len;
499 	int ret;
500 
501 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
502 	if (ret)
503 		return ret;
504 
505 	if (hdr.data.end < hdr.data.start) {
506 		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
507 			hdr.data.end, hdr.data.start);
508 		return -EINVAL;
509 	}
510 
511 	if (hdr.va.end < hdr.va.start) {
512 		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
513 			hdr.va.end, hdr.va.start);
514 		return -EINVAL;
515 	}
516 
517 	if (hdr.data.end > fw->size) {
518 		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
519 			hdr.data.end, fw->size);
520 		return -EINVAL;
521 	}
522 
523 	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
524 		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
525 			hdr.va.start, hdr.va.end);
526 		return -EINVAL;
527 	}
528 
529 	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
530 		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
531 			hdr.flags);
532 		return -EINVAL;
533 	}
534 
535 	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
536 		drm_warn(&ptdev->base,
537 			 "Firmware protected mode entry not be supported, ignoring");
538 		return 0;
539 	}
540 
541 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
542 	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
543 		drm_err(&ptdev->base,
544 			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
545 		return -EINVAL;
546 	}
547 
548 	name_len = iter->size - iter->offset;
549 
550 	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
551 	if (!section)
552 		return -ENOMEM;
553 
554 	list_add_tail(&section->node, &ptdev->fw->sections);
555 	section->flags = hdr.flags;
556 	section->data.size = hdr.data.end - hdr.data.start;
557 
558 	if (section->data.size > 0) {
559 		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
560 
561 		if (!data)
562 			return -ENOMEM;
563 
564 		memcpy(data, fw->data + hdr.data.start, section->data.size);
565 		section->data.buf = data;
566 	}
567 
568 	if (name_len > 0) {
569 		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
570 
571 		if (!name)
572 			return -ENOMEM;
573 
574 		memcpy(name, iter->data + iter->offset, name_len);
575 		name[name_len] = '\0';
576 		section->name = name;
577 	}
578 
579 	section_size = hdr.va.end - hdr.va.start;
580 	if (section_size) {
581 		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
582 		struct panthor_gem_object *bo;
583 		u32 vm_map_flags = 0;
584 		struct sg_table *sgt;
585 		u64 va = hdr.va.start;
586 
587 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
588 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
589 
590 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
591 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
592 
593 		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
594 		 * non-cacheable for now. We might want to introduce a new
595 		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
596 		 * memory and is currently not used by our driver) for
597 		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
598 		 * of IO-coherent systems.
599 		 */
600 		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
601 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
602 
603 		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
604 							section_size,
605 							DRM_PANTHOR_BO_NO_MMAP,
606 							vm_map_flags, va, "FW section");
607 		if (IS_ERR(section->mem))
608 			return PTR_ERR(section->mem);
609 
610 		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
611 			return -EINVAL;
612 
613 		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
614 			ret = panthor_kernel_bo_vmap(section->mem);
615 			if (ret)
616 				return ret;
617 		}
618 
619 		panthor_fw_init_section_mem(ptdev, section);
620 
621 		bo = to_panthor_bo(section->mem->obj);
622 		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
623 		if (IS_ERR(sgt))
624 			return PTR_ERR(sgt);
625 
626 		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
627 	}
628 
629 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
630 		ptdev->fw->shared_section = section;
631 
632 	return 0;
633 }
634 
635 static int panthor_fw_read_build_info(struct panthor_device *ptdev,
636 				      const struct firmware *fw,
637 				      struct panthor_fw_binary_iter *iter,
638 				      u32 ehdr)
639 {
640 	struct panthor_fw_build_info_hdr hdr;
641 	static const char git_sha_header[] = "git_sha: ";
642 	const int header_len = sizeof(git_sha_header) - 1;
643 	int ret;
644 
645 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
646 	if (ret)
647 		return ret;
648 
649 	if (hdr.meta_start > fw->size ||
650 	    hdr.meta_start + hdr.meta_size > fw->size) {
651 		drm_err(&ptdev->base, "Firmware build info corrupt\n");
652 		/* We don't need the build info, so continue */
653 		return 0;
654 	}
655 
656 	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
657 		/* Not the expected header, this isn't metadata we understand */
658 		return 0;
659 	}
660 
661 	/* Check that the git SHA is NULL terminated as expected */
662 	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
663 		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
664 		/* Don't treat as fatal */
665 		return 0;
666 	}
667 
668 	drm_info(&ptdev->base, "Firmware git sha: %s\n",
669 		 fw->data + hdr.meta_start + header_len);
670 
671 	return 0;
672 }
673 
674 static void
675 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
676 {
677 	struct panthor_fw_section *section;
678 
679 	list_for_each_entry(section, &ptdev->fw->sections, node) {
680 		struct sg_table *sgt;
681 
682 		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
683 			continue;
684 
685 		panthor_fw_init_section_mem(ptdev, section);
686 		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
687 		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
688 			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
689 	}
690 }
691 
692 static int panthor_fw_load_entry(struct panthor_device *ptdev,
693 				 const struct firmware *fw,
694 				 struct panthor_fw_binary_iter *iter)
695 {
696 	struct panthor_fw_binary_iter eiter;
697 	u32 ehdr;
698 	int ret;
699 
700 	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
701 	if (ret)
702 		return ret;
703 
704 	if ((iter->offset % sizeof(u32)) ||
705 	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
706 		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
707 			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
708 		return -EINVAL;
709 	}
710 
711 	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
712 					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
713 		return -EINVAL;
714 
715 	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
716 	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
717 		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
718 	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
719 		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
720 
721 	/* FIXME: handle those entry types? */
722 	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
723 	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
724 	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
725 	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
726 		return 0;
727 	default:
728 		break;
729 	}
730 
731 	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
732 		return 0;
733 
734 	drm_err(&ptdev->base,
735 		"Unsupported non-optional entry type %u in firmware\n",
736 		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
737 	return -EINVAL;
738 }
739 
740 static int panthor_fw_load(struct panthor_device *ptdev)
741 {
742 	const struct firmware *fw = NULL;
743 	struct panthor_fw_binary_iter iter = {};
744 	struct panthor_fw_binary_hdr hdr;
745 	char fw_path[128];
746 	int ret;
747 
748 	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
749 		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
750 		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
751 		 CSF_FW_NAME);
752 
753 	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
754 	if (ret) {
755 		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
756 			CSF_FW_NAME);
757 		return ret;
758 	}
759 
760 	iter.data = fw->data;
761 	iter.size = fw->size;
762 	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
763 	if (ret)
764 		goto out;
765 
766 	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
767 		ret = -EINVAL;
768 		drm_err(&ptdev->base, "Invalid firmware magic\n");
769 		goto out;
770 	}
771 
772 	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
773 		ret = -EINVAL;
774 		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
775 			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
776 		goto out;
777 	}
778 
779 	if (hdr.size > iter.size) {
780 		drm_err(&ptdev->base, "Firmware image is truncated\n");
781 		goto out;
782 	}
783 
784 	iter.size = hdr.size;
785 
786 	while (iter.offset < hdr.size) {
787 		ret = panthor_fw_load_entry(ptdev, fw, &iter);
788 		if (ret)
789 			goto out;
790 	}
791 
792 	if (!ptdev->fw->shared_section) {
793 		drm_err(&ptdev->base, "Shared interface region not found\n");
794 		ret = -EINVAL;
795 		goto out;
796 	}
797 
798 out:
799 	release_firmware(fw);
800 	return ret;
801 }
802 
803 /**
804  * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
805  * @ptdev: Device.
806  * @mcu_va: MCU address.
807  *
808  * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
809  */
810 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
811 {
812 	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
813 	u64 shared_mem_end = shared_mem_start +
814 			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
815 	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
816 		return NULL;
817 
818 	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
819 }
820 
821 static int panthor_init_cs_iface(struct panthor_device *ptdev,
822 				 unsigned int csg_idx, unsigned int cs_idx)
823 {
824 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
825 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
826 	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
827 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
828 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
829 			   (csg_idx * glb_iface->control->group_stride) +
830 			   CSF_STREAM_CONTROL_OFFSET +
831 			   (cs_idx * csg_iface->control->stream_stride);
832 	struct panthor_fw_cs_iface *first_cs_iface =
833 		panthor_fw_get_cs_iface(ptdev, 0, 0);
834 
835 	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
836 		return -EINVAL;
837 
838 	spin_lock_init(&cs_iface->lock);
839 	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
840 	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
841 	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
842 
843 	if (!cs_iface->input || !cs_iface->output) {
844 		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
845 		return -EINVAL;
846 	}
847 
848 	if (cs_iface != first_cs_iface) {
849 		if (cs_iface->control->features != first_cs_iface->control->features) {
850 			drm_err(&ptdev->base, "Expecting identical CS slots");
851 			return -EINVAL;
852 		}
853 	} else {
854 		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
855 
856 		ptdev->csif_info.cs_reg_count = reg_count;
857 		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
858 	}
859 
860 	return 0;
861 }
862 
863 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
864 			const struct panthor_fw_csg_control_iface *b)
865 {
866 	if (a->features != b->features)
867 		return false;
868 	if (a->suspend_size != b->suspend_size)
869 		return false;
870 	if (a->protm_suspend_size != b->protm_suspend_size)
871 		return false;
872 	if (a->stream_num != b->stream_num)
873 		return false;
874 	return true;
875 }
876 
877 static int panthor_init_csg_iface(struct panthor_device *ptdev,
878 				  unsigned int csg_idx)
879 {
880 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
881 	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
882 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
883 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
884 	unsigned int i;
885 
886 	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
887 		return -EINVAL;
888 
889 	spin_lock_init(&csg_iface->lock);
890 	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
891 	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
892 	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
893 
894 	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
895 	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
896 		return -EINVAL;
897 
898 	if (!csg_iface->input || !csg_iface->output) {
899 		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
900 		return -EINVAL;
901 	}
902 
903 	if (csg_idx > 0) {
904 		struct panthor_fw_csg_iface *first_csg_iface =
905 			panthor_fw_get_csg_iface(ptdev, 0);
906 
907 		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
908 			drm_err(&ptdev->base, "Expecting identical CSG slots");
909 			return -EINVAL;
910 		}
911 	}
912 
913 	for (i = 0; i < csg_iface->control->stream_num; i++) {
914 		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
915 
916 		if (ret)
917 			return ret;
918 	}
919 
920 	return 0;
921 }
922 
923 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
924 {
925 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
926 
927 	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
928 		return 0;
929 
930 	return glb_iface->control->instr_features;
931 }
932 
933 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
934 {
935 	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
936 	unsigned int i;
937 
938 	if (!ptdev->fw->shared_section->mem->kmap)
939 		return -EINVAL;
940 
941 	spin_lock_init(&glb_iface->lock);
942 	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
943 
944 	if (!glb_iface->control->version) {
945 		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
946 		return -EINVAL;
947 	}
948 
949 	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
950 	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
951 	if (!glb_iface->input || !glb_iface->output) {
952 		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
953 		return -EINVAL;
954 	}
955 
956 	if (glb_iface->control->group_num > MAX_CSGS ||
957 	    glb_iface->control->group_num < MIN_CSGS) {
958 		drm_err(&ptdev->base, "Invalid number of control groups");
959 		return -EINVAL;
960 	}
961 
962 	for (i = 0; i < glb_iface->control->group_num; i++) {
963 		int ret = panthor_init_csg_iface(ptdev, i);
964 
965 		if (ret)
966 			return ret;
967 	}
968 
969 	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
970 		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
971 		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
972 		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
973 		 glb_iface->control->features,
974 		 panthor_get_instr_features(ptdev));
975 	return 0;
976 }
977 
/**
 * panthor_fw_init_global_iface() - Program the global interface
 * @ptdev: Device.
 *
 * Fills the input side of the global interface (core mask, timers and
 * IRQ mask), posts the matching requests, rings the global doorbell and
 * arms the watchdog ping work.
 */
static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	/* Post the requests. The input fields above are written first, and
	 * the doorbell is only rung once all requests have been updated.
	 */
	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}
1010 
1011 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
1012 {
1013 	gpu_write(ptdev, JOB_INT_CLEAR, status);
1014 
1015 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
1016 		ptdev->fw->booted = true;
1017 
1018 	wake_up_all(&ptdev->fw->req_waitqueue);
1019 
1020 	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
1021 	if (!ptdev->fw->booted)
1022 		return;
1023 
1024 	panthor_sched_report_fw_events(ptdev, status);
1025 }
1026 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
1027 
1028 static int panthor_fw_start(struct panthor_device *ptdev)
1029 {
1030 	bool timedout = false;
1031 
1032 	ptdev->fw->booted = false;
1033 	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
1034 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
1035 
1036 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1037 				ptdev->fw->booted,
1038 				msecs_to_jiffies(1000))) {
1039 		if (!ptdev->fw->booted &&
1040 		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
1041 			timedout = true;
1042 	}
1043 
1044 	if (timedout) {
1045 		static const char * const status_str[] = {
1046 			[MCU_STATUS_DISABLED] = "disabled",
1047 			[MCU_STATUS_ENABLED] = "enabled",
1048 			[MCU_STATUS_HALT] = "halt",
1049 			[MCU_STATUS_FATAL] = "fatal",
1050 		};
1051 		u32 status = gpu_read(ptdev, MCU_STATUS);
1052 
1053 		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1054 			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1055 		return -ETIMEDOUT;
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 static void panthor_fw_stop(struct panthor_device *ptdev)
1062 {
1063 	u32 status;
1064 
1065 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1066 	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1067 			       status == MCU_STATUS_DISABLED, 10, 100000))
1068 		drm_err(&ptdev->base, "Failed to stop MCU");
1069 }
1070 
1071 /**
1072  * panthor_fw_pre_reset() - Call before a reset.
1073  * @ptdev: Device.
1074  * @on_hang: true if the reset was triggered on a GPU hang.
1075  *
1076  * If the reset is not triggered on a hang, we try to gracefully halt the
1077  * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1078  */
1079 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1080 {
1081 	/* Make sure we won't be woken up by a ping. */
1082 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1083 
1084 	ptdev->reset.fast = false;
1085 
1086 	if (!on_hang) {
1087 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1088 		u32 status;
1089 
1090 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1091 		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1092 		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1093 					status == MCU_STATUS_HALT, 10, 100000)) {
1094 			ptdev->reset.fast = true;
1095 		} else {
1096 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1097 		}
1098 	}
1099 
1100 	panthor_job_irq_suspend(&ptdev->fw->irq);
1101 }
1102 
1103 /**
1104  * panthor_fw_post_reset() - Call after a reset.
1105  * @ptdev: Device.
1106  *
1107  * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1108  * make sure we can recover from a memory corruption.
1109  */
1110 int panthor_fw_post_reset(struct panthor_device *ptdev)
1111 {
1112 	int ret;
1113 
1114 	/* Make the MCU VM active. */
1115 	ret = panthor_vm_active(ptdev->fw->vm);
1116 	if (ret)
1117 		return ret;
1118 
1119 	if (!ptdev->reset.fast) {
1120 		/* On a slow reset, reload all sections, including RO ones.
1121 		 * We're not supposed to end up here anyway, let's just assume
1122 		 * the overhead of reloading everything is acceptable.
1123 		 */
1124 		panthor_reload_fw_sections(ptdev, true);
1125 	} else {
1126 		/* The FW detects 0 -> 1 transitions. Make sure we reset
1127 		 * the HALT bit before the FW is rebooted.
1128 		 * This is not needed on a slow reset because FW sections are
1129 		 * re-initialized.
1130 		 */
1131 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1132 
1133 		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1134 	}
1135 
1136 	ret = panthor_fw_start(ptdev);
1137 	if (ret) {
1138 		drm_err(&ptdev->base, "FW %s reset failed",
1139 			ptdev->reset.fast ?  "fast" : "slow");
1140 		return ret;
1141 	}
1142 
1143 	/* We must re-initialize the global interface even on fast-reset. */
1144 	panthor_fw_init_global_iface(ptdev);
1145 	return 0;
1146 }
1147 
1148 /**
1149  * panthor_fw_unplug() - Called when the device is unplugged.
1150  * @ptdev: Device.
1151  *
1152  * This function must make sure all pending operations are flushed before
1153  * will release device resources, thus preventing any interaction with
1154  * the HW.
1155  *
1156  * If there is still FW-related work running after this function returns,
1157  * they must use drm_dev_{enter,exit}() and skip any HW access when
1158  * drm_dev_enter() returns false.
1159  */
1160 void panthor_fw_unplug(struct panthor_device *ptdev)
1161 {
1162 	struct panthor_fw_section *section;
1163 
1164 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1165 
1166 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
1167 		/* Make sure the IRQ handler cannot be called after that point. */
1168 		if (ptdev->fw->irq.irq)
1169 			panthor_job_irq_suspend(&ptdev->fw->irq);
1170 
1171 		panthor_fw_stop(ptdev);
1172 	}
1173 
1174 	list_for_each_entry(section, &ptdev->fw->sections, node)
1175 		panthor_kernel_bo_destroy(section->mem);
1176 
1177 	/* We intentionally don't call panthor_vm_idle() and let
1178 	 * panthor_mmu_unplug() release the AS we acquired with
1179 	 * panthor_vm_active() so we don't have to track the VM active/idle
1180 	 * state to keep the active_refcnt balanced.
1181 	 */
1182 	panthor_vm_put(ptdev->fw->vm);
1183 	ptdev->fw->vm = NULL;
1184 
1185 	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
1186 		panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1187 }
1188 
1189 /**
1190  * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1191  * @req_ptr: Pointer to the req register.
1192  * @ack_ptr: Pointer to the ack register.
1193  * @wq: Wait queue to use for the sleeping wait.
1194  * @req_mask: Mask of requests to wait for.
1195  * @acked: Pointer to field that's updated with the acked requests.
1196  * If the function returns 0, *acked == req_mask.
1197  * @timeout_ms: Timeout expressed in milliseconds.
1198  *
1199  * Return: 0 on success, -ETIMEDOUT otherwise.
1200  */
1201 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1202 				wait_queue_head_t *wq,
1203 				u32 req_mask, u32 *acked,
1204 				u32 timeout_ms)
1205 {
1206 	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1207 	int ret;
1208 
1209 	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
1210 	*acked = req_mask;
1211 	ret = read_poll_timeout_atomic(READ_ONCE, ack,
1212 				       (ack & req_mask) == req,
1213 				       0, 10, 0,
1214 				       *ack_ptr);
1215 	if (!ret)
1216 		return 0;
1217 
1218 	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1219 			       msecs_to_jiffies(timeout_ms)))
1220 		return 0;
1221 
1222 	/* Check one last time, in case we were not woken up for some reason. */
1223 	ack = READ_ONCE(*ack_ptr);
1224 	if ((ack & req_mask) == req)
1225 		return 0;
1226 
1227 	*acked = ~(req ^ ack) & req_mask;
1228 	return -ETIMEDOUT;
1229 }
1230 
1231 /**
1232  * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1233  * @ptdev: Device.
1234  * @req_mask: Mask of requests to wait for.
1235  * @acked: Pointer to field that's updated with the acked requests.
1236  * If the function returns 0, *acked == req_mask.
1237  * @timeout_ms: Timeout expressed in milliseconds.
1238  *
1239  * Return: 0 on success, -ETIMEDOUT otherwise.
1240  */
1241 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1242 			     u32 req_mask, u32 *acked,
1243 			     u32 timeout_ms)
1244 {
1245 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1246 
1247 	/* GLB_HALT doesn't get acked through the FW interface. */
1248 	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1249 		return -EINVAL;
1250 
1251 	return panthor_fw_wait_acks(&glb_iface->input->req,
1252 				    &glb_iface->output->ack,
1253 				    &ptdev->fw->req_waitqueue,
1254 				    req_mask, acked, timeout_ms);
1255 }
1256 
1257 /**
1258  * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1259  * @ptdev: Device.
1260  * @csg_slot: CSG slot ID.
1261  * @req_mask: Mask of requests to wait for.
1262  * @acked: Pointer to field that's updated with the acked requests.
1263  * If the function returns 0, *acked == req_mask.
1264  * @timeout_ms: Timeout expressed in milliseconds.
1265  *
1266  * Return: 0 on success, -ETIMEDOUT otherwise.
1267  */
1268 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1269 			     u32 req_mask, u32 *acked, u32 timeout_ms)
1270 {
1271 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1272 	int ret;
1273 
1274 	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1275 		return -EINVAL;
1276 
1277 	ret = panthor_fw_wait_acks(&csg_iface->input->req,
1278 				   &csg_iface->output->ack,
1279 				   &ptdev->fw->req_waitqueue,
1280 				   req_mask, acked, timeout_ms);
1281 
1282 	/*
1283 	 * Check that all bits in the state field were updated, if any mismatch
1284 	 * then clear all bits in the state field. This allows code to do
1285 	 * (acked & CSG_STATE_MASK) and get the right value.
1286 	 */
1287 
1288 	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1289 		*acked &= ~CSG_STATE_MASK;
1290 
1291 	return ret;
1292 }
1293 
1294 /**
1295  * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1296  * @ptdev: Device.
1297  * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1298  *
1299  * This function is toggling bits in the doorbell_req and ringing the
1300  * global doorbell. It doesn't require a user doorbell to be attached to
1301  * the group.
1302  */
1303 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1304 {
1305 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1306 
1307 	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1308 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1309 }
1310 
1311 static void panthor_fw_ping_work(struct work_struct *work)
1312 {
1313 	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1314 	struct panthor_device *ptdev = fw->irq.ptdev;
1315 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1316 	u32 acked;
1317 	int ret;
1318 
1319 	if (panthor_device_reset_is_pending(ptdev))
1320 		return;
1321 
1322 	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1323 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1324 
1325 	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1326 	if (ret) {
1327 		panthor_device_schedule_reset(ptdev);
1328 		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1329 	} else {
1330 		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1331 				 msecs_to_jiffies(PING_INTERVAL_MS));
1332 	}
1333 }
1334 
1335 /**
1336  * panthor_fw_init() - Initialize FW related data.
1337  * @ptdev: Device.
1338  *
1339  * Return: 0 on success, a negative error code otherwise.
1340  */
1341 int panthor_fw_init(struct panthor_device *ptdev)
1342 {
1343 	struct panthor_fw *fw;
1344 	int ret, irq;
1345 
1346 	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1347 	if (!fw)
1348 		return -ENOMEM;
1349 
1350 	ptdev->fw = fw;
1351 	init_waitqueue_head(&fw->req_waitqueue);
1352 	INIT_LIST_HEAD(&fw->sections);
1353 	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1354 
1355 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1356 	if (irq <= 0)
1357 		return -ENODEV;
1358 
1359 	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1360 	if (ret) {
1361 		drm_err(&ptdev->base, "failed to request job irq");
1362 		return ret;
1363 	}
1364 
1365 	ret = panthor_gpu_l2_power_on(ptdev);
1366 	if (ret)
1367 		return ret;
1368 
1369 	fw->vm = panthor_vm_create(ptdev, true,
1370 				   0, SZ_4G,
1371 				   CSF_MCU_SHARED_REGION_START,
1372 				   CSF_MCU_SHARED_REGION_SIZE);
1373 	if (IS_ERR(fw->vm)) {
1374 		ret = PTR_ERR(fw->vm);
1375 		fw->vm = NULL;
1376 		goto err_unplug_fw;
1377 	}
1378 
1379 	ret = panthor_fw_load(ptdev);
1380 	if (ret)
1381 		goto err_unplug_fw;
1382 
1383 	ret = panthor_vm_active(fw->vm);
1384 	if (ret)
1385 		goto err_unplug_fw;
1386 
1387 	ret = panthor_fw_start(ptdev);
1388 	if (ret)
1389 		goto err_unplug_fw;
1390 
1391 	ret = panthor_fw_init_ifaces(ptdev);
1392 	if (ret)
1393 		goto err_unplug_fw;
1394 
1395 	panthor_fw_init_global_iface(ptdev);
1396 	return 0;
1397 
1398 err_unplug_fw:
1399 	panthor_fw_unplug(ptdev);
1400 	return ret;
1401 }
1402 
1403 MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1404