xref: /linux/drivers/gpu/drm/panthor/panthor_fw.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0 or MIT
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifdef CONFIG_ARM_ARCH_TIMER
5 #include <asm/arch_timer.h>
6 #endif
7 
8 #include <linux/clk.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/firmware.h>
11 #include <linux/iopoll.h>
12 #include <linux/iosys-map.h>
13 #include <linux/mutex.h>
14 #include <linux/platform_device.h>
15 
16 #include <drm/drm_drv.h>
17 #include <drm/drm_managed.h>
18 
19 #include "panthor_device.h"
20 #include "panthor_fw.h"
21 #include "panthor_gem.h"
22 #include "panthor_gpu.h"
23 #include "panthor_mmu.h"
24 #include "panthor_regs.h"
25 #include "panthor_sched.h"
26 
27 #define CSF_FW_NAME "mali_csffw.bin"
28 
29 #define PING_INTERVAL_MS			12000
30 #define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
31 #define PROGRESS_TIMEOUT_SCALE_SHIFT		10
32 #define IDLE_HYSTERESIS_US			800
33 #define PWROFF_HYSTERESIS_US			10000
34 
35 /**
36  * struct panthor_fw_binary_hdr - Firmware binary header.
37  */
38 struct panthor_fw_binary_hdr {
39 	/** @magic: Magic value to check binary validity. */
40 	u32 magic;
41 #define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e
42 
43 	/** @minor: Minor FW version. */
44 	u8 minor;
45 
46 	/** @major: Major FW version. */
47 	u8 major;
48 #define CSF_FW_BINARY_HEADER_MAJOR_MAX		0
49 
50 	/** @padding1: MBZ. */
51 	u16 padding1;
52 
53 	/** @version_hash: FW version hash. */
54 	u32 version_hash;
55 
56 	/** @padding2: MBZ. */
57 	u32 padding2;
58 
59 	/** @size: FW binary size. */
60 	u32 size;
61 };
62 
/**
 * enum panthor_fw_binary_entry_type - Kind of entry found in the FW binary
 *
 * The type is encoded in the low byte of each entry header, see
 * CSF_FW_BINARY_ENTRY_TYPE().
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Interface between the host and the FW. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW configuration. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Interface to a trace buffer. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Interface to timeline metadata. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata describing
	 * how the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};
88 
/* Entry header word: type in bits [7:0], entry size (in bytes) in bits [15:8]. */
#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)

/* Entry header word: attribute bits. */
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

/* Interface entry flags: access permissions. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)

/* Interface entry flags: cache mode, encoded in bits [4:3]. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)

/* Interface entry flags: protected-mode, shared and zero-fill attributes. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)

/* Any interface entry flag outside of this set makes the loader bail out. */
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED  |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
114 
115 /**
116  * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
117  */
118 struct panthor_fw_binary_section_entry_hdr {
119 	/** @flags: Section flags. */
120 	u32 flags;
121 
122 	/** @va: MCU virtual range to map this binary section to. */
123 	struct {
124 		/** @start: Start address. */
125 		u32 start;
126 
127 		/** @end: End address. */
128 		u32 end;
129 	} va;
130 
131 	/** @data: Data to initialize the FW section with. */
132 	struct {
133 		/** @start: Start offset in the FW binary. */
134 		u32 start;
135 
136 		/** @end: End offset in the FW binary. */
137 		u32 end;
138 	} data;
139 };
140 
141 struct panthor_fw_build_info_hdr {
142 	/** @meta_start: Offset of the build info data in the FW binary */
143 	u32 meta_start;
144 	/** @meta_size: Size of the build info data in the FW binary */
145 	u32 meta_size;
146 };
147 
/**
 * struct panthor_fw_binary_iter - Cursor over a FW binary (or a part of it)
 *
 * Helper object used while parsing the firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: Start of the data being iterated over. */
	const void *data;

	/** @size: Number of bytes available at @data. */
	size_t size;

	/** @offset: Current position of the iterator, relative to @data. */
	size_t offset;
};
163 
164 /**
165  * struct panthor_fw_section - FW section
166  */
167 struct panthor_fw_section {
168 	/** @node: Used to keep track of FW sections. */
169 	struct list_head node;
170 
171 	/** @flags: Section flags, as encoded in the FW binary. */
172 	u32 flags;
173 
174 	/** @mem: Section memory. */
175 	struct panthor_kernel_bo *mem;
176 
177 	/**
178 	 * @name: Name of the section, as specified in the binary.
179 	 *
180 	 * Can be NULL.
181 	 */
182 	const char *name;
183 
184 	/**
185 	 * @data: Initial data copied to the FW memory.
186 	 *
187 	 * We keep data around so we can reload sections after a reset.
188 	 */
189 	struct {
190 		/** @buf: Buffed used to store init data. */
191 		const void *buf;
192 
193 		/** @size: Size of @buf in bytes. */
194 		size_t size;
195 	} data;
196 };
197 
/* MCU virtual address range reserved for the shared (host <-> FW) region. */
#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

/* Limits checked against what the FW interface advertises. */
#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3
#define MAX_CSG_PRIO				0xf

/* Interface version: major in bits [31:24], minor in [23:16], patch in [15:0]. */
#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)

/* Offsets used to locate group/stream control blocks in the shared section. */
#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4
214 
215 /**
216  * struct panthor_fw_iface - FW interfaces
217  */
218 struct panthor_fw_iface {
219 	/** @global: Global interface. */
220 	struct panthor_fw_global_iface global;
221 
222 	/** @groups: Group slot interfaces. */
223 	struct panthor_fw_csg_iface groups[MAX_CSGS];
224 
225 	/** @streams: Command stream slot interfaces. */
226 	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
227 };
228 
229 /**
230  * struct panthor_fw - Firmware management
231  */
232 struct panthor_fw {
233 	/** @vm: MCU VM. */
234 	struct panthor_vm *vm;
235 
236 	/** @sections: List of FW sections. */
237 	struct list_head sections;
238 
239 	/** @shared_section: The section containing the FW interfaces. */
240 	struct panthor_fw_section *shared_section;
241 
242 	/** @iface: FW interfaces. */
243 	struct panthor_fw_iface iface;
244 
245 	/** @watchdog: Collection of fields relating to the FW watchdog. */
246 	struct {
247 		/** @ping_work: Delayed work used to ping the FW. */
248 		struct delayed_work ping_work;
249 	} watchdog;
250 
251 	/**
252 	 * @req_waitqueue: FW request waitqueue.
253 	 *
254 	 * Everytime a request is sent to a command stream group or the global
255 	 * interface, the caller will first busy wait for the request to be
256 	 * acknowledged, and then fallback to a sleeping wait.
257 	 *
258 	 * This wait queue is here to support the sleeping wait flavor.
259 	 */
260 	wait_queue_head_t req_waitqueue;
261 
262 	/** @booted: True is the FW is booted */
263 	bool booted;
264 
265 	/**
266 	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
267 	 *
268 	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
269 	 *
270 	 * Any time the firmware is properly suspended, a fast reset can take place.
271 	 * On the other hand, if the halt operation failed, the driver will reload
272 	 * all sections to make sure we start from a fresh state.
273 	 */
274 	bool fast_reset;
275 
276 	/** @irq: Job irq data. */
277 	struct panthor_irq irq;
278 };
279 
280 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
281 {
282 	return ptdev->fw->vm;
283 }
284 
285 /**
286  * panthor_fw_get_glb_iface() - Get the global interface
287  * @ptdev: Device.
288  *
289  * Return: The global interface.
290  */
291 struct panthor_fw_global_iface *
292 panthor_fw_get_glb_iface(struct panthor_device *ptdev)
293 {
294 	return &ptdev->fw->iface.global;
295 }
296 
297 /**
298  * panthor_fw_get_csg_iface() - Get a command stream group slot interface
299  * @ptdev: Device.
300  * @csg_slot: Index of the command stream group slot.
301  *
302  * Return: The command stream group slot interface.
303  */
304 struct panthor_fw_csg_iface *
305 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
306 {
307 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
308 		return NULL;
309 
310 	return &ptdev->fw->iface.groups[csg_slot];
311 }
312 
313 /**
314  * panthor_fw_get_cs_iface() - Get a command stream slot interface
315  * @ptdev: Device.
316  * @csg_slot: Index of the command stream group slot.
317  * @cs_slot: Index of the command stream slot.
318  *
319  * Return: The command stream slot interface.
320  */
321 struct panthor_fw_cs_iface *
322 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
323 {
324 	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
325 		return NULL;
326 
327 	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
328 }
329 
330 /**
331  * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
332  * @ptdev: Device.
333  * @timeout_us: Timeout expressed in micro-seconds.
334  *
335  * The FW has two timer sources: the GPU counter or arch-timer. We need
336  * to express timeouts in term of number of cycles and specify which
337  * timer source should be used.
338  *
339  * Return: A value suitable for timeout fields in the global interface.
340  */
341 static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
342 {
343 	bool use_cycle_counter = false;
344 	u32 timer_rate = 0;
345 	u64 mod_cycles;
346 
347 #ifdef CONFIG_ARM_ARCH_TIMER
348 	timer_rate = arch_timer_get_cntfrq();
349 #endif
350 
351 	if (!timer_rate) {
352 		use_cycle_counter = true;
353 		timer_rate = clk_get_rate(ptdev->clks.core);
354 	}
355 
356 	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
357 		/* We couldn't get a valid clock rate, let's just pick the
358 		 * maximum value so the FW still handles the core
359 		 * power on/off requests.
360 		 */
361 		return GLB_TIMER_VAL(~0) |
362 		       GLB_TIMER_SOURCE_GPU_COUNTER;
363 	}
364 
365 	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
366 				      1000000ull << 10);
367 	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
368 		mod_cycles = GLB_TIMER_VAL(~0);
369 
370 	return GLB_TIMER_VAL(mod_cycles) |
371 	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
372 }
373 
374 static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
375 				       struct panthor_fw_binary_iter *iter,
376 				       void *out, size_t size)
377 {
378 	size_t new_offset = iter->offset + size;
379 
380 	if (new_offset > iter->size || new_offset < iter->offset) {
381 		drm_err(&ptdev->base, "Firmware too small\n");
382 		return -EINVAL;
383 	}
384 
385 	memcpy(out, iter->data + iter->offset, size);
386 	iter->offset = new_offset;
387 	return 0;
388 }
389 
390 static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
391 					   struct panthor_fw_binary_iter *iter,
392 					   struct panthor_fw_binary_iter *sub_iter,
393 					   size_t size)
394 {
395 	size_t new_offset = iter->offset + size;
396 
397 	if (new_offset > iter->size || new_offset < iter->offset) {
398 		drm_err(&ptdev->base, "Firmware entry too long\n");
399 		return -EINVAL;
400 	}
401 
402 	sub_iter->offset = 0;
403 	sub_iter->data = iter->data + iter->offset;
404 	sub_iter->size = size;
405 	iter->offset = new_offset;
406 	return 0;
407 }
408 
409 static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
410 					struct panthor_fw_section *section)
411 {
412 	bool was_mapped = !!section->mem->kmap;
413 	int ret;
414 
415 	if (!section->data.size &&
416 	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
417 		return;
418 
419 	ret = panthor_kernel_bo_vmap(section->mem);
420 	if (drm_WARN_ON(&ptdev->base, ret))
421 		return;
422 
423 	memcpy(section->mem->kmap, section->data.buf, section->data.size);
424 	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
425 		memset(section->mem->kmap + section->data.size, 0,
426 		       panthor_kernel_bo_size(section->mem) - section->data.size);
427 	}
428 
429 	if (!was_mapped)
430 		panthor_kernel_bo_vunmap(section->mem);
431 }
432 
433 /**
434  * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
435  * @ptdev: Device.
436  * @input: Pointer holding the input interface on success.
437  * Should be ignored on failure.
438  * @output: Pointer holding the output interface on success.
439  * Should be ignored on failure.
440  * @input_fw_va: Pointer holding the input interface FW VA on success.
441  * Should be ignored on failure.
442  * @output_fw_va: Pointer holding the output interface FW VA on success.
443  * Should be ignored on failure.
444  *
445  * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
446  * interface is at offset 0, and the output interface at offset 4096.
447  *
448  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
449  */
450 struct panthor_kernel_bo *
451 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
452 				 struct panthor_fw_ringbuf_input_iface **input,
453 				 const struct panthor_fw_ringbuf_output_iface **output,
454 				 u32 *input_fw_va, u32 *output_fw_va)
455 {
456 	struct panthor_kernel_bo *mem;
457 	int ret;
458 
459 	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
460 				       DRM_PANTHOR_BO_NO_MMAP,
461 				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
462 				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
463 				       PANTHOR_VM_KERNEL_AUTO_VA);
464 	if (IS_ERR(mem))
465 		return mem;
466 
467 	ret = panthor_kernel_bo_vmap(mem);
468 	if (ret) {
469 		panthor_kernel_bo_destroy(mem);
470 		return ERR_PTR(ret);
471 	}
472 
473 	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
474 	*input = mem->kmap;
475 	*output = mem->kmap + SZ_4K;
476 	*input_fw_va = panthor_kernel_bo_gpuva(mem);
477 	*output_fw_va = *input_fw_va + SZ_4K;
478 
479 	return mem;
480 }
481 
482 /**
483  * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
484  * @ptdev: Device.
485  * @size: Size of the suspend buffer.
486  *
487  * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
488  */
489 struct panthor_kernel_bo *
490 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
491 {
492 	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
493 					DRM_PANTHOR_BO_NO_MMAP,
494 					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
495 					PANTHOR_VM_KERNEL_AUTO_VA);
496 }
497 
498 static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
499 					 const struct firmware *fw,
500 					 struct panthor_fw_binary_iter *iter,
501 					 u32 ehdr)
502 {
503 	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
504 	struct panthor_fw_binary_section_entry_hdr hdr;
505 	struct panthor_fw_section *section;
506 	u32 section_size;
507 	u32 name_len;
508 	int ret;
509 
510 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
511 	if (ret)
512 		return ret;
513 
514 	if (hdr.data.end < hdr.data.start) {
515 		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
516 			hdr.data.end, hdr.data.start);
517 		return -EINVAL;
518 	}
519 
520 	if (hdr.va.end < hdr.va.start) {
521 		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
522 			hdr.va.end, hdr.va.start);
523 		return -EINVAL;
524 	}
525 
526 	if (hdr.data.end > fw->size) {
527 		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
528 			hdr.data.end, fw->size);
529 		return -EINVAL;
530 	}
531 
532 	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
533 		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
534 			hdr.va.start, hdr.va.end);
535 		return -EINVAL;
536 	}
537 
538 	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
539 		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
540 			hdr.flags);
541 		return -EINVAL;
542 	}
543 
544 	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
545 		drm_warn(&ptdev->base,
546 			 "Firmware protected mode entry not be supported, ignoring");
547 		return 0;
548 	}
549 
550 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
551 	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
552 		drm_err(&ptdev->base,
553 			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
554 		return -EINVAL;
555 	}
556 
557 	name_len = iter->size - iter->offset;
558 
559 	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
560 	if (!section)
561 		return -ENOMEM;
562 
563 	list_add_tail(&section->node, &ptdev->fw->sections);
564 	section->flags = hdr.flags;
565 	section->data.size = hdr.data.end - hdr.data.start;
566 
567 	if (section->data.size > 0) {
568 		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
569 
570 		if (!data)
571 			return -ENOMEM;
572 
573 		memcpy(data, fw->data + hdr.data.start, section->data.size);
574 		section->data.buf = data;
575 	}
576 
577 	if (name_len > 0) {
578 		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
579 
580 		if (!name)
581 			return -ENOMEM;
582 
583 		memcpy(name, iter->data + iter->offset, name_len);
584 		name[name_len] = '\0';
585 		section->name = name;
586 	}
587 
588 	section_size = hdr.va.end - hdr.va.start;
589 	if (section_size) {
590 		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
591 		struct panthor_gem_object *bo;
592 		u32 vm_map_flags = 0;
593 		struct sg_table *sgt;
594 		u64 va = hdr.va.start;
595 
596 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
597 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
598 
599 		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
600 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
601 
602 		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
603 		 * non-cacheable for now. We might want to introduce a new
604 		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
605 		 * memory and is currently not used by our driver) for
606 		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
607 		 * of IO-coherent systems.
608 		 */
609 		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
610 			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
611 
612 		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
613 							section_size,
614 							DRM_PANTHOR_BO_NO_MMAP,
615 							vm_map_flags, va);
616 		if (IS_ERR(section->mem))
617 			return PTR_ERR(section->mem);
618 
619 		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
620 			return -EINVAL;
621 
622 		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
623 			ret = panthor_kernel_bo_vmap(section->mem);
624 			if (ret)
625 				return ret;
626 		}
627 
628 		panthor_fw_init_section_mem(ptdev, section);
629 
630 		bo = to_panthor_bo(section->mem->obj);
631 		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
632 		if (IS_ERR(sgt))
633 			return PTR_ERR(sgt);
634 
635 		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
636 	}
637 
638 	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
639 		ptdev->fw->shared_section = section;
640 
641 	return 0;
642 }
643 
644 static int panthor_fw_read_build_info(struct panthor_device *ptdev,
645 				      const struct firmware *fw,
646 				      struct panthor_fw_binary_iter *iter,
647 				      u32 ehdr)
648 {
649 	struct panthor_fw_build_info_hdr hdr;
650 	char header[9];
651 	const char git_sha_header[sizeof(header)] = "git_sha: ";
652 	int ret;
653 
654 	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
655 	if (ret)
656 		return ret;
657 
658 	if (hdr.meta_start > fw->size ||
659 	    hdr.meta_start + hdr.meta_size > fw->size) {
660 		drm_err(&ptdev->base, "Firmware build info corrupt\n");
661 		/* We don't need the build info, so continue */
662 		return 0;
663 	}
664 
665 	if (memcmp(git_sha_header, fw->data + hdr.meta_start,
666 		   sizeof(git_sha_header))) {
667 		/* Not the expected header, this isn't metadata we understand */
668 		return 0;
669 	}
670 
671 	/* Check that the git SHA is NULL terminated as expected */
672 	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
673 		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
674 		/* Don't treat as fatal */
675 		return 0;
676 	}
677 
678 	drm_info(&ptdev->base, "Firmware git sha: %s\n",
679 		 fw->data + hdr.meta_start + sizeof(git_sha_header));
680 
681 	return 0;
682 }
683 
684 static void
685 panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
686 {
687 	struct panthor_fw_section *section;
688 
689 	list_for_each_entry(section, &ptdev->fw->sections, node) {
690 		struct sg_table *sgt;
691 
692 		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
693 			continue;
694 
695 		panthor_fw_init_section_mem(ptdev, section);
696 		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
697 		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
698 			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
699 	}
700 }
701 
702 static int panthor_fw_load_entry(struct panthor_device *ptdev,
703 				 const struct firmware *fw,
704 				 struct panthor_fw_binary_iter *iter)
705 {
706 	struct panthor_fw_binary_iter eiter;
707 	u32 ehdr;
708 	int ret;
709 
710 	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
711 	if (ret)
712 		return ret;
713 
714 	if ((iter->offset % sizeof(u32)) ||
715 	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
716 		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
717 			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
718 		return -EINVAL;
719 	}
720 
721 	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
722 					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
723 		return -EINVAL;
724 
725 	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
726 	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
727 		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
728 	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
729 		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);
730 
731 	/* FIXME: handle those entry types? */
732 	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
733 	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
734 	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
735 	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
736 		return 0;
737 	default:
738 		break;
739 	}
740 
741 	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
742 		return 0;
743 
744 	drm_err(&ptdev->base,
745 		"Unsupported non-optional entry type %u in firmware\n",
746 		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
747 	return -EINVAL;
748 }
749 
750 static int panthor_fw_load(struct panthor_device *ptdev)
751 {
752 	const struct firmware *fw = NULL;
753 	struct panthor_fw_binary_iter iter = {};
754 	struct panthor_fw_binary_hdr hdr;
755 	char fw_path[128];
756 	int ret;
757 
758 	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
759 		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
760 		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
761 		 CSF_FW_NAME);
762 
763 	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
764 	if (ret) {
765 		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
766 			CSF_FW_NAME);
767 		return ret;
768 	}
769 
770 	iter.data = fw->data;
771 	iter.size = fw->size;
772 	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
773 	if (ret)
774 		goto out;
775 
776 	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
777 		ret = -EINVAL;
778 		drm_err(&ptdev->base, "Invalid firmware magic\n");
779 		goto out;
780 	}
781 
782 	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
783 		ret = -EINVAL;
784 		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
785 			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
786 		goto out;
787 	}
788 
789 	if (hdr.size > iter.size) {
790 		drm_err(&ptdev->base, "Firmware image is truncated\n");
791 		goto out;
792 	}
793 
794 	iter.size = hdr.size;
795 
796 	while (iter.offset < hdr.size) {
797 		ret = panthor_fw_load_entry(ptdev, fw, &iter);
798 		if (ret)
799 			goto out;
800 	}
801 
802 	if (!ptdev->fw->shared_section) {
803 		drm_err(&ptdev->base, "Shared interface region not found\n");
804 		ret = -EINVAL;
805 		goto out;
806 	}
807 
808 out:
809 	release_firmware(fw);
810 	return ret;
811 }
812 
813 /**
814  * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
815  * @ptdev: Device.
816  * @mcu_va: MCU address.
817  *
818  * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
819  */
820 static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
821 {
822 	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
823 	u64 shared_mem_end = shared_mem_start +
824 			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
825 	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
826 		return NULL;
827 
828 	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
829 }
830 
831 static int panthor_init_cs_iface(struct panthor_device *ptdev,
832 				 unsigned int csg_idx, unsigned int cs_idx)
833 {
834 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
835 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
836 	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
837 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
838 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
839 			   (csg_idx * glb_iface->control->group_stride) +
840 			   CSF_STREAM_CONTROL_OFFSET +
841 			   (cs_idx * csg_iface->control->stream_stride);
842 	struct panthor_fw_cs_iface *first_cs_iface =
843 		panthor_fw_get_cs_iface(ptdev, 0, 0);
844 
845 	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
846 		return -EINVAL;
847 
848 	spin_lock_init(&cs_iface->lock);
849 	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
850 	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
851 	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
852 
853 	if (!cs_iface->input || !cs_iface->output) {
854 		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
855 		return -EINVAL;
856 	}
857 
858 	if (cs_iface != first_cs_iface) {
859 		if (cs_iface->control->features != first_cs_iface->control->features) {
860 			drm_err(&ptdev->base, "Expecting identical CS slots");
861 			return -EINVAL;
862 		}
863 	} else {
864 		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
865 
866 		ptdev->csif_info.cs_reg_count = reg_count;
867 		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
868 	}
869 
870 	return 0;
871 }
872 
873 static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
874 			const struct panthor_fw_csg_control_iface *b)
875 {
876 	if (a->features != b->features)
877 		return false;
878 	if (a->suspend_size != b->suspend_size)
879 		return false;
880 	if (a->protm_suspend_size != b->protm_suspend_size)
881 		return false;
882 	if (a->stream_num != b->stream_num)
883 		return false;
884 	return true;
885 }
886 
887 static int panthor_init_csg_iface(struct panthor_device *ptdev,
888 				  unsigned int csg_idx)
889 {
890 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
891 	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
892 	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
893 	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
894 	unsigned int i;
895 
896 	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
897 		return -EINVAL;
898 
899 	spin_lock_init(&csg_iface->lock);
900 	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
901 	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
902 	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
903 
904 	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
905 	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
906 		return -EINVAL;
907 
908 	if (!csg_iface->input || !csg_iface->output) {
909 		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
910 		return -EINVAL;
911 	}
912 
913 	if (csg_idx > 0) {
914 		struct panthor_fw_csg_iface *first_csg_iface =
915 			panthor_fw_get_csg_iface(ptdev, 0);
916 
917 		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
918 			drm_err(&ptdev->base, "Expecting identical CSG slots");
919 			return -EINVAL;
920 		}
921 	}
922 
923 	for (i = 0; i < csg_iface->control->stream_num; i++) {
924 		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
925 
926 		if (ret)
927 			return ret;
928 	}
929 
930 	return 0;
931 }
932 
933 static u32 panthor_get_instr_features(struct panthor_device *ptdev)
934 {
935 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
936 
937 	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
938 		return 0;
939 
940 	return glb_iface->control->instr_features;
941 }
942 
943 static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
944 {
945 	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
946 	unsigned int i;
947 
948 	if (!ptdev->fw->shared_section->mem->kmap)
949 		return -EINVAL;
950 
951 	spin_lock_init(&glb_iface->lock);
952 	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
953 
954 	if (!glb_iface->control->version) {
955 		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
956 		return -EINVAL;
957 	}
958 
959 	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
960 	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
961 	if (!glb_iface->input || !glb_iface->output) {
962 		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
963 		return -EINVAL;
964 	}
965 
966 	if (glb_iface->control->group_num > MAX_CSGS ||
967 	    glb_iface->control->group_num < MIN_CSGS) {
968 		drm_err(&ptdev->base, "Invalid number of control groups");
969 		return -EINVAL;
970 	}
971 
972 	for (i = 0; i < glb_iface->control->group_num; i++) {
973 		int ret = panthor_init_csg_iface(ptdev, i);
974 
975 		if (ret)
976 			return ret;
977 	}
978 
979 	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
980 		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
981 		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
982 		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
983 		 glb_iface->control->features,
984 		 panthor_get_instr_features(ptdev));
985 	return 0;
986 }
987 
988 static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
989 {
990 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
991 
992 	/* Enable all cores. */
993 	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
994 
995 	/* Setup timers. */
996 	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
997 	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
998 	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
999 
1000 	/* Enable interrupts we care about. */
1001 	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
1002 					 GLB_PING |
1003 					 GLB_CFG_PROGRESS_TIMER |
1004 					 GLB_CFG_POWEROFF_TIMER |
1005 					 GLB_IDLE_EN |
1006 					 GLB_IDLE;
1007 
1008 	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
1009 	panthor_fw_toggle_reqs(glb_iface, req, ack,
1010 			       GLB_CFG_ALLOC_EN |
1011 			       GLB_CFG_POWEROFF_TIMER |
1012 			       GLB_CFG_PROGRESS_TIMER);
1013 
1014 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1015 
1016 	/* Kick the watchdog. */
1017 	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
1018 			 msecs_to_jiffies(PING_INTERVAL_MS));
1019 }
1020 
1021 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
1022 {
1023 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
1024 		ptdev->fw->booted = true;
1025 
1026 	wake_up_all(&ptdev->fw->req_waitqueue);
1027 
1028 	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
1029 	if (!ptdev->fw->booted)
1030 		return;
1031 
1032 	panthor_sched_report_fw_events(ptdev, status);
1033 }
1034 PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
1035 
1036 static int panthor_fw_start(struct panthor_device *ptdev)
1037 {
1038 	bool timedout = false;
1039 
1040 	ptdev->fw->booted = false;
1041 	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
1042 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
1043 
1044 	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
1045 				ptdev->fw->booted,
1046 				msecs_to_jiffies(1000))) {
1047 		if (!ptdev->fw->booted &&
1048 		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
1049 			timedout = true;
1050 	}
1051 
1052 	if (timedout) {
1053 		static const char * const status_str[] = {
1054 			[MCU_STATUS_DISABLED] = "disabled",
1055 			[MCU_STATUS_ENABLED] = "enabled",
1056 			[MCU_STATUS_HALT] = "halt",
1057 			[MCU_STATUS_FATAL] = "fatal",
1058 		};
1059 		u32 status = gpu_read(ptdev, MCU_STATUS);
1060 
1061 		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
1062 			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
1063 		return -ETIMEDOUT;
1064 	}
1065 
1066 	return 0;
1067 }
1068 
1069 static void panthor_fw_stop(struct panthor_device *ptdev)
1070 {
1071 	u32 status;
1072 
1073 	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
1074 	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1075 			       status == MCU_STATUS_DISABLED, 10, 100000))
1076 		drm_err(&ptdev->base, "Failed to stop MCU");
1077 }
1078 
1079 /**
1080  * panthor_fw_pre_reset() - Call before a reset.
1081  * @ptdev: Device.
1082  * @on_hang: true if the reset was triggered on a GPU hang.
1083  *
1084  * If the reset is not triggered on a hang, we try to gracefully halt the
1085  * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
1086  */
1087 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
1088 {
1089 	/* Make sure we won't be woken up by a ping. */
1090 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1091 
1092 	ptdev->fw->fast_reset = false;
1093 
1094 	if (!on_hang) {
1095 		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1096 		u32 status;
1097 
1098 		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
1099 		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1100 		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
1101 					status == MCU_STATUS_HALT, 10, 100000) &&
1102 		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
1103 			ptdev->fw->fast_reset = true;
1104 		} else {
1105 			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
1106 		}
1107 
1108 		/* The FW detects 0 -> 1 transitions. Make sure we reset
1109 		 * the HALT bit before the FW is rebooted.
1110 		 */
1111 		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
1112 	}
1113 
1114 	panthor_job_irq_suspend(&ptdev->fw->irq);
1115 }
1116 
1117 /**
1118  * panthor_fw_post_reset() - Call after a reset.
1119  * @ptdev: Device.
1120  *
1121  * Start the FW. If this is not a fast reset, all FW sections are reloaded to
1122  * make sure we can recover from a memory corruption.
1123  */
1124 int panthor_fw_post_reset(struct panthor_device *ptdev)
1125 {
1126 	int ret;
1127 
1128 	/* Make the MCU VM active. */
1129 	ret = panthor_vm_active(ptdev->fw->vm);
1130 	if (ret)
1131 		return ret;
1132 
1133 	/* If this is a fast reset, try to start the MCU without reloading
1134 	 * the FW sections. If it fails, go for a full reset.
1135 	 */
1136 	if (ptdev->fw->fast_reset) {
1137 		ret = panthor_fw_start(ptdev);
1138 		if (!ret)
1139 			goto out;
1140 
1141 		/* Forcibly reset the MCU and force a slow reset, so we get a
1142 		 * fresh boot on the next panthor_fw_start() call.
1143 		 */
1144 		panthor_fw_stop(ptdev);
1145 		ptdev->fw->fast_reset = false;
1146 		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
1147 
1148 		ret = panthor_vm_flush_all(ptdev->fw->vm);
1149 		if (ret) {
1150 			drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
1151 			return ret;
1152 		}
1153 	}
1154 
1155 	/* Reload all sections, including RO ones. We're not supposed
1156 	 * to end up here anyway, let's just assume the overhead of
1157 	 * reloading everything is acceptable.
1158 	 */
1159 	panthor_reload_fw_sections(ptdev, true);
1160 
1161 	ret = panthor_fw_start(ptdev);
1162 	if (ret) {
1163 		drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )");
1164 		return ret;
1165 	}
1166 
1167 out:
1168 	/* We must re-initialize the global interface even on fast-reset. */
1169 	panthor_fw_init_global_iface(ptdev);
1170 	return 0;
1171 }
1172 
1173 /**
1174  * panthor_fw_unplug() - Called when the device is unplugged.
1175  * @ptdev: Device.
1176  *
1177  * This function must make sure all pending operations are flushed before
1178  * will release device resources, thus preventing any interaction with
1179  * the HW.
1180  *
1181  * If there is still FW-related work running after this function returns,
1182  * they must use drm_dev_{enter,exit}() and skip any HW access when
1183  * drm_dev_enter() returns false.
1184  */
1185 void panthor_fw_unplug(struct panthor_device *ptdev)
1186 {
1187 	struct panthor_fw_section *section;
1188 
1189 	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
1190 
1191 	/* Make sure the IRQ handler can be called after that point. */
1192 	if (ptdev->fw->irq.irq)
1193 		panthor_job_irq_suspend(&ptdev->fw->irq);
1194 
1195 	panthor_fw_stop(ptdev);
1196 
1197 	list_for_each_entry(section, &ptdev->fw->sections, node)
1198 		panthor_kernel_bo_destroy(section->mem);
1199 
1200 	/* We intentionally don't call panthor_vm_idle() and let
1201 	 * panthor_mmu_unplug() release the AS we acquired with
1202 	 * panthor_vm_active() so we don't have to track the VM active/idle
1203 	 * state to keep the active_refcnt balanced.
1204 	 */
1205 	panthor_vm_put(ptdev->fw->vm);
1206 	ptdev->fw->vm = NULL;
1207 
1208 	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
1209 }
1210 
1211 /**
1212  * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
1213  * @req_ptr: Pointer to the req register.
1214  * @ack_ptr: Pointer to the ack register.
1215  * @wq: Wait queue to use for the sleeping wait.
1216  * @req_mask: Mask of requests to wait for.
1217  * @acked: Pointer to field that's updated with the acked requests.
1218  * If the function returns 0, *acked == req_mask.
1219  * @timeout_ms: Timeout expressed in milliseconds.
1220  *
1221  * Return: 0 on success, -ETIMEDOUT otherwise.
1222  */
1223 static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
1224 				wait_queue_head_t *wq,
1225 				u32 req_mask, u32 *acked,
1226 				u32 timeout_ms)
1227 {
1228 	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
1229 	int ret;
1230 
1231 	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
1232 	*acked = req_mask;
1233 	ret = read_poll_timeout_atomic(READ_ONCE, ack,
1234 				       (ack & req_mask) == req,
1235 				       0, 10, 0,
1236 				       *ack_ptr);
1237 	if (!ret)
1238 		return 0;
1239 
1240 	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
1241 			       msecs_to_jiffies(timeout_ms)))
1242 		return 0;
1243 
1244 	/* Check one last time, in case we were not woken up for some reason. */
1245 	ack = READ_ONCE(*ack_ptr);
1246 	if ((ack & req_mask) == req)
1247 		return 0;
1248 
1249 	*acked = ~(req ^ ack) & req_mask;
1250 	return -ETIMEDOUT;
1251 }
1252 
1253 /**
1254  * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
1255  * @ptdev: Device.
1256  * @req_mask: Mask of requests to wait for.
1257  * @acked: Pointer to field that's updated with the acked requests.
1258  * If the function returns 0, *acked == req_mask.
1259  * @timeout_ms: Timeout expressed in milliseconds.
1260  *
1261  * Return: 0 on success, -ETIMEDOUT otherwise.
1262  */
1263 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
1264 			     u32 req_mask, u32 *acked,
1265 			     u32 timeout_ms)
1266 {
1267 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1268 
1269 	/* GLB_HALT doesn't get acked through the FW interface. */
1270 	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
1271 		return -EINVAL;
1272 
1273 	return panthor_fw_wait_acks(&glb_iface->input->req,
1274 				    &glb_iface->output->ack,
1275 				    &ptdev->fw->req_waitqueue,
1276 				    req_mask, acked, timeout_ms);
1277 }
1278 
1279 /**
1280  * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
1281  * @ptdev: Device.
1282  * @csg_slot: CSG slot ID.
1283  * @req_mask: Mask of requests to wait for.
1284  * @acked: Pointer to field that's updated with the acked requests.
1285  * If the function returns 0, *acked == req_mask.
1286  * @timeout_ms: Timeout expressed in milliseconds.
1287  *
1288  * Return: 0 on success, -ETIMEDOUT otherwise.
1289  */
1290 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
1291 			     u32 req_mask, u32 *acked, u32 timeout_ms)
1292 {
1293 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
1294 	int ret;
1295 
1296 	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
1297 		return -EINVAL;
1298 
1299 	ret = panthor_fw_wait_acks(&csg_iface->input->req,
1300 				   &csg_iface->output->ack,
1301 				   &ptdev->fw->req_waitqueue,
1302 				   req_mask, acked, timeout_ms);
1303 
1304 	/*
1305 	 * Check that all bits in the state field were updated, if any mismatch
1306 	 * then clear all bits in the state field. This allows code to do
1307 	 * (acked & CSG_STATE_MASK) and get the right value.
1308 	 */
1309 
1310 	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
1311 		*acked &= ~CSG_STATE_MASK;
1312 
1313 	return ret;
1314 }
1315 
1316 /**
1317  * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
1318  * @ptdev: Device.
1319  * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
1320  *
1321  * This function is toggling bits in the doorbell_req and ringing the
1322  * global doorbell. It doesn't require a user doorbell to be attached to
1323  * the group.
1324  */
1325 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
1326 {
1327 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1328 
1329 	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
1330 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1331 }
1332 
1333 static void panthor_fw_ping_work(struct work_struct *work)
1334 {
1335 	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
1336 	struct panthor_device *ptdev = fw->irq.ptdev;
1337 	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1338 	u32 acked;
1339 	int ret;
1340 
1341 	if (panthor_device_reset_is_pending(ptdev))
1342 		return;
1343 
1344 	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
1345 	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
1346 
1347 	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
1348 	if (ret) {
1349 		panthor_device_schedule_reset(ptdev);
1350 		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
1351 	} else {
1352 		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
1353 				 msecs_to_jiffies(PING_INTERVAL_MS));
1354 	}
1355 }
1356 
1357 /**
1358  * panthor_fw_init() - Initialize FW related data.
1359  * @ptdev: Device.
1360  *
1361  * Return: 0 on success, a negative error code otherwise.
1362  */
1363 int panthor_fw_init(struct panthor_device *ptdev)
1364 {
1365 	struct panthor_fw *fw;
1366 	int ret, irq;
1367 
1368 	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
1369 	if (!fw)
1370 		return -ENOMEM;
1371 
1372 	ptdev->fw = fw;
1373 	init_waitqueue_head(&fw->req_waitqueue);
1374 	INIT_LIST_HEAD(&fw->sections);
1375 	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
1376 
1377 	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
1378 	if (irq <= 0)
1379 		return -ENODEV;
1380 
1381 	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
1382 	if (ret) {
1383 		drm_err(&ptdev->base, "failed to request job irq");
1384 		return ret;
1385 	}
1386 
1387 	ret = panthor_gpu_l2_power_on(ptdev);
1388 	if (ret)
1389 		return ret;
1390 
1391 	fw->vm = panthor_vm_create(ptdev, true,
1392 				   0, SZ_4G,
1393 				   CSF_MCU_SHARED_REGION_START,
1394 				   CSF_MCU_SHARED_REGION_SIZE);
1395 	if (IS_ERR(fw->vm)) {
1396 		ret = PTR_ERR(fw->vm);
1397 		fw->vm = NULL;
1398 		goto err_unplug_fw;
1399 	}
1400 
1401 	ret = panthor_fw_load(ptdev);
1402 	if (ret)
1403 		goto err_unplug_fw;
1404 
1405 	ret = panthor_vm_active(fw->vm);
1406 	if (ret)
1407 		goto err_unplug_fw;
1408 
1409 	ret = panthor_fw_start(ptdev);
1410 	if (ret)
1411 		goto err_unplug_fw;
1412 
1413 	ret = panthor_fw_init_ifaces(ptdev);
1414 	if (ret)
1415 		goto err_unplug_fw;
1416 
1417 	panthor_fw_init_global_iface(ptdev);
1418 	return 0;
1419 
1420 err_unplug_fw:
1421 	panthor_fw_unplug(ptdev);
1422 	return ret;
1423 }
1424 
/* Firmware image declared for this module (file name matches CSF_FW_NAME). */
MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
1426