// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_hw.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000
#define MCU_HALT_TIMEOUT_US			(1ULL * USEC_PER_SEC)

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};
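
/*
 * Illustrative sketch, not part of the driver: the checks a loader applies
 * to this header before walking the entries. It mirrors what
 * panthor_fw_load() does below; the helper name is made up.
 */
static inline bool panthor_fw_sketch_hdr_valid(const struct panthor_fw_binary_hdr *hdr,
					       size_t fw_size)
{
	/* Reject binaries with a bad magic, an unknown header major version,
	 * or a declared size larger than the file we were given.
	 */
	return hdr->magic == CSF_FW_BINARY_HEADER_MAGIC &&
	       hdr->major == CSF_FW_BINARY_HEADER_MAJOR_MAX &&
	       hdr->size <= fw_size;
}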

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT		(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_PROT					BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_ZERO					BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD |					\
	 CSF_FW_BINARY_IFACE_ENTRY_WR |					\
	 CSF_FW_BINARY_IFACE_ENTRY_EX |					\
	 CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_SHARED |				\
	 CSF_FW_BINARY_IFACE_ENTRY_ZERO)
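
/*
 * Illustrative sketch, not part of the driver: unpacking one entry header
 * word with the accessors above. The type lives in bits 7:0, the entry
 * size (header word included) in bits 15:8, and UPDATE/OPTIONAL in bits
 * 30/31. An unknown type may only be skipped when OPTIONAL is set, which
 * is exactly the policy implemented by panthor_fw_load_entry() below.
 */
static inline int panthor_fw_sketch_entry_payload_size(u32 ehdr)
{
	int size = CSF_FW_BINARY_ENTRY_SIZE(ehdr);

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		/* Payload size, i.e. entry size minus the header word. */
		return size - (int)sizeof(ehdr);
	default:
		/* Unknown entries are only skippable when optional. */
		return (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) ? 0 : -EINVAL;
	}
}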

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

/**
 * struct panthor_fw_build_info_hdr - Firmware build info header
 */
struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary. */
	u32 meta_start;

	/** @meta_size: Size of the build info data in the FW binary. */
	u32 meta_size;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};
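
/*
 * Illustrative sketch, not part of the driver: the bounds check every
 * iterator helper below relies on. Both running past the end of the
 * buffer and offset wrap-around must be rejected, hence the two
 * comparisons; the helper name is made up.
 */
static inline bool panthor_fw_sketch_iter_fits(const struct panthor_fw_binary_iter *iter,
					       size_t size)
{
	size_t new_offset = iter->offset + size;

	/* Reject reads past @size and sums that wrapped around. */
	return new_offset <= iter->size && new_offset >= iter->offset;
}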

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};

#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3

#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
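
/*
 * Illustrative sketch, not part of the driver: the version word packs
 * major/minor/patch as 8/8/16 bits, so a plain integer comparison orders
 * versions correctly. This is how feature checks like
 * panthor_fw_has_glb_state() below test for "v4.1.0 or newer".
 */
static inline bool panthor_fw_sketch_version_at_least(u32 version, u8 major,
						      u8 minor, u16 patch)
{
	return version >= CSF_IFACE_VERSION(major, minor, patch);
}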

#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

/**
 * panthor_fw_vm() - Get the MCU VM
 * @ptdev: Device.
 *
 * Return: The MCU VM.
 */
struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

static bool panthor_fw_has_glb_state(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0);
}

static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0);
}

u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev,
				    struct panthor_fw_csg_iface *csg_iface)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		return csg_iface->input->endpoint_req2;
	else
		return csg_iface->input->endpoint_req;
}

void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev,
				     struct panthor_fw_csg_iface *csg_iface, u64 value)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		csg_iface->input->endpoint_req2 = value;
	else
		csg_iface->input->endpoint_req = lower_32_bits(value);
}

void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev,
					struct panthor_fw_csg_iface *csg_iface, u64 value,
					u64 mask)
{
	if (panthor_fw_has_64bit_ep_req(ptdev))
		panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask);
	else
		panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value),
				       lower_32_bits(mask));
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in microseconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We need
 * to express timeouts in terms of a number of cycles and specify which
 * timer source should be used.
 *
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}
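
/*
 * Worked example (illustrative): the value programmed above is the timeout
 * expressed in units of 1024 source cycles, hence the "<< 10" in the
 * divisor. With a hypothetical 19.2 MHz arch timer and the 10 ms
 * PWROFF_HYSTERESIS_US timeout:
 *
 *   mod_cycles = DIV_ROUND_UP(10000 * 19200000, 1000000 << 10)
 *              = DIV_ROUND_UP(192000000000, 1024000000)
 *              = 188
 *
 * so the FW waits 188 * 1024 = 192512 timer cycles (~10.03 ms) before
 * powering the cores off.
 */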

static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA,
				       "Queue FW interface");
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}
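
/*
 * Illustrative note, not part of the driver: layout of the buffer returned
 * above. Both interfaces live in one 8k BO, one page apart, so the FW VAs
 * derive from a single GPU VA:
 *
 *   input_fw_va  = gpuva + 0      (host-written input page)
 *   output_fw_va = gpuva + SZ_4K  (FW-written output page)
 */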

/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA,
					"FW suspend buffer");
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take advantage
		 * of IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va, "FW section");
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	static const char git_sha_header[] = "git_sha: ";
	const int header_len = sizeof(git_sha_header) - 1;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.meta_start > fw->size ||
	    hdr.meta_start + hdr.meta_size > fw->size) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	if (memcmp(git_sha_header, fw->data + hdr.meta_start, header_len)) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + header_len);

	return 0;
}

static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			CSF_FW_NAME);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}

	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}
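
/*
 * Illustrative note, not part of the driver: on an arch-10.8 GPU, the path
 * built above resolves to "arm/mali/arch10.8/mali_csffw.bin", matching one
 * of the MODULE_FIRMWARE() entries at the bottom of this file.
 * GPU_ARCH_MAJOR()/GPU_ARCH_MINOR() (from panthor_regs.h) extract the
 * architecture fields of GPU_ID, assumed here to live in bits 31:28 and
 * 27:24 respectively, per the usual Mali GPU_ID layout.
 */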

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);

	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}

static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}

static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	if (panthor_fw_has_glb_state(ptdev))
		glb_iface->input->ack_irq_mask |= GLB_STATE_MASK;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	gpu_write(ptdev, JOB_INT_CLEAR, status);

	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);

static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}

static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (gpu_read_poll_timeout(ptdev, MCU_STATUS, status,
				  status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

static bool panthor_fw_mcu_halted(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	bool halted;

	halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT;

	if (panthor_fw_has_glb_state(ptdev))
		halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT);

	return halted;
}

static void panthor_fw_halt_mcu(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev)
{
	bool halted = false;

	if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10,
				     MCU_HALT_TIMEOUT_US, 0, ptdev)) {
		drm_warn(&ptdev->base, "Timed out waiting for MCU to halt");
		return false;
	}

	return true;
}

static void panthor_fw_mcu_set_active(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (panthor_fw_has_glb_state(ptdev))
		panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK);
	else
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->reset.fast = false;

	if (!on_hang) {
		panthor_fw_halt_mcu(ptdev);
		if (!panthor_fw_wait_mcu_halted(ptdev))
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		else
			ptdev->reset.fast = true;
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
	panthor_fw_stop(ptdev);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	if (!ptdev->reset.fast) {
		/* On a slow reset, reload all sections, including RO ones.
		 * We're not supposed to end up here anyway, let's just assume
		 * the overhead of reloading everything is acceptable.
		 */
		panthor_reload_fw_sections(ptdev, true);
	} else {
		/*
		 * If the FW was previously successfully halted in the pre-reset
		 * operation, we need to transition it to active again before
		 * the FW is rebooted.
		 * This is not needed on a slow reset because FW sections are
		 * re-initialized.
		 */
		panthor_fw_mcu_set_active(ptdev);
	}

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW %s reset failed",
			ptdev->reset.fast ? "fast" : "slow");
		return ret;
	}

	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before
 * it releases device resources, thus preventing any interaction with
 * the HW.
 *
 * Any FW-related work still running after this function returns must use
 * drm_dev_{enter,exit}() and skip any HW access when drm_dev_enter()
 * returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	disable_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
		/* Make sure the IRQ handler cannot be called after that point. */
		if (ptdev->fw->irq.irq)
			panthor_job_irq_suspend(&ptdev->fw->irq);

		panthor_fw_halt_mcu(ptdev);
		if (!panthor_fw_wait_mcu_halted(ptdev))
			drm_warn(&ptdev->base, "Failed to halt MCU on unplug");

		panthor_fw_stop(ptdev);
	}

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
		panthor_hw_l2_power_off(ptdev);
}

/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}
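
/*
 * Illustrative sketch, not part of the driver: the req/ack handshake the
 * wait above relies on. The host flips request bits in the input page
 * (see panthor_fw_toggle_reqs()), rings a doorbell, and the FW mirrors
 * each bit into the output page's ack word once the request has been
 * processed. A request is therefore complete when req and ack agree on
 * its bit; the helper name is made up.
 */
static inline u32 panthor_fw_sketch_pending_reqs(u32 req, u32 ack, u32 req_mask)
{
	/* Bits where req and ack still differ are in flight. */
	return (req ^ ack) & req_mask;
}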

/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated, if any mismatch
	 * then clear all bits in the state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}

/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function toggles bits in the doorbell_req register and rings the
 * global doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_hw_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin");
MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin");