1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "habanalabs.h"
9 #include "../include/common/hl_boot_if.h"
10 
11 #include <linux/firmware.h>
12 #include <linux/crc32.h>
13 #include <linux/slab.h>
14 #include <linux/ctype.h>
15 #include <linux/vmalloc.h>
16 
17 #include <trace/events/habanalabs.h>
18 
19 #define FW_FILE_MAX_SIZE		0x1400000 /* maximum size of 20MB */
20 
21 static char *comms_cmd_str_arr[COMMS_INVLD_LAST] = {
22 	[COMMS_NOOP] = __stringify(COMMS_NOOP),
23 	[COMMS_CLR_STS] = __stringify(COMMS_CLR_STS),
24 	[COMMS_RST_STATE] = __stringify(COMMS_RST_STATE),
25 	[COMMS_PREP_DESC] = __stringify(COMMS_PREP_DESC),
26 	[COMMS_DATA_RDY] = __stringify(COMMS_DATA_RDY),
27 	[COMMS_EXEC] = __stringify(COMMS_EXEC),
28 	[COMMS_RST_DEV] = __stringify(COMMS_RST_DEV),
29 	[COMMS_GOTO_WFE] = __stringify(COMMS_GOTO_WFE),
30 	[COMMS_SKIP_BMC] = __stringify(COMMS_SKIP_BMC),
31 	[COMMS_PREP_DESC_ELBI] = __stringify(COMMS_PREP_DESC_ELBI),
32 };
33 
34 static char *comms_sts_str_arr[COMMS_STS_INVLD_LAST] = {
35 	[COMMS_STS_NOOP] = __stringify(COMMS_STS_NOOP),
36 	[COMMS_STS_ACK] = __stringify(COMMS_STS_ACK),
37 	[COMMS_STS_OK] = __stringify(COMMS_STS_OK),
38 	[COMMS_STS_ERR] = __stringify(COMMS_STS_ERR),
39 	[COMMS_STS_VALID_ERR] = __stringify(COMMS_STS_VALID_ERR),
40 	[COMMS_STS_TIMEOUT_ERR] = __stringify(COMMS_STS_TIMEOUT_ERR),
41 };
42 
43 static char *extract_fw_ver_from_str(const char *fw_str)
44 {
45 	char *str, *fw_ver, *whitespace;
46 	u32 ver_offset;
47 
48 	fw_ver = kmalloc(VERSION_MAX_LEN, GFP_KERNEL);
49 	if (!fw_ver)
50 		return NULL;
51 
52 	str = strnstr(fw_str, "fw-", VERSION_MAX_LEN);
53 	if (!str)
54 		goto free_fw_ver;
55 
56 	/* Skip the fw- part */
57 	str += 3;
58 	ver_offset = str - fw_str;
59 
60 	/* Copy until the next whitespace */
61 	whitespace = strnstr(str, " ", VERSION_MAX_LEN - ver_offset);
62 	if (!whitespace)
63 		goto free_fw_ver;
64 
65 	strscpy(fw_ver, str, whitespace - str + 1);
66 
67 	return fw_ver;
68 
69 free_fw_ver:
70 	kfree(fw_ver);
71 	return NULL;
72 }
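/*
 * Usage sketch (illustrative, added note - mirrors the call made later in
 * hl_fw_cpucp_info_get()):
 *
 *	char *ver = extract_fw_ver_from_str(prop->cpucp_info.kernel_version);
 *	if (ver) {
 *		dev_info(hdev->dev, "Linux version %s", ver);
 *		kfree(ver);
 *	}
 *
 * For a hypothetical input such as "... fw-3.2.1 (Jan 01 2022)" the helper
 * returns a kmalloc'ed copy of "3.2.1" (the text between "fw-" and the next
 * whitespace), which the caller must kfree().
 */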
73 
74 static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver)
75 {
76 	char major[8], minor[8], *first_dot, *second_dot;
77 	int rc;
78 
79 	first_dot = strnstr(preboot_ver, ".", 10);
80 	if (first_dot) {
81 		strscpy(major, preboot_ver, first_dot - preboot_ver + 1);
82 		rc = kstrtou32(major, 10, &hdev->fw_major_version);
83 	} else {
84 		rc = -EINVAL;
85 	}
86 
87 	if (rc) {
88 		dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc);
89 		return rc;
90 	}
91 
92 	/* skip the first dot */
93 	first_dot++;
94 
95 	second_dot = strnstr(first_dot, ".", 10);
96 	if (second_dot) {
97 		strscpy(minor, first_dot, second_dot - first_dot + 1);
98 		rc = kstrtou32(minor, 10, &hdev->fw_minor_version);
99 	} else {
100 		rc = -EINVAL;
101 	}
102 
103 	if (rc)
104 		dev_err(hdev->dev, "Error %d parsing preboot minor version\n", rc);
105 	return rc;
106 }
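/*
 * Illustrative example (added note, hypothetical version string): for a
 * preboot_ver of "3.12.7", the parser above stores 3 in
 * hdev->fw_major_version and 12 in hdev->fw_minor_version; anything after
 * the second dot ("7") is ignored here.
 */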
107 
108 static int hl_request_fw(struct hl_device *hdev,
109 				const struct firmware **firmware_p,
110 				const char *fw_name)
111 {
112 	size_t fw_size;
113 	int rc;
114 
115 	rc = request_firmware(firmware_p, fw_name, hdev->dev);
116 	if (rc) {
117 		dev_err(hdev->dev, "Firmware file %s is not found! (error %d)\n",
118 				fw_name, rc);
119 		goto out;
120 	}
121 
122 	fw_size = (*firmware_p)->size;
123 	if ((fw_size % 4) != 0) {
124 		dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
125 				fw_name, fw_size);
126 		rc = -EINVAL;
127 		goto release_fw;
128 	}
129 
130 	dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
131 
132 	if (fw_size > FW_FILE_MAX_SIZE) {
133 		dev_err(hdev->dev,
134 			"FW file size %zu exceeds maximum of %u bytes\n",
135 			fw_size, FW_FILE_MAX_SIZE);
136 		rc = -EINVAL;
137 		goto release_fw;
138 	}
139 
140 	return 0;
141 
142 release_fw:
143 	release_firmware(*firmware_p);
144 out:
145 	return rc;
146 }
147 
148 /**
149  * hl_release_firmware() - release FW
150  *
151  * @fw: fw descriptor
152  *
153  * note: this inline function was added to serve as a symmetric counterpart to
154  *       the hl_request_fw() function.
155  */
156 static inline void hl_release_firmware(const struct firmware *fw)
157 {
158 	release_firmware(fw);
159 }
160 
161 /**
162  * hl_fw_copy_fw_to_device() - copy FW to device
163  *
164  * @hdev: pointer to hl_device structure.
165  * @fw: fw descriptor
166  * @dst: IO memory mapped address space to copy firmware to
167  * @src_offset: offset in src FW to copy from
168  * @size: amount of bytes to copy (0 to copy the whole binary)
169  *
170  * actual copy of FW binary data to device, shared by static and dynamic loaders
171  */
172 static int hl_fw_copy_fw_to_device(struct hl_device *hdev,
173 				const struct firmware *fw, void __iomem *dst,
174 				u32 src_offset, u32 size)
175 {
176 	const void *fw_data;
177 
178 	/* size 0 indicates to copy the whole file */
179 	if (!size)
180 		size = fw->size;
181 
182 	if (src_offset + size > fw->size) {
183 		dev_err(hdev->dev,
184 			"size to copy(%u) and offset(%u) are invalid\n",
185 			size, src_offset);
186 		return -EINVAL;
187 	}
188 
189 	fw_data = (const void *) fw->data;
190 
191 	memcpy_toio(dst, fw_data + src_offset, size);
192 	return 0;
193 }
194 
195 /**
196  * hl_fw_copy_msg_to_device() - copy message to device
197  *
198  * @hdev: pointer to hl_device structure.
199  * @msg: message
200  * @dst: IO memory mapped address space to copy firmware to
201  * @src_offset: offset in src message to copy from
202  * @size: amount of bytes to copy (0 to copy the whole binary)
203  *
204  * actual copy of message data to device.
205  */
206 static int hl_fw_copy_msg_to_device(struct hl_device *hdev,
207 		struct lkd_msg_comms *msg, void __iomem *dst,
208 		u32 src_offset, u32 size)
209 {
210 	void *msg_data;
211 
212 	/* size 0 indicates to copy the whole message */
213 	if (!size)
214 		size = sizeof(struct lkd_msg_comms);
215 
216 	if (src_offset + size > sizeof(struct lkd_msg_comms)) {
217 		dev_err(hdev->dev,
218 			"size to copy(%u) and offset(%u) are invalid\n",
219 			size, src_offset);
220 		return -EINVAL;
221 	}
222 
223 	msg_data = (void *) msg;
224 
225 	memcpy_toio(dst, msg_data + src_offset, size);
226 
227 	return 0;
228 }
229 
230 /**
231  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
232  *
233  * @hdev: pointer to hl_device structure.
234  * @fw_name: the firmware image name
235  * @dst: IO memory mapped address space to copy firmware to
236  * @src_offset: offset in src FW to copy from
237  * @size: amount of bytes to copy (0 to copy the whole binary)
238  *
239  * Copy fw code from firmware file to device memory.
240  *
241  * Return: 0 on success, non-zero for failure.
242  */
243 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
244 				void __iomem *dst, u32 src_offset, u32 size)
245 {
246 	const struct firmware *fw;
247 	int rc;
248 
249 	rc = hl_request_fw(hdev, &fw, fw_name);
250 	if (rc)
251 		return rc;
252 
253 	rc = hl_fw_copy_fw_to_device(hdev, fw, dst, src_offset, size);
254 
255 	hl_release_firmware(fw);
256 	return rc;
257 }
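/*
 * Usage sketch (illustrative only - the firmware file name and destination
 * below are hypothetical, not taken from this driver):
 *
 *	rc = hl_fw_load_fw_to_device(hdev, "habanalabs/example/boot_fit.itb",
 *					dst_io_addr, 0, 0);
 *
 * A src_offset of 0 together with a size of 0 copies the whole binary to the
 * device address space mapped at dst_io_addr.
 */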
258 
259 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value)
260 {
261 	struct cpucp_packet pkt = {};
262 
263 	pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
264 	pkt.value = cpu_to_le64(value);
265 
266 	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
267 }
268 
269 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
270 				u16 len, u32 timeout, u64 *result)
271 {
272 	struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
273 	struct asic_fixed_properties *prop = &hdev->asic_prop;
274 	struct cpucp_packet *pkt;
275 	dma_addr_t pkt_dma_addr;
276 	struct hl_bd *sent_bd;
277 	u32 tmp, expected_ack_val, pi, opcode;
278 	int rc;
279 
280 	pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
281 	if (!pkt) {
282 		dev_err(hdev->dev,
283 			"Failed to allocate DMA memory for packet to CPU\n");
284 		return -ENOMEM;
285 	}
286 
287 	memcpy(pkt, msg, len);
288 
289 	mutex_lock(&hdev->send_cpu_message_lock);
290 
291 	/* CPU-CP messages can be sent during soft-reset */
292 	if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
293 		rc = 0;
294 		goto out;
295 	}
296 
297 	if (hdev->device_cpu_disabled) {
298 		rc = -EIO;
299 		goto out;
300 	}
301 
302 	/* set fence to an invalid value */
303 	pkt->fence = cpu_to_le32(UINT_MAX);
304 	pi = queue->pi;
305 
306 	/*
307 	 * The CPU queue is a synchronous queue with an effective depth of
308 	 * a single entry (although it is allocated with room for multiple
309 	 * entries). We lock on it using 'send_cpu_message_lock' which
310 	 * serializes accesses to the CPU queue. This means that we don't need
311 	 * to lock the access to the entire H/W queues module when submitting
312 	 * a JOB to the CPU queue.
313 	 */
314 	hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), len, pkt_dma_addr);
315 
316 	if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
317 		expected_ack_val = queue->pi;
318 	else
319 		expected_ack_val = CPUCP_PACKET_FENCE_VAL;
320 
321 	rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
322 				(tmp == expected_ack_val), 1000,
323 				timeout, true);
324 
325 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
326 
327 	if (rc == -ETIMEDOUT) {
328 		/* If the FW performed a reset just before we sent it a packet, we will get
329 		 * a timeout. This is expected behavior, hence no need for an error message.
330 		 */
331 		if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
332 			dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
333 					tmp);
334 		else
335 			dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp);
336 		hdev->device_cpu_disabled = true;
337 		goto out;
338 	}
339 
340 	tmp = le32_to_cpu(pkt->ctl);
341 
342 	rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
343 	if (rc) {
344 		opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
345 
346 		if (!prop->supports_advanced_cpucp_rc) {
347 			dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
348 			rc = -EIO;
349 			goto scrub_descriptor;
350 		}
351 
352 		switch (rc) {
353 		case cpucp_packet_invalid:
354 			dev_err(hdev->dev,
355 				"CPU packet %d is not supported by F/W\n", opcode);
356 			break;
357 		case cpucp_packet_fault:
358 			dev_err(hdev->dev,
359 				"F/W failed processing CPU packet %d\n", opcode);
360 			break;
361 		case cpucp_packet_invalid_pkt:
362 			dev_dbg(hdev->dev,
363 				"CPU packet %d is not supported by F/W\n", opcode);
364 			break;
365 		case cpucp_packet_invalid_params:
366 			dev_err(hdev->dev,
367 				"F/W reports invalid parameters for CPU packet %d\n", opcode);
368 			break;
369 
370 		default:
371 			dev_err(hdev->dev,
372 				"Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
373 		}
374 
375 		/* propagate the return code from the f/w to the callers who want to check it */
376 		if (result)
377 			*result = rc;
378 
379 		rc = -EIO;
380 
381 	} else if (result) {
382 		*result = le64_to_cpu(pkt->result);
383 	}
384 
385 scrub_descriptor:
386 	/* Scrub previous buffer descriptor 'ctl' field which contains the
387 	 * previous PI value written during packet submission.
388 	 * We must do this or else F/W can read an old value upon queue wraparound.
389 	 */
390 	sent_bd = queue->kernel_address;
391 	sent_bd += hl_pi_2_offset(pi);
392 	sent_bd->ctl = cpu_to_le32(UINT_MAX);
393 
394 out:
395 	mutex_unlock(&hdev->send_cpu_message_lock);
396 
397 	hl_cpu_accessible_dma_pool_free(hdev, len, pkt);
398 
399 	return rc;
400 }
401 
402 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
403 {
404 	struct cpucp_packet pkt;
405 	u64 result;
406 	int rc;
407 
408 	memset(&pkt, 0, sizeof(pkt));
409 
410 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
411 				CPUCP_PKT_CTL_OPCODE_SHIFT);
412 	pkt.value = cpu_to_le64(event_type);
413 
414 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
415 						0, &result);
416 
417 	if (rc)
418 		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
419 
420 	return rc;
421 }
422 
423 int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
424 		size_t irq_arr_size)
425 {
426 	struct cpucp_unmask_irq_arr_packet *pkt;
427 	size_t total_pkt_size;
428 	u64 result;
429 	int rc;
430 
431 	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
432 			irq_arr_size;
433 
434 	/* data should be aligned to 8 bytes so that the CPU-CP can copy it */
435 	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
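	/* Worked example (illustrative, added note): the round-up above adds 7 and
	 * clears the low three bits, so a raw size of 0x1d (29) becomes
	 * (0x1d + 0x7) & ~0x7 = 0x20 (32), while an already aligned size such as
	 * 0x20 is left unchanged.
	 */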
436 
437 	/* total_pkt_size is cast to u16 later on */
438 	if (total_pkt_size > USHRT_MAX) {
439 		dev_err(hdev->dev, "too many elements in IRQ array\n");
440 		return -EINVAL;
441 	}
442 
443 	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
444 	if (!pkt)
445 		return -ENOMEM;
446 
447 	pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
448 	memcpy(&pkt->irqs, irq_arr, irq_arr_size);
449 
450 	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
451 						CPUCP_PKT_CTL_OPCODE_SHIFT);
452 
453 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
454 						total_pkt_size, 0, &result);
455 
456 	if (rc)
457 		dev_err(hdev->dev, "failed to unmask IRQ array\n");
458 
459 	kfree(pkt);
460 
461 	return rc;
462 }
463 
464 int hl_fw_test_cpu_queue(struct hl_device *hdev)
465 {
466 	struct cpucp_packet test_pkt = {};
467 	u64 result;
468 	int rc;
469 
470 	test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
471 					CPUCP_PKT_CTL_OPCODE_SHIFT);
472 	test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
473 
474 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
475 						sizeof(test_pkt), 0, &result);
476 
477 	if (!rc) {
478 		if (result != CPUCP_PACKET_FENCE_VAL)
479 			dev_err(hdev->dev,
480 				"CPU queue test failed (%#08llx)\n", result);
481 	} else {
482 		dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
483 	}
484 
485 	return rc;
486 }
487 
488 void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
489 						dma_addr_t *dma_handle)
490 {
491 	u64 kernel_addr;
492 
493 	kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
494 
495 	*dma_handle = hdev->cpu_accessible_dma_address +
496 		(kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
497 
498 	return (void *) (uintptr_t) kernel_addr;
499 }
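/*
 * Illustrative example (added note, hypothetical numbers): if the
 * CPU-accessible pool is virtually mapped at cpu_accessible_dma_mem =
 * 0xffff000000100000 with a bus address of cpu_accessible_dma_address =
 * 0x20000000, an allocation returned at kernel address 0xffff000000100040
 * yields a DMA handle of 0x20000040 - the same 0x40 offset applied to the
 * bus address.
 */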
500 
501 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
502 					void *vaddr)
503 {
504 	gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
505 			size);
506 }
507 
508 int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
509 {
510 	struct cpucp_packet pkt;
511 	int rc;
512 
513 	memset(&pkt, 0, sizeof(pkt));
514 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET <<	CPUCP_PKT_CTL_OPCODE_SHIFT);
515 	pkt.value = cpu_to_le64(open);
516 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
517 	if (rc)
518 		dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
519 
520 	return rc;
521 }
522 
523 int hl_fw_send_heartbeat(struct hl_device *hdev)
524 {
525 	struct cpucp_packet hb_pkt;
526 	u64 result;
527 	int rc;
528 
529 	memset(&hb_pkt, 0, sizeof(hb_pkt));
530 	hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
531 					CPUCP_PKT_CTL_OPCODE_SHIFT);
532 	hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
533 
534 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
535 						sizeof(hb_pkt), 0, &result);
536 
537 	if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
538 		return -EIO;
539 
540 	if (le32_to_cpu(hb_pkt.status_mask) &
541 					CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK) {
542 		dev_warn(hdev->dev, "FW reported EQ fault during heartbeat\n");
543 		rc = -EIO;
544 	}
545 
546 	return rc;
547 }
548 
549 static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
550 								u32 sts_val)
551 {
552 	bool err_exists = false;
553 
554 	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
555 		return false;
556 
557 	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
558 		dev_err(hdev->dev,
559 			"Device boot error - DRAM initialization failed\n");
560 		err_exists = true;
561 	}
562 
563 	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
564 		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
565 		err_exists = true;
566 	}
567 
568 	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
569 		dev_err(hdev->dev,
570 			"Device boot error - Thermal Sensor initialization failed\n");
571 		err_exists = true;
572 	}
573 
574 	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
575 		if (hdev->bmc_enable) {
576 			dev_err(hdev->dev,
577 				"Device boot error - Skipped waiting for BMC\n");
578 			err_exists = true;
579 		} else {
580 			dev_info(hdev->dev,
581 				"Device boot message - Skipped waiting for BMC\n");
582 			/* This is an info so we don't want it to disable the
583 			 * device
584 			 */
585 			err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
586 		}
587 	}
588 
589 	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
590 		dev_err(hdev->dev,
591 			"Device boot error - Serdes data from BMC not available\n");
592 		err_exists = true;
593 	}
594 
595 	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
596 		dev_err(hdev->dev,
597 			"Device boot error - NIC F/W initialization failed\n");
598 		err_exists = true;
599 	}
600 
601 	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
602 		dev_err(hdev->dev,
603 			"Device boot warning - security not ready\n");
604 		err_exists = true;
605 	}
606 
607 	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
608 		dev_err(hdev->dev, "Device boot error - security failure\n");
609 		err_exists = true;
610 	}
611 
612 	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
613 		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
614 		err_exists = true;
615 	}
616 
617 	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
618 		dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
619 		err_exists = true;
620 	}
621 
622 	if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
623 		dev_err(hdev->dev, "Device boot error - PLL failure\n");
624 		err_exists = true;
625 	}
626 
627 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
628 		/* Ignore this bit, don't prevent driver loading */
629 		dev_dbg(hdev->dev, "device unusable status is set\n");
630 		err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
631 	}
632 
633 	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
634 		dev_err(hdev->dev, "Device boot error - binning failure\n");
635 		err_exists = true;
636 	}
637 
638 	if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
639 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
640 
641 	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
642 		dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
643 		err_exists = true;
644 	}
645 
646 	/* All warnings should be handled here so they do not reach the unknown-error check below */
647 	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
648 		dev_warn(hdev->dev,
649 			"Device boot warning - Skipped DRAM initialization\n");
650 		/* This is a warning so we don't want it to disable the
651 		 * device
652 		 */
653 		err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
654 	}
655 
656 	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
657 		dev_warn(hdev->dev,
658 			"Device boot warning - Failed to load preboot primary image\n");
659 		/* This is a warning so we don't want it to disable the
660 		 * device as we have a secondary preboot image
661 		 */
662 		err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
663 	}
664 
665 	if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
666 		dev_warn(hdev->dev,
667 			"Device boot warning - TPM failure\n");
668 		/* This is a warning so we don't want it to disable the
669 		 * device
670 		 */
671 		err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
672 	}
673 
674 	if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
675 		dev_err(hdev->dev,
676 			"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
677 		err_exists = true;
678 	}
679 
680 	/* return error only if it's in the predefined mask */
681 	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
682 				lower_32_bits(hdev->boot_error_status_mask)))
683 		return true;
684 
685 	return false;
686 }
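/*
 * Illustrative example (added note, hypothetical mask value): if the only
 * error bit set in err_val (besides CPU_BOOT_ERR0_ENABLED) is
 * CPU_BOOT_ERR0_PLL_FAIL but the corresponding bit is cleared in the lower
 * 32 bits of hdev->boot_error_status_mask, the error is still printed above,
 * yet the function returns false and boot is allowed to continue.
 */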
687 
688 /* placeholder for ERR1 as no errors defined there yet */
689 static bool fw_report_boot_dev1(struct hl_device *hdev, u32 err_val,
690 								u32 sts_val)
691 {
692 	/*
693 	 * Keep this variable to preserve the logic of the function.
694 	 * This way, fewer modifications will be required when errors are
695 	 * added to DEV_ERR1.
696 	 */
697 	bool err_exists = false;
698 
699 	if (!(err_val & CPU_BOOT_ERR1_ENABLED))
700 		return false;
701 
702 	if (sts_val & CPU_BOOT_DEV_STS1_ENABLED)
703 		dev_dbg(hdev->dev, "Device status1 %#x\n", sts_val);
704 
705 	if (!err_exists && (err_val & ~CPU_BOOT_ERR1_ENABLED)) {
706 		dev_err(hdev->dev,
707 			"Device boot error - unknown ERR1 error 0x%08x\n",
708 								err_val);
709 		err_exists = true;
710 	}
711 
712 	/* return error only if it's in the predefined mask */
713 	if (err_exists && ((err_val & ~CPU_BOOT_ERR1_ENABLED) &
714 				upper_32_bits(hdev->boot_error_status_mask)))
715 		return true;
716 
717 	return false;
718 }
719 
720 static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
721 				u32 boot_err1_reg, u32 cpu_boot_dev_status0_reg,
722 				u32 cpu_boot_dev_status1_reg)
723 {
724 	u32 err_val, status_val;
725 	bool err_exists = false;
726 
727 	/* Some of the firmware status codes are deprecated in newer f/w
728 	 * versions. In those versions, the errors are reported
729 	 * in different registers. Therefore, we need to check those
730 	 * registers and print the exact errors. Moreover, there
731 	 * may be multiple errors, so we need to report on each error
732 	 * separately. Some of the error codes might indicate a state
733 	 * that is not an error per se, but it is an error in a production
734 	 * environment.
735 	 */
736 	err_val = RREG32(boot_err0_reg);
737 	status_val = RREG32(cpu_boot_dev_status0_reg);
738 	err_exists = fw_report_boot_dev0(hdev, err_val, status_val);
739 
740 	err_val = RREG32(boot_err1_reg);
741 	status_val = RREG32(cpu_boot_dev_status1_reg);
742 	err_exists |= fw_report_boot_dev1(hdev, err_val, status_val);
743 
744 	if (err_exists)
745 		return -EIO;
746 
747 	return 0;
748 }
749 
750 int hl_fw_cpucp_info_get(struct hl_device *hdev,
751 				u32 sts_boot_dev_sts0_reg,
752 				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
753 				u32 boot_err1_reg)
754 {
755 	struct asic_fixed_properties *prop = &hdev->asic_prop;
756 	struct cpucp_packet pkt = {};
757 	dma_addr_t cpucp_info_dma_addr;
758 	void *cpucp_info_cpu_addr;
759 	char *kernel_ver;
760 	u64 result;
761 	int rc;
762 
763 	cpucp_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, sizeof(struct cpucp_info),
764 								&cpucp_info_dma_addr);
765 	if (!cpucp_info_cpu_addr) {
766 		dev_err(hdev->dev,
767 			"Failed to allocate DMA memory for CPU-CP info packet\n");
768 		return -ENOMEM;
769 	}
770 
771 	memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
772 
773 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
774 				CPUCP_PKT_CTL_OPCODE_SHIFT);
775 	pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
776 	pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
777 
778 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
779 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
780 	if (rc) {
781 		dev_err(hdev->dev,
782 			"Failed to handle CPU-CP info pkt, error %d\n", rc);
783 		goto out;
784 	}
785 
786 	rc = fw_read_errors(hdev, boot_err0_reg, boot_err1_reg,
787 				sts_boot_dev_sts0_reg, sts_boot_dev_sts1_reg);
788 	if (rc) {
789 		dev_err(hdev->dev, "Errors in device boot\n");
790 		goto out;
791 	}
792 
793 	memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
794 			sizeof(prop->cpucp_info));
795 
796 	rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
797 	if (rc) {
798 		dev_err(hdev->dev,
799 			"Failed to build hwmon channel info, error %d\n", rc);
800 		rc = -EFAULT;
801 		goto out;
802 	}
803 
804 	kernel_ver = extract_fw_ver_from_str(prop->cpucp_info.kernel_version);
805 	if (kernel_ver) {
806 		dev_info(hdev->dev, "Linux version %s", kernel_ver);
807 		kfree(kernel_ver);
808 	}
809 
810 	/* assume EQ code doesn't need to check eqe index */
811 	hdev->event_queue.check_eqe_index = false;
812 
813 	/* Read FW application security bits again */
814 	if (prop->fw_cpu_boot_dev_sts0_valid) {
815 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
816 		if (prop->fw_app_cpu_boot_dev_sts0 &
817 				CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
818 			hdev->event_queue.check_eqe_index = true;
819 	}
820 
821 	if (prop->fw_cpu_boot_dev_sts1_valid)
822 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
823 
824 out:
825 	hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_info), cpucp_info_cpu_addr);
826 
827 	return rc;
828 }
829 
830 static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
831 {
832 	struct cpucp_array_data_packet *pkt;
833 	size_t total_pkt_size, data_size;
834 	u64 result;
835 	int rc;
836 
837 	/* skip sending this info for unsupported ASICs */
838 	if (!hdev->asic_funcs->get_msi_info)
839 		return 0;
840 
841 	data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
842 	total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
843 
844 	/* data should be aligned to 8 bytes so that the CPU-CP can copy it */
845 	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
846 
847 	/* total_pkt_size is cast to u16 later on */
848 	if (total_pkt_size > USHRT_MAX) {
849 		dev_err(hdev->dev, "CPUCP array data is too big\n");
850 		return -EINVAL;
851 	}
852 
853 	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
854 	if (!pkt)
855 		return -ENOMEM;
856 
857 	pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
858 
859 	memset((void *) &pkt->data, 0xFF, data_size);
860 	hdev->asic_funcs->get_msi_info(pkt->data);
861 
862 	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
863 						CPUCP_PKT_CTL_OPCODE_SHIFT);
864 
865 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
866 						total_pkt_size, 0, &result);
867 
868 	/*
869 	 * In case the packet result is invalid, it means that the FW does not
870 	 * support this feature and will use default/hard-coded MSI values.
871 	 * There is no reason to stop the boot.
872 	 */
873 	if (rc && result == cpucp_packet_invalid)
874 		rc = 0;
875 
876 	if (rc)
877 		dev_err(hdev->dev, "failed to send CPUCP array data\n");
878 
879 	kfree(pkt);
880 
881 	return rc;
882 }
883 
884 int hl_fw_cpucp_handshake(struct hl_device *hdev,
885 				u32 sts_boot_dev_sts0_reg,
886 				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
887 				u32 boot_err1_reg)
888 {
889 	int rc;
890 
891 	rc = hl_fw_cpucp_info_get(hdev, sts_boot_dev_sts0_reg,
892 					sts_boot_dev_sts1_reg, boot_err0_reg,
893 					boot_err1_reg);
894 	if (rc)
895 		return rc;
896 
897 	return hl_fw_send_msi_info_msg(hdev);
898 }
899 
900 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
901 {
902 	struct cpucp_packet pkt = {};
903 	void *eeprom_info_cpu_addr;
904 	dma_addr_t eeprom_info_dma_addr;
905 	u64 result;
906 	int rc;
907 
908 	eeprom_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, max_size,
909 									&eeprom_info_dma_addr);
910 	if (!eeprom_info_cpu_addr) {
911 		dev_err(hdev->dev,
912 			"Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
913 		return -ENOMEM;
914 	}
915 
916 	memset(eeprom_info_cpu_addr, 0, max_size);
917 
918 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
919 				CPUCP_PKT_CTL_OPCODE_SHIFT);
920 	pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
921 	pkt.data_max_size = cpu_to_le32(max_size);
922 
923 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
924 			HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
925 
926 	if (rc) {
927 		dev_err(hdev->dev,
928 			"Failed to handle CPU-CP EEPROM packet, error %d\n",
929 			rc);
930 		goto out;
931 	}
932 
933 	/* result contains the actual size */
934 	memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
935 
936 out:
937 	hl_cpu_accessible_dma_pool_free(hdev, max_size, eeprom_info_cpu_addr);
938 
939 	return rc;
940 }
941 
942 int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
943 {
944 	struct cpucp_monitor_dump *mon_dump_cpu_addr;
945 	dma_addr_t mon_dump_dma_addr;
946 	struct cpucp_packet pkt = {};
947 	size_t data_size;
948 	__le32 *src_ptr;
949 	u32 *dst_ptr;
950 	u64 result;
951 	int i, rc;
952 
953 	data_size = sizeof(struct cpucp_monitor_dump);
954 	mon_dump_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, data_size, &mon_dump_dma_addr);
955 	if (!mon_dump_cpu_addr) {
956 		dev_err(hdev->dev,
957 			"Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
958 		return -ENOMEM;
959 	}
960 
961 	memset(mon_dump_cpu_addr, 0, data_size);
962 
963 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
964 	pkt.addr = cpu_to_le64(mon_dump_dma_addr);
965 	pkt.data_max_size = cpu_to_le32(data_size);
966 
967 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
968 							HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
969 	if (rc) {
970 		dev_err(hdev->dev, "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
971 		goto out;
972 	}
973 
974 	/* result contains the actual size */
975 	src_ptr = (__le32 *) mon_dump_cpu_addr;
976 	dst_ptr = data;
977 	for (i = 0; i < (data_size / sizeof(u32)); i++) {
978 		*dst_ptr = le32_to_cpu(*src_ptr);
979 		src_ptr++;
980 		dst_ptr++;
981 	}
982 
983 out:
984 	hl_cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
985 
986 	return rc;
987 }
988 
989 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
990 		struct hl_info_pci_counters *counters)
991 {
992 	struct cpucp_packet pkt = {};
993 	u64 result;
994 	int rc;
995 
996 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
997 			CPUCP_PKT_CTL_OPCODE_SHIFT);
998 
999 	/* Fetch PCI rx counter */
1000 	pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
1001 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1002 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1003 	if (rc) {
1004 		dev_err(hdev->dev,
1005 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1006 		return rc;
1007 	}
1008 	counters->rx_throughput = result;
1009 
1010 	memset(&pkt, 0, sizeof(pkt));
1011 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
1012 			CPUCP_PKT_CTL_OPCODE_SHIFT);
1013 
1014 	/* Fetch PCI tx counter */
1015 	pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
1016 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1017 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1018 	if (rc) {
1019 		dev_err(hdev->dev,
1020 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1021 		return rc;
1022 	}
1023 	counters->tx_throughput = result;
1024 
1025 	/* Fetch PCI replay counter */
1026 	memset(&pkt, 0, sizeof(pkt));
1027 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
1028 			CPUCP_PKT_CTL_OPCODE_SHIFT);
1029 
1030 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1031 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1032 	if (rc) {
1033 		dev_err(hdev->dev,
1034 			"Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1035 		return rc;
1036 	}
1037 	counters->replay_cnt = (u32) result;
1038 
1039 	return rc;
1040 }
1041 
1042 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
1043 {
1044 	struct cpucp_packet pkt = {};
1045 	u64 result;
1046 	int rc;
1047 
1048 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
1049 				CPUCP_PKT_CTL_OPCODE_SHIFT);
1050 
1051 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1052 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1053 	if (rc) {
1054 		dev_err(hdev->dev,
1055 			"Failed to handle CpuCP total energy pkt, error %d\n",
1056 				rc);
1057 		return rc;
1058 	}
1059 
1060 	*total_energy = result;
1061 
1062 	return rc;
1063 }
1064 
1065 int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
1066 						enum pll_index *pll_index)
1067 {
1068 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1069 	u8 pll_byte, pll_bit_off;
1070 	bool dynamic_pll;
1071 	int fw_pll_idx;
1072 
1073 	dynamic_pll = !!(prop->fw_app_cpu_boot_dev_sts0 &
1074 						CPU_BOOT_DEV_STS0_DYN_PLL_EN);
1075 
1076 	if (!dynamic_pll) {
1077 		/*
1078 		 * In case we are working with legacy FW (each ASIC has a unique
1079 		 * PLL numbering), use the driver-based index as it is aligned
1080 		 * with the FW legacy numbering.
1081 		 */
1082 		*pll_index = input_pll_index;
1083 		return 0;
1084 	}
1085 
1086 	/* retrieve a FW compatible PLL index based on
1087 	 * ASIC specific user request
1088 	 */
1089 	fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
1090 	if (fw_pll_idx < 0) {
1091 		dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
1092 			input_pll_index, fw_pll_idx);
1093 		return -EINVAL;
1094 	}
1095 
1096 	/* PLL map is a u8 array */
1097 	pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
1098 	pll_bit_off = fw_pll_idx & 0x7;
1099 
1100 	if (!(pll_byte & BIT(pll_bit_off))) {
1101 		dev_err(hdev->dev, "PLL index %d is not supported\n",
1102 			fw_pll_idx);
1103 		return -EINVAL;
1104 	}
1105 
1106 	*pll_index = fw_pll_idx;
1107 
1108 	return 0;
1109 }
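/*
 * Worked example (illustrative, added note): for a FW PLL index of 10, the
 * support bit is looked up in pll_map[10 >> 3] = pll_map[1], bit
 * (10 & 0x7) = 2. The index is accepted only if that bit is set, i.e. the FW
 * advertises this PLL as supported.
 */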
1110 
1111 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
1112 		u16 *pll_freq_arr)
1113 {
1114 	struct cpucp_packet pkt;
1115 	enum pll_index used_pll_idx;
1116 	u64 result;
1117 	int rc;
1118 
1119 	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
1120 	if (rc)
1121 		return rc;
1122 
1123 	memset(&pkt, 0, sizeof(pkt));
1124 
1125 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
1126 				CPUCP_PKT_CTL_OPCODE_SHIFT);
1127 	pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
1128 
1129 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1130 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1131 	if (rc) {
1132 		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
1133 		return rc;
1134 	}
1135 
1136 	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
1137 	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
1138 	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
1139 	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
1140 
1141 	return 0;
1142 }
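/*
 * Usage sketch (illustrative only - the variable names and the PLL index are
 * hypothetical):
 *
 *	u16 freq_arr[4];
 *
 *	rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, freq_arr);
 *	if (!rc)
 *		cur_freq = freq_arr[0];
 *
 * On success, the four PLL output frequencies are unpacked from the 64-bit
 * packet result into pll_freq_arr[0..3].
 */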
1143 
1144 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
1145 {
1146 	struct cpucp_packet pkt;
1147 	u64 result;
1148 	int rc;
1149 
1150 	memset(&pkt, 0, sizeof(pkt));
1151 
1152 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
1153 				CPUCP_PKT_CTL_OPCODE_SHIFT);
1154 	pkt.type = cpu_to_le16(CPUCP_POWER_INPUT);
1155 
1156 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1157 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1158 	if (rc) {
1159 		dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
1160 		return rc;
1161 	}
1162 
1163 	*power = result;
1164 
1165 	return rc;
1166 }
1167 
1168 int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
1169 				struct cpucp_hbm_row_info *info)
1170 {
1171 	struct cpucp_hbm_row_info *cpucp_repl_rows_info_cpu_addr;
1172 	dma_addr_t cpucp_repl_rows_info_dma_addr;
1173 	struct cpucp_packet pkt = {};
1174 	u64 result;
1175 	int rc;
1176 
1177 	cpucp_repl_rows_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev,
1178 							sizeof(struct cpucp_hbm_row_info),
1179 							&cpucp_repl_rows_info_dma_addr);
1180 	if (!cpucp_repl_rows_info_cpu_addr) {
1181 		dev_err(hdev->dev,
1182 			"Failed to allocate DMA memory for CPU-CP replaced rows info packet\n");
1183 		return -ENOMEM;
1184 	}
1185 
1186 	memset(cpucp_repl_rows_info_cpu_addr, 0, sizeof(struct cpucp_hbm_row_info));
1187 
1188 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET <<
1189 					CPUCP_PKT_CTL_OPCODE_SHIFT);
1190 	pkt.addr = cpu_to_le64(cpucp_repl_rows_info_dma_addr);
1191 	pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_hbm_row_info));
1192 
1193 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1194 					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1195 	if (rc) {
1196 		dev_err(hdev->dev,
1197 			"Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc);
1198 		goto out;
1199 	}
1200 
1201 	memcpy(info, cpucp_repl_rows_info_cpu_addr, sizeof(*info));
1202 
1203 out:
1204 	hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_hbm_row_info),
1205 						cpucp_repl_rows_info_cpu_addr);
1206 
1207 	return rc;
1208 }
1209 
1210 int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num)
1211 {
1212 	struct cpucp_packet pkt;
1213 	u64 result;
1214 	int rc;
1215 
1216 	memset(&pkt, 0, sizeof(pkt));
1217 
1218 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_PENDING_ROWS_STATUS << CPUCP_PKT_CTL_OPCODE_SHIFT);
1219 
1220 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
1221 	if (rc) {
1222 		dev_err(hdev->dev,
1223 				"Failed to handle CPU-CP pending rows info pkt, error %d\n", rc);
1224 		goto out;
1225 	}
1226 
1227 	*pend_rows_num = (u32) result;
1228 out:
1229 	return rc;
1230 }
1231 
1232 int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid)
1233 {
1234 	struct cpucp_packet pkt;
1235 	int rc;
1236 
1237 	memset(&pkt, 0, sizeof(pkt));
1238 
1239 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ENGINE_CORE_ASID_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
1240 	pkt.value = cpu_to_le64(asid);
1241 
1242 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1243 						HL_CPUCP_INFO_TIMEOUT_USEC, NULL);
1244 	if (rc)
1245 		dev_err(hdev->dev,
1246 			"Failed on ASID configuration request for engine core, error %d\n",
1247 			rc);
1248 
1249 	return rc;
1250 }
1251 
1252 void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
1253 {
1254 	struct static_fw_load_mgr *static_loader =
1255 			&hdev->fw_loader.static_loader;
1256 	int rc;
1257 
1258 	if (hdev->asic_prop.dynamic_fw_load) {
1259 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
1260 				COMMS_RST_DEV, 0, false,
1261 				hdev->fw_loader.cpu_timeout);
1262 		if (rc)
1263 			dev_err(hdev->dev, "Failed sending COMMS_RST_DEV\n");
1264 	} else {
1265 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_RST_DEV);
1266 	}
1267 }
1268 
1269 void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
1270 {
1271 	struct static_fw_load_mgr *static_loader =
1272 			&hdev->fw_loader.static_loader;
1273 	int rc;
1274 
1275 	if (hdev->device_cpu_is_halted)
1276 		return;
1277 
1278 	/* Stop device CPU to make sure nothing bad happens */
1279 	if (hdev->asic_prop.dynamic_fw_load) {
1280 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
1281 				COMMS_GOTO_WFE, 0, false,
1282 				hdev->fw_loader.cpu_timeout);
1283 		if (rc)
1284 			dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
1285 	} else {
1286 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
1287 		msleep(static_loader->cpu_reset_wait_msec);
1288 
1289 		/* Must clear this register in order to prevent preboot
1290 		 * from reading WFE after reboot
1291 		 */
1292 		WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
1293 	}
1294 
1295 	hdev->device_cpu_is_halted = true;
1296 }
1297 
1298 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
1299 {
1300 	/* Some of the status codes below are deprecated in newer f/w
1301 	 * versions but we keep them here for backward compatibility
1302 	 */
1303 	switch (status) {
1304 	case CPU_BOOT_STATUS_NA:
1305 		dev_err(hdev->dev,
1306 			"Device boot progress - BTL/ROM did NOT run\n");
1307 		break;
1308 	case CPU_BOOT_STATUS_IN_WFE:
1309 		dev_err(hdev->dev,
1310 			"Device boot progress - Stuck inside WFE loop\n");
1311 		break;
1312 	case CPU_BOOT_STATUS_IN_BTL:
1313 		dev_err(hdev->dev,
1314 			"Device boot progress - Stuck in BTL\n");
1315 		break;
1316 	case CPU_BOOT_STATUS_IN_PREBOOT:
1317 		dev_err(hdev->dev,
1318 			"Device boot progress - Stuck in Preboot\n");
1319 		break;
1320 	case CPU_BOOT_STATUS_IN_SPL:
1321 		dev_err(hdev->dev,
1322 			"Device boot progress - Stuck in SPL\n");
1323 		break;
1324 	case CPU_BOOT_STATUS_IN_UBOOT:
1325 		dev_err(hdev->dev,
1326 			"Device boot progress - Stuck in u-boot\n");
1327 		break;
1328 	case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
1329 		dev_err(hdev->dev,
1330 			"Device boot progress - DRAM initialization failed\n");
1331 		break;
1332 	case CPU_BOOT_STATUS_UBOOT_NOT_READY:
1333 		dev_err(hdev->dev,
1334 			"Device boot progress - Cannot boot\n");
1335 		break;
1336 	case CPU_BOOT_STATUS_TS_INIT_FAIL:
1337 		dev_err(hdev->dev,
1338 			"Device boot progress - Thermal Sensor initialization failed\n");
1339 		break;
1340 	case CPU_BOOT_STATUS_SECURITY_READY:
1341 		dev_err(hdev->dev,
1342 			"Device boot progress - Stuck in preboot after security initialization\n");
1343 		break;
1344 	default:
1345 		dev_err(hdev->dev,
1346 			"Device boot progress - Invalid or unexpected status code %d\n", status);
1347 		break;
1348 	}
1349 }
1350 
1351 int hl_fw_wait_preboot_ready(struct hl_device *hdev)
1352 {
1353 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
1354 	u32 status;
1355 	int rc;
1356 
1357 	/* Need to check two possible scenarios:
1358 	 *
1359 	 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
1360 	 * the preboot is waiting for the boot fit
1361 	 *
1362 	 * All other status values - for older firmwares where the uboot was
1363 	 * loaded from the FLASH
1364 	 */
1365 	rc = hl_poll_timeout(
1366 		hdev,
1367 		pre_fw_load->cpu_boot_status_reg,
1368 		status,
1369 		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
1370 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
1371 		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
1372 		hdev->fw_poll_interval_usec,
1373 		pre_fw_load->wait_for_preboot_timeout);
1374 
1375 	if (rc) {
1376 		detect_cpu_boot_status(hdev, status);
1377 		dev_err(hdev->dev, "CPU boot ready timeout (status = %d)\n", status);
1378 
1379 		/* If we read all FF, then something is totally wrong, so there is
1380 		 * no point in reading specific errors
1381 		 */
1382 		if (status != -1)
1383 			fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
1384 						pre_fw_load->boot_err1_reg,
1385 						pre_fw_load->sts_boot_dev_sts0_reg,
1386 						pre_fw_load->sts_boot_dev_sts1_reg);
1387 		return -EIO;
1388 	}
1389 
1390 	hdev->fw_loader.fw_comp_loaded |= FW_TYPE_PREBOOT_CPU;
1391 
1392 	return 0;
1393 }
1394 
1395 static int hl_fw_read_preboot_caps(struct hl_device *hdev)
1396 {
1397 	struct pre_fw_load_props *pre_fw_load;
1398 	struct asic_fixed_properties *prop;
1399 	u32 reg_val;
1400 	int rc;
1401 
1402 	prop = &hdev->asic_prop;
1403 	pre_fw_load = &hdev->fw_loader.pre_fw_load;
1404 
1405 	rc = hl_fw_wait_preboot_ready(hdev);
1406 	if (rc)
1407 		return rc;
1408 
1409 	/*
1410 	 * The DEV_STS* registers contain the FW capabilities/features.
1411 	 * We can rely on these registers only if the CPU_BOOT_DEV_STS*_ENABLED
1412 	 * bit is set.
1413 	 * On the first read of such a register, we store its value ONLY if the
1414 	 * register is enabled (which will be propagated to the next stages) and
1415 	 * also mark the register as valid.
1416 	 * In case it is not enabled, the stored value is left 0, i.e. all
1417 	 * caps/features are off.
1418 	 */
1419 	reg_val = RREG32(pre_fw_load->sts_boot_dev_sts0_reg);
1420 	if (reg_val & CPU_BOOT_DEV_STS0_ENABLED) {
1421 		prop->fw_cpu_boot_dev_sts0_valid = true;
1422 		prop->fw_preboot_cpu_boot_dev_sts0 = reg_val;
1423 	}
1424 
1425 	reg_val = RREG32(pre_fw_load->sts_boot_dev_sts1_reg);
1426 	if (reg_val & CPU_BOOT_DEV_STS1_ENABLED) {
1427 		prop->fw_cpu_boot_dev_sts1_valid = true;
1428 		prop->fw_preboot_cpu_boot_dev_sts1 = reg_val;
1429 	}
1430 
1431 	prop->dynamic_fw_load = !!(prop->fw_preboot_cpu_boot_dev_sts0 &
1432 						CPU_BOOT_DEV_STS0_FW_LD_COM_EN);
1433 
1434 	/* initialize FW loader once we know which load protocol is used */
1435 	hdev->asic_funcs->init_firmware_loader(hdev);
1436 
1437 	dev_dbg(hdev->dev, "Attempting %s FW load\n",
1438 			prop->dynamic_fw_load ? "dynamic" : "legacy");
1439 	return 0;
1440 }
1441 
1442 static int hl_fw_static_read_device_fw_version(struct hl_device *hdev,
1443 					enum hl_fw_component fwc)
1444 {
1445 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1446 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
1447 	struct static_fw_load_mgr *static_loader;
1448 	char *dest, *boot_ver, *preboot_ver;
1449 	u32 ver_off, limit;
1450 	const char *name;
1451 	char btl_ver[32];
1452 
1453 	static_loader = &hdev->fw_loader.static_loader;
1454 
1455 	switch (fwc) {
1456 	case FW_COMP_BOOT_FIT:
1457 		ver_off = RREG32(static_loader->boot_fit_version_offset_reg);
1458 		dest = prop->uboot_ver;
1459 		name = "Boot-fit";
1460 		limit = static_loader->boot_fit_version_max_off;
1461 		break;
1462 	case FW_COMP_PREBOOT:
1463 		ver_off = RREG32(static_loader->preboot_version_offset_reg);
1464 		dest = prop->preboot_ver;
1465 		name = "Preboot";
1466 		limit = static_loader->preboot_version_max_off;
1467 		break;
1468 	default:
1469 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
1470 		return -EIO;
1471 	}
1472 
1473 	ver_off &= static_loader->sram_offset_mask;
1474 
1475 	if (ver_off < limit) {
1476 		memcpy_fromio(dest,
1477 			hdev->pcie_bar[fw_loader->sram_bar_id] + ver_off,
1478 			VERSION_MAX_LEN);
1479 	} else {
1480 		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
1481 								name, ver_off);
1482 		strscpy(dest, "unavailable", VERSION_MAX_LEN);
1483 		return -EIO;
1484 	}
1485 
1486 	if (fwc == FW_COMP_BOOT_FIT) {
1487 		boot_ver = extract_fw_ver_from_str(prop->uboot_ver);
1488 		if (boot_ver) {
1489 			dev_info(hdev->dev, "boot-fit version %s\n", boot_ver);
1490 			kfree(boot_ver);
1491 		}
1492 	} else if (fwc == FW_COMP_PREBOOT) {
1493 		preboot_ver = strnstr(prop->preboot_ver, "Preboot",
1494 						VERSION_MAX_LEN);
1495 		if (preboot_ver && preboot_ver != prop->preboot_ver) {
1496 			strscpy(btl_ver, prop->preboot_ver,
1497 				min((int) (preboot_ver - prop->preboot_ver),
1498 									31));
1499 			dev_info(hdev->dev, "%s\n", btl_ver);
1500 		}
1501 
1502 		preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
1503 		if (preboot_ver) {
1504 			dev_info(hdev->dev, "preboot version %s\n",
1505 								preboot_ver);
1506 			kfree(preboot_ver);
1507 		}
1508 	}
1509 
1510 	return 0;
1511 }
1512 
1513 /**
1514  * hl_fw_preboot_update_state - update internal data structures during
1515  *                              handshake with preboot
1516  *
1517  * @hdev: pointer to the habanalabs device structure
1518  *
1519  * This function updates internal driver state and does not return a value.
1521  */
1522 static void hl_fw_preboot_update_state(struct hl_device *hdev)
1523 {
1524 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1525 	u32 cpu_boot_dev_sts0, cpu_boot_dev_sts1;
1526 
1527 	cpu_boot_dev_sts0 = prop->fw_preboot_cpu_boot_dev_sts0;
1528 	cpu_boot_dev_sts1 = prop->fw_preboot_cpu_boot_dev_sts1;
1529 
1530 	/* We read boot_dev_sts registers multiple times during boot:
1531 	 * 1. preboot - a. Check whether the security status bits are valid
1532 	 *              b. Check whether fw security is enabled
1533 	 *              c. Check whether hard reset is done by preboot
1534 	 * 2. boot cpu - a. Fetch boot cpu security status
1535 	 *               b. Check whether hard reset is done by boot cpu
1536 	 * 3. FW application - a. Fetch fw application security status
1537 	 *                     b. Check whether hard reset is done by fw app
1538 	 */
1539 	prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
1540 
1541 	prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN);
1542 
1543 	dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
1544 							cpu_boot_dev_sts0);
1545 
1546 	dev_dbg(hdev->dev, "Firmware preboot boot device status1 %#x\n",
1547 							cpu_boot_dev_sts1);
1548 
1549 	dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
1550 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1551 
1552 	dev_dbg(hdev->dev, "firmware-level security is %s\n",
1553 			prop->fw_security_enabled ? "enabled" : "disabled");
1554 
1555 	dev_dbg(hdev->dev, "GIC controller is %s\n",
1556 			prop->gic_interrupts_enable ? "enabled" : "disabled");
1557 }
1558 
1559 static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
1560 {
1561 	int rc;
1562 
1563 	rc = hl_fw_static_read_device_fw_version(hdev, FW_COMP_PREBOOT);
1564 	if (rc)
1565 		return rc;
1566 
1567 	return 0;
1568 }
1569 
1570 int hl_fw_read_preboot_status(struct hl_device *hdev)
1571 {
1572 	int rc;
1573 
1574 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
1575 		return 0;
1576 
1577 	/* get FW pre-load parameters  */
1578 	hdev->asic_funcs->init_firmware_preload_params(hdev);
1579 
1580 	/*
1581 	 * In order to determine boot method (static VS dynamic) we need to
1582 	 * read the boot caps register
1583 	 */
1584 	rc = hl_fw_read_preboot_caps(hdev);
1585 	if (rc)
1586 		return rc;
1587 
1588 	hl_fw_preboot_update_state(hdev);
1589 
1590 	/* no need to read preboot status in dynamic load */
1591 	if (hdev->asic_prop.dynamic_fw_load)
1592 		return 0;
1593 
1594 	return hl_fw_static_read_preboot_status(hdev);
1595 }
1596 
1597 /* associate a string with each COMMS status */
1598 static char *hl_dynamic_fw_status_str[COMMS_STS_INVLD_LAST] = {
1599 	[COMMS_STS_NOOP] = "NOOP",
1600 	[COMMS_STS_ACK] = "ACK",
1601 	[COMMS_STS_OK] = "OK",
1602 	[COMMS_STS_ERR] = "ERR",
1603 	[COMMS_STS_VALID_ERR] = "VALID_ERR",
1604 	[COMMS_STS_TIMEOUT_ERR] = "TIMEOUT_ERR",
1605 };
1606 
1607 /**
1608  * hl_fw_dynamic_report_error_status - report error status
1609  *
1610  * @hdev: pointer to the habanalabs device structure
1611  * @status: value of FW status register
1612  * @expected_status: the expected status
1613  */
1614 static void hl_fw_dynamic_report_error_status(struct hl_device *hdev,
1615 						u32 status,
1616 						enum comms_sts expected_status)
1617 {
1618 	enum comms_sts comm_status =
1619 				FIELD_GET(COMMS_STATUS_STATUS_MASK, status);
1620 
1621 	if (comm_status < COMMS_STS_INVLD_LAST)
1622 		dev_err(hdev->dev, "Device status %s, expected status: %s\n",
1623 				hl_dynamic_fw_status_str[comm_status],
1624 				hl_dynamic_fw_status_str[expected_status]);
1625 	else
1626 		dev_err(hdev->dev, "Device status unknown %d, expected status: %s\n",
1627 				comm_status,
1628 				hl_dynamic_fw_status_str[expected_status]);
1629 }
1630 
1631 /**
1632  * hl_fw_dynamic_send_cmd - send LKD to FW cmd
1633  *
1634  * @hdev: pointer to the habanalabs device structure
1635  * @fw_loader: managing structure for loading device's FW
1636  * @cmd: LKD to FW cmd code
1637  * @size: size of next FW component to be loaded (0 if not necessary)
1638  *
1639  * The exact LKD to FW command layout is defined in struct comms_command.
1640  * note: the size argument is used only when the next FW component should be
1641  *       loaded, otherwise it shall be 0. The size is used by the FW in later
1642  *       protocol stages; when sending, it only indicates the amount of memory
1643  *       to be allocated by the FW to receive the next boot component.
1644  */
1645 static void hl_fw_dynamic_send_cmd(struct hl_device *hdev,
1646 				struct fw_load_mgr *fw_loader,
1647 				enum comms_cmd cmd, unsigned int size)
1648 {
1649 	struct cpu_dyn_regs *dyn_regs;
1650 	u32 val;
1651 
1652 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
1653 
1654 	val = FIELD_PREP(COMMS_COMMAND_CMD_MASK, cmd);
1655 	val |= FIELD_PREP(COMMS_COMMAND_SIZE_MASK, size);
1656 
1657 	trace_habanalabs_comms_send_cmd(hdev->dev, comms_cmd_str_arr[cmd]);
1658 	WREG32(le32_to_cpu(dyn_regs->kmd_msg_to_cpu), val);
1659 }
1660 
1661 /**
1662  * hl_fw_dynamic_extract_fw_response - update the FW response
1663  *
1664  * @hdev: pointer to the habanalabs device structure
1665  * @fw_loader: managing structure for loading device's FW
1666  * @response: FW response
1667  * @status: the status read from CPU status register
1668  *
1669  * @return 0 on success, otherwise non-zero error code
1670  */
1671 static int hl_fw_dynamic_extract_fw_response(struct hl_device *hdev,
1672 						struct fw_load_mgr *fw_loader,
1673 						struct fw_response *response,
1674 						u32 status)
1675 {
1676 	response->status = FIELD_GET(COMMS_STATUS_STATUS_MASK, status);
1677 	response->ram_offset = FIELD_GET(COMMS_STATUS_OFFSET_MASK, status) <<
1678 						COMMS_STATUS_OFFSET_ALIGN_SHIFT;
1679 	response->ram_type = FIELD_GET(COMMS_STATUS_RAM_TYPE_MASK, status);
1680 
1681 	if ((response->ram_type != COMMS_SRAM) &&
1682 					(response->ram_type != COMMS_DRAM)) {
1683 		dev_err(hdev->dev, "FW status: invalid RAM type %u\n",
1684 							response->ram_type);
1685 		return -EIO;
1686 	}
1687 
1688 	return 0;
1689 }
1690 
1691 /**
1692  * hl_fw_dynamic_wait_for_status - wait for status in dynamic FW load
1693  *
1694  * @hdev: pointer to the habanalabs device structure
1695  * @fw_loader: managing structure for loading device's FW
1696  * @expected_status: expected status to wait for
1697  * @timeout: timeout for status wait
1698  *
1699  * @return 0 on success, otherwise non-zero error code
1700  *
1701  * Waiting for status from the FW includes polling the FW status register until
1702  * the expected status is received or a timeout occurs (whichever comes first).
1703  */
1704 static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
1705 						struct fw_load_mgr *fw_loader,
1706 						enum comms_sts expected_status,
1707 						u32 timeout)
1708 {
1709 	struct cpu_dyn_regs *dyn_regs;
1710 	u32 status;
1711 	int rc;
1712 
1713 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
1714 
1715 	trace_habanalabs_comms_wait_status(hdev->dev, comms_sts_str_arr[expected_status]);
1716 
1717 	/* Wait for expected status */
1718 	rc = hl_poll_timeout(
1719 		hdev,
1720 		le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
1721 		status,
1722 		FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
1723 		hdev->fw_comms_poll_interval_usec,
1724 		timeout);
1725 
1726 	if (rc) {
1727 		hl_fw_dynamic_report_error_status(hdev, status,
1728 							expected_status);
1729 		return -EIO;
1730 	}
1731 
1732 	trace_habanalabs_comms_wait_status_done(hdev->dev, comms_sts_str_arr[expected_status]);
1733 
1734 	/*
1735 	 * skip storing FW response for NOOP to preserve the actual desired
1736 	 * FW status
1737 	 */
1738 	if (expected_status == COMMS_STS_NOOP)
1739 		return 0;
1740 
1741 	rc = hl_fw_dynamic_extract_fw_response(hdev, fw_loader,
1742 					&fw_loader->dynamic_loader.response,
1743 					status);
1744 	return rc;
1745 }
1746 
1747 /**
1748  * hl_fw_dynamic_send_clear_cmd - send clear command to FW
1749  *
1750  * @hdev: pointer to the habanalabs device structure
1751  * @fw_loader: managing structure for loading device's FW
1752  *
1753  * @return 0 on success, otherwise non-zero error code
1754  *
1755  * After a command cycle between the LKD and the FW CPU (i.e. the LKD got an
1756  * expected status from the FW), we need to clear the CPU status register in
1757  * order to avoid garbage between command cycles.
1758  * This is done by sending a clear command and then polling the CPU to LKD
1759  * status register until it holds the NOOP status.
1760  */
1761 static int hl_fw_dynamic_send_clear_cmd(struct hl_device *hdev,
1762 						struct fw_load_mgr *fw_loader)
1763 {
1764 	hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_CLR_STS, 0);
1765 
1766 	return hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_NOOP,
1767 							fw_loader->cpu_timeout);
1768 }
1769 
1770 /**
1771  * hl_fw_dynamic_send_protocol_cmd - send LKD to FW cmd and wait for ACK
1772  *
1773  * @hdev: pointer to the habanalabs device structure
1774  * @fw_loader: managing structure for loading device's FW
1775  * @cmd: LKD to FW cmd code
1776  * @size: size of next FW component to be loaded (0 if not necessary)
1777  * @wait_ok: if true also wait for OK response from FW
1778  * @timeout: timeout for status wait
1779  *
1780  * @return 0 on success, otherwise non-zero error code
1781  *
1782  * brief:
1783  * when sending protocol command we have the following steps:
1784  * - send clear (clear command and verify clear status register)
1785  * - send the actual protocol command
1786  * - wait for ACK on the protocol command
1787  * - send clear
1788  * - send NOOP
1789  * if, in addition, the specific protocol command should wait for OK then:
1790  * - wait for OK
1791  * - send clear
1792  * - send NOOP
1793  *
1794  * NOTES:
1795  * send clear: this is necessary in order to clear the status register and avoid
1796  *             leftovers between commands
1797  * NOOP command: necessary to avoid the FW looping on the clear command
1798  */
1799 int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
1800 				struct fw_load_mgr *fw_loader,
1801 				enum comms_cmd cmd, unsigned int size,
1802 				bool wait_ok, u32 timeout)
1803 {
1804 	int rc;
1805 
1806 	trace_habanalabs_comms_protocol_cmd(hdev->dev, comms_cmd_str_arr[cmd]);
1807 
1808 	/* first send clear command to clean former commands */
1809 	rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1810 	if (rc)
1811 		return rc;
1812 
1813 	/* send the actual command */
1814 	hl_fw_dynamic_send_cmd(hdev, fw_loader, cmd, size);
1815 
1816 	/* wait for ACK for the command */
1817 	rc = hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_ACK,
1818 								timeout);
1819 	if (rc)
1820 		return rc;
1821 
1822 	/* clear command to prepare for NOOP command */
1823 	rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1824 	if (rc)
1825 		return rc;
1826 
1827 	/* send the actual NOOP command */
1828 	hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_NOOP, 0);
1829 
1830 	if (!wait_ok)
1831 		return 0;
1832 
1833 	rc = hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_OK,
1834 								timeout);
1835 	if (rc)
1836 		return rc;
1837 
1838 	/* clear command to prepare for NOOP command */
1839 	rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1840 	if (rc)
1841 		return rc;
1842 
1843 	/* send the actual NOOP command */
1844 	hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_NOOP, 0);
1845 
1846 	return 0;
1847 }
1848 
1849 /**
1850  * hl_fw_compat_crc32 - CRC compatible with FW
1851  *
1852  * @data: pointer to the data
1853  * @size: size of the data
1854  *
1855  * @return the CRC32 result
1856  *
1857  * NOTE: kernel's CRC32 differs from standard CRC32 calculation.
1858  *       In order to be aligned we need to flip the bits of both the input
1859  *       initial CRC and the kernel's CRC32 result.
1860  *       In addition, both sides use an initial CRC of 0.
1861  */
1862 static u32 hl_fw_compat_crc32(u8 *data, size_t size)
1863 {
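	/*
	 * crc32_le() applies no pre/post bit inversion, so inverting both the
	 * seed and the result yields the conventional (zlib-style) CRC32 of
	 * the buffer, matching the FW calculation that starts from CRC 0.
	 */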
1864 	return ~crc32_le(~((u32)0), data, size);
1865 }
1866 
1867 /**
1868  * hl_fw_dynamic_validate_memory_bound - validate memory bounds for memory
1869  *                                        transfer (image or descriptor) between
1870  *                                        host and FW
1871  *
1872  * @hdev: pointer to the habanalabs device structure
1873  * @addr: device address of memory transfer
1874  * @size: memory transfer size
1875  * @region: PCI memory region
1876  *
1877  * @return 0 on success, otherwise non-zero error code
1878  */
1879 static int hl_fw_dynamic_validate_memory_bound(struct hl_device *hdev,
1880 						u64 addr, size_t size,
1881 						struct pci_mem_region *region)
1882 {
1883 	u64 end_addr;
1884 
1885 	/* now make sure that the memory transfer is within region's bounds */
1886 	end_addr = addr + size;
1887 	if (end_addr >= region->region_base + region->region_size) {
1888 		dev_err(hdev->dev,
1889 			"dynamic FW load: memory transfer end address out of memory region bounds. addr: %llx\n",
1890 							end_addr);
1891 		return -EIO;
1892 	}
1893 
1894 	/*
1895 	 * Now make sure the memory transfer is within the predefined BAR bounds,
1896 	 * i.e. below (region_base - offset_in_bar) + bar_size. This is to make
1897 	 * sure we do not need to set the BAR (e.g. for DRAM memory transfers).
1898 	 */
1899 	if (end_addr >= region->region_base - region->offset_in_bar +
1900 							region->bar_size) {
1901 		dev_err(hdev->dev,
1902 			"FW image beyond PCI BAR bounds\n");
1903 		return -EIO;
1904 	}
1905 
1906 	return 0;
1907 }
1908 
1909 /**
1910  * hl_fw_dynamic_validate_descriptor - validate FW descriptor
1911  *
1912  * @hdev: pointer to the habanalabs device structure
1913  * @fw_loader: managing structure for loading device's FW
1914  * @fw_desc: the descriptor from FW
1915  *
1916  * @return 0 on success, otherwise non-zero error code
1917  */
1918 static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
1919 					struct fw_load_mgr *fw_loader,
1920 					struct lkd_fw_comms_desc *fw_desc)
1921 {
1922 	struct pci_mem_region *region;
1923 	enum pci_region region_id;
1924 	size_t data_size;
1925 	u32 data_crc32;
1926 	u8 *data_ptr;
1927 	u64 addr;
1928 	int rc;
1929 
1930 	if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC)
1931 		dev_dbg(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
1932 				le32_to_cpu(fw_desc->header.magic));
1933 
1934 	if (fw_desc->header.version != HL_COMMS_DESC_VER)
1935 		dev_dbg(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
1936 				fw_desc->header.version);
1937 
1938 	/*
1939 	 * Calc CRC32 of the data without the header. Use the size of the descriptor
1940 	 * reported by firmware, without calculating it ourselves, to allow adding
1941 	 * more fields to the lkd_fw_comms_desc structure.
1942 	 * Note that there are no alignment/stride address issues here as all
1943 	 * structures are 64 bit padded.
1944 	 */
1945 	data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header);
1946 	data_size = le16_to_cpu(fw_desc->header.size);
1947 
1948 	data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
1949 	if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) {
1950 		dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
1951 			data_crc32, le32_to_cpu(fw_desc->header.crc32));
1952 		return -EIO;
1953 	}
1954 
1955 	/* find memory region to which to copy the image */
1956 	addr = le64_to_cpu(fw_desc->img_addr);
1957 	region_id = hl_get_pci_memory_region(hdev, addr);
1958 	if ((region_id != PCI_REGION_SRAM) && (region_id != PCI_REGION_DRAM)) {
1959 		dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr);
1960 		return -EIO;
1961 	}
1962 
1963 	region = &hdev->pci_mem_region[region_id];
1964 
1965 	/* store the region for the copy stage */
1966 	fw_loader->dynamic_loader.image_region = region;
1967 
1968 	/*
1969 	 * here we know that the start address is valid, now make sure that the
1970 	 * image is within region's bounds
1971 	 */
1972 	rc = hl_fw_dynamic_validate_memory_bound(hdev, addr,
1973 					fw_loader->dynamic_loader.fw_image_size,
1974 					region);
1975 	if (rc) {
1976 		dev_err(hdev->dev, "invalid mem transfer request for FW image\n");
1977 		return rc;
1978 	}
1979 
1980 	/* here we can mark the descriptor as valid as the content has been validated */
1981 	fw_loader->dynamic_loader.fw_desc_valid = true;
1982 
1983 	return 0;
1984 }
1985 
1986 static int hl_fw_dynamic_validate_response(struct hl_device *hdev,
1987 						struct fw_response *response,
1988 						struct pci_mem_region *region)
1989 {
1990 	u64 device_addr;
1991 	int rc;
1992 
1993 	device_addr = region->region_base + response->ram_offset;
1994 
1995 	/*
1996 	 * Validate that the descriptor is within the region's bounds.
1997 	 * Note that as the start address was supplied according to the RAM
1998 	 * type, testing only the end address is enough.
1999 	 */
2000 	rc = hl_fw_dynamic_validate_memory_bound(hdev, device_addr,
2001 					sizeof(struct lkd_fw_comms_desc),
2002 					region);
2003 	return rc;
2004 }
2005 
2006 /**
2007  * hl_fw_dynamic_read_descriptor_msg - read and show the ASCII msg that was sent by the FW
2008  *
2009  * @hdev: pointer to the habanalabs device structure
2010  * @fw_desc: the descriptor from FW
2011  */
2012 static void hl_fw_dynamic_read_descriptor_msg(struct hl_device *hdev,
2013 					struct lkd_fw_comms_desc *fw_desc)
2014 {
2015 	int i;
2016 	char *msg;
2017 
2018 	for (i = 0 ; i < LKD_FW_ASCII_MSG_MAX ; i++) {
2019 		if (!fw_desc->ascii_msg[i].valid)
2020 			return;
2021 
2022 		/* force NULL termination */
2023 		msg = fw_desc->ascii_msg[i].msg;
2024 		msg[LKD_FW_ASCII_MSG_MAX_LEN - 1] = '\0';
2025 
2026 		switch (fw_desc->ascii_msg[i].msg_lvl) {
2027 		case LKD_FW_ASCII_MSG_ERR:
2028 			dev_err(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2029 			break;
2030 		case LKD_FW_ASCII_MSG_WRN:
2031 			dev_warn(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2032 			break;
2033 		case LKD_FW_ASCII_MSG_INF:
2034 			dev_info(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2035 			break;
2036 		default:
2037 			dev_dbg(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2038 			break;
2039 		}
2040 	}
2041 }
2042 
2043 /**
2044  * hl_fw_dynamic_read_and_validate_descriptor - read and validate FW descriptor
2045  *
2046  * @hdev: pointer to the habanalabs device structure
2047  * @fw_loader: managing structure for loading device's FW
2048  *
2049  * @return 0 on success, otherwise non-zero error code
2050  */
2051 static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
2052 						struct fw_load_mgr *fw_loader)
2053 {
2054 	struct lkd_fw_comms_desc *fw_desc;
2055 	struct pci_mem_region *region;
2056 	struct fw_response *response;
2057 	void *temp_fw_desc;
2058 	void __iomem *src;
2059 	u16 fw_data_size;
2060 	enum pci_region region_id;
2061 	int rc;
2062 
2063 	fw_desc = &fw_loader->dynamic_loader.comm_desc;
2064 	response = &fw_loader->dynamic_loader.response;
2065 
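	/* The FW response indicates whether the descriptor resides in SRAM or DRAM */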
2066 	region_id = (response->ram_type == COMMS_SRAM) ?
2067 					PCI_REGION_SRAM : PCI_REGION_DRAM;
2068 
2069 	region = &hdev->pci_mem_region[region_id];
2070 
2071 	rc = hl_fw_dynamic_validate_response(hdev, response, region);
2072 	if (rc) {
2073 		dev_err(hdev->dev,
2074 			"invalid mem transfer request for FW descriptor\n");
2075 		return rc;
2076 	}
2077 
2078 	/*
2079 	 * Extract the address to copy the descriptor from.
2080 	 * In addition, as the descriptor value is going to be overridden by new
2081 	 * data, we mark it as invalid.
2082 	 * It will be marked as valid again once validated.
2083 	 */
2084 	fw_loader->dynamic_loader.fw_desc_valid = false;
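	/*
	 * ram_offset is relative to the region base, which corresponds to
	 * offset_in_bar inside the matching PCI BAR mapping.
	 */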
2085 	src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2086 							response->ram_offset;
2087 
2088 	/*
2089 	 * We do the copy of the fw descriptor in 2 phases:
2090 	 * 1. copy the header + data info according to our lkd_fw_comms_desc definition.
2091 	 *    Then we're able to read the actual data size provided by the fw.
2092 	 *    This is needed for cases where data in the descriptor was changed (added/removed)
2093 	 *    in the embedded specs header file before updating the lkd copy of the header file.
2094 	 * 2. copy the descriptor to a temporary buffer with an aligned size and send it for validation
2095 	 */
2096 	memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
2097 	fw_data_size = le16_to_cpu(fw_desc->header.size);
2098 
2099 	temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size);
2100 	if (!temp_fw_desc)
2101 		return -ENOMEM;
2102 
2103 	memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size);
2104 
2105 	rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader,
2106 					(struct lkd_fw_comms_desc *) temp_fw_desc);
2107 
2108 	if (!rc)
2109 		hl_fw_dynamic_read_descriptor_msg(hdev, temp_fw_desc);
2110 
2111 	vfree(temp_fw_desc);
2112 
2113 	return rc;
2114 }
2115 
2116 /**
2117  * hl_fw_dynamic_request_descriptor - handshake with CPU to get FW descriptor
2118  *
2119  * @hdev: pointer to the habanalabs device structure
2120  * @fw_loader: managing structure for loading device's FW
2121  * @next_image_size: size to allocate for next FW component
2122  *
2123  * @return 0 on success, otherwise non-zero error code
2124  */
2125 static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev,
2126 						struct fw_load_mgr *fw_loader,
2127 						size_t next_image_size)
2128 {
2129 	int rc;
2130 
2131 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_PREP_DESC,
2132 						next_image_size, true,
2133 						fw_loader->cpu_timeout);
2134 	if (rc)
2135 		return rc;
2136 
2137 	return hl_fw_dynamic_read_and_validate_descriptor(hdev, fw_loader);
2138 }
2139 
2140 /**
2141  * hl_fw_dynamic_read_device_fw_version - read FW version to exposed properties
2142  *
2143  * @hdev: pointer to the habanalabs device structure
2144  * @fwc: the firmware component
2145  * @fw_version: fw component's version string
2146  */
2147 static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
2148 					enum hl_fw_component fwc,
2149 					const char *fw_version)
2150 {
2151 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2152 	char *preboot_ver, *boot_ver;
2153 	char btl_ver[32];
2154 
2155 	switch (fwc) {
2156 	case FW_COMP_BOOT_FIT:
2157 		strscpy(prop->uboot_ver, fw_version, VERSION_MAX_LEN);
2158 		boot_ver = extract_fw_ver_from_str(prop->uboot_ver);
2159 		if (boot_ver) {
2160 			dev_info(hdev->dev, "boot-fit version %s\n", boot_ver);
2161 			kfree(boot_ver);
2162 		}
2163 
2164 		break;
2165 	case FW_COMP_PREBOOT:
2166 		strscpy(prop->preboot_ver, fw_version, VERSION_MAX_LEN);
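		/*
		 * The text that precedes the "Preboot" marker is the boot loader
		 * (BTL) version string - print it separately.
		 */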
2167 		preboot_ver = strnstr(prop->preboot_ver, "Preboot",
2168 						VERSION_MAX_LEN);
2169 		if (preboot_ver && preboot_ver != prop->preboot_ver) {
2170 			strscpy(btl_ver, prop->preboot_ver,
2171 				min((int) (preboot_ver - prop->preboot_ver), 31));
2172 			dev_info(hdev->dev, "%s\n", btl_ver);
2173 		}
2174 
2175 		preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
2176 		if (preboot_ver) {
2177 			int rc;
2178 
2179 			dev_info(hdev->dev, "preboot version %s\n", preboot_ver);
2180 
2181 			rc = hl_get_preboot_major_minor(hdev, preboot_ver);
2182 			kfree(preboot_ver);
2183 			if (rc)
2184 				return rc;
2185 		}
2186 
2187 		break;
2188 	default:
2189 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2190 		return -EINVAL;
2191 	}
2192 
2193 	return 0;
2194 }
2195 
2196 /**
2197  * hl_fw_dynamic_copy_image - copy image to memory allocated by the FW
2198  *
2199  * @hdev: pointer to the habanalabs device structure
2200  * @fw: fw descriptor
2201  * @fw_loader: managing structure for loading device's FW
2202  */
2203 static int hl_fw_dynamic_copy_image(struct hl_device *hdev,
2204 						const struct firmware *fw,
2205 						struct fw_load_mgr *fw_loader)
2206 {
2207 	struct lkd_fw_comms_desc *fw_desc;
2208 	struct pci_mem_region *region;
2209 	void __iomem *dest;
2210 	u64 addr;
2211 	int rc;
2212 
2213 	fw_desc = &fw_loader->dynamic_loader.comm_desc;
2214 	addr = le64_to_cpu(fw_desc->img_addr);
2215 
2216 	/* find memory region to which to copy the image */
2217 	region = fw_loader->dynamic_loader.image_region;
2218 
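	/* translate the device address into its host-mapped address within the BAR */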
2219 	dest = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2220 					(addr - region->region_base);
2221 
2222 	rc = hl_fw_copy_fw_to_device(hdev, fw, dest,
2223 					fw_loader->boot_fit_img.src_off,
2224 					fw_loader->boot_fit_img.copy_size);
2225 
2226 	return rc;
2227 }
2228 
2229 /**
2230  * hl_fw_dynamic_copy_msg - copy msg to memory allocated by the FW
2231  *
2232  * @hdev: pointer to the habanalabs device structure
2233  * @msg: message
2234  * @fw_loader: managing structure for loading device's FW
2235  */
2236 static int hl_fw_dynamic_copy_msg(struct hl_device *hdev,
2237 		struct lkd_msg_comms *msg, struct fw_load_mgr *fw_loader)
2238 {
2239 	struct lkd_fw_comms_desc *fw_desc;
2240 	struct pci_mem_region *region;
2241 	void __iomem *dest;
2242 	u64 addr;
2243 	int rc;
2244 
2245 	fw_desc = &fw_loader->dynamic_loader.comm_desc;
2246 	addr = le64_to_cpu(fw_desc->img_addr);
2247 
2248 	/* find memory region to which to copy the image */
2249 	region = fw_loader->dynamic_loader.image_region;
2250 
2251 	dest = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2252 					(addr - region->region_base);
2253 
2254 	rc = hl_fw_copy_msg_to_device(hdev, msg, dest, 0, 0);
2255 
2256 	return rc;
2257 }
2258 
2259 /**
2260  * hl_fw_boot_fit_update_state - update internal data structures after boot-fit
2261  *                               is loaded
2262  *
2263  * @hdev: pointer to the habanalabs device structure
2264  * @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
2265  * @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
2268  */
2269 static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
2270 						u32 cpu_boot_dev_sts0_reg,
2271 						u32 cpu_boot_dev_sts1_reg)
2272 {
2273 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2274 
2275 	hdev->fw_loader.fw_comp_loaded |= FW_TYPE_BOOT_CPU;
2276 
2277 	/* Read boot_cpu status bits */
2278 	if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
2279 		prop->fw_bootfit_cpu_boot_dev_sts0 =
2280 				RREG32(cpu_boot_dev_sts0_reg);
2281 
2282 		prop->hard_reset_done_by_fw = !!(prop->fw_bootfit_cpu_boot_dev_sts0 &
2283 							CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
2284 
2285 		dev_dbg(hdev->dev, "Firmware boot CPU status0 %#x\n",
2286 					prop->fw_bootfit_cpu_boot_dev_sts0);
2287 	}
2288 
2289 	if (prop->fw_cpu_boot_dev_sts1_valid) {
2290 		prop->fw_bootfit_cpu_boot_dev_sts1 =
2291 				RREG32(cpu_boot_dev_sts1_reg);
2292 
2293 		dev_dbg(hdev->dev, "Firmware boot CPU status1 %#x\n",
2294 					prop->fw_bootfit_cpu_boot_dev_sts1);
2295 	}
2296 
2297 	dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
2298 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
2299 }
2300 
2301 static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
2302 {
2303 	struct cpu_dyn_regs *dyn_regs =
2304 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2305 
2306 	/* Check whether all 3 interrupt interfaces are set, if not use a
2307 	 * single interface
2308 	 */
2309 	if (!hdev->asic_prop.gic_interrupts_enable &&
2310 			!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2311 				CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
2312 		dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
2313 		dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
2314 
2315 		dev_warn(hdev->dev,
2316 			"Using a single interrupt interface towards cpucp\n");
2317 	}
2318 }
2319 /**
2320  * hl_fw_dynamic_load_image - load FW image using dynamic protocol
2321  *
2322  * @hdev: pointer to the habanalabs device structure
2323  * @fw_loader: managing structure for loading device's FW
2324  * @load_fwc: the FW component to be loaded
2325  * @img_ld_timeout: image load timeout
2326  *
2327  * @return 0 on success, otherwise non-zero error code
2328  */
2329 static int hl_fw_dynamic_load_image(struct hl_device *hdev,
2330 						struct fw_load_mgr *fw_loader,
2331 						enum hl_fw_component load_fwc,
2332 						u32 img_ld_timeout)
2333 {
2334 	enum hl_fw_component cur_fwc;
2335 	const struct firmware *fw;
2336 	char *fw_name;
2337 	int rc = 0;
2338 
2339 	/*
2340 	 * when loading an image we have one of 2 scenarios:
2341 	 * 1. current FW component is preboot and we want to load boot-fit
2342 	 * 2. current FW component is boot-fit and we want to load linux
2343 	 */
2344 	if (load_fwc == FW_COMP_BOOT_FIT) {
2345 		cur_fwc = FW_COMP_PREBOOT;
2346 		fw_name = fw_loader->boot_fit_img.image_name;
2347 	} else {
2348 		cur_fwc = FW_COMP_BOOT_FIT;
2349 		fw_name = fw_loader->linux_img.image_name;
2350 	}
2351 
2352 	/* request FW in order to communicate to FW the size to be allocated */
2353 	rc = hl_request_fw(hdev, &fw, fw_name);
2354 	if (rc)
2355 		return rc;
2356 
2357 	/* store the image size for future validation */
2358 	fw_loader->dynamic_loader.fw_image_size = fw->size;
2359 
2360 	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, fw->size);
2361 	if (rc)
2362 		goto release_fw;
2363 
2364 	/* read the version of the currently running FW component */
2365 	rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
2366 				fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
2367 	if (rc)
2368 		goto release_fw;
2369 
2370 	/* update state according to boot stage */
2371 	if (cur_fwc == FW_COMP_BOOT_FIT) {
2372 		struct cpu_dyn_regs *dyn_regs;
2373 
2374 		dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
2375 		hl_fw_boot_fit_update_state(hdev,
2376 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2377 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2378 	}
2379 
2380 	/* copy boot fit to space allocated by FW */
2381 	rc = hl_fw_dynamic_copy_image(hdev, fw, fw_loader);
2382 	if (rc)
2383 		goto release_fw;
2384 
2385 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_DATA_RDY,
2386 						0, true,
2387 						fw_loader->cpu_timeout);
2388 	if (rc)
2389 		goto release_fw;
2390 
2391 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_EXEC,
2392 						0, false,
2393 						img_ld_timeout);
2394 
2395 release_fw:
2396 	hl_release_firmware(fw);
2397 	return rc;
2398 }
2399 
2400 static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
2401 					struct fw_load_mgr *fw_loader)
2402 {
2403 	struct dynamic_fw_load_mgr *dyn_loader;
2404 	u32 status;
2405 	int rc;
2406 
2407 	dyn_loader = &fw_loader->dynamic_loader;
2408 
2409 	/*
2410 	 * Make sure CPU boot-loader is running
2411 	 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
2412 	 * yet there is a debug scenario in which we load uboot (without Linux),
2413 	 * which at a later stage is relocated to DRAM. In this case we expect
2414 	 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
2415 	 * poll flags.
2416 	 */
2417 	rc = hl_poll_timeout(
2418 		hdev,
2419 		le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
2420 		status,
2421 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
2422 		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
2423 		hdev->fw_poll_interval_usec,
2424 		dyn_loader->wait_for_bl_timeout);
2425 	if (rc) {
2426 		dev_err(hdev->dev, "failed to wait for boot (status = %d)\n", status);
2427 		return rc;
2428 	}
2429 
2430 	dev_dbg(hdev->dev, "uboot status = %d\n", status);
2431 	return 0;
2432 }
2433 
2434 static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
2435 						struct fw_load_mgr *fw_loader)
2436 {
2437 	struct dynamic_fw_load_mgr *dyn_loader;
2438 	u32 status;
2439 	int rc;
2440 
2441 	dyn_loader = &fw_loader->dynamic_loader;
2442 
2443 	/* Make sure CPU linux is running */
2444 
2445 	rc = hl_poll_timeout(
2446 		hdev,
2447 		le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
2448 		status,
2449 		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
2450 		hdev->fw_poll_interval_usec,
2451 		fw_loader->cpu_timeout);
2452 	if (rc) {
2453 		dev_err(hdev->dev, "failed to wait for Linux (status = %d)\n", status);
2454 		return rc;
2455 	}
2456 
2457 	dev_dbg(hdev->dev, "Boot status = %d\n", status);
2458 	return 0;
2459 }
2460 
2461 /**
2462  * hl_fw_linux_update_state -	update internal data structures after Linux
2463  *				is loaded.
2464  *				Note: Linux initialization is composed mainly
2465  *				of two stages - loading kernel (SRAM_AVAIL)
2466  *				& loading ARMCP.
2467  *				Therefore reading boot device status in any of
2468  *				these stages might result in different values.
2469  *
2470  * @hdev: pointer to the habanalabs device structure
2471  * @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
2472  * @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
2475  */
2476 static void hl_fw_linux_update_state(struct hl_device *hdev,
2477 						u32 cpu_boot_dev_sts0_reg,
2478 						u32 cpu_boot_dev_sts1_reg)
2479 {
2480 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2481 
2482 	hdev->fw_loader.fw_comp_loaded |= FW_TYPE_LINUX;
2483 
2484 	/* Read FW application security bits */
2485 	if (prop->fw_cpu_boot_dev_sts0_valid) {
2486 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
2487 
2488 		prop->hard_reset_done_by_fw = !!(prop->fw_app_cpu_boot_dev_sts0 &
2489 							CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
2490 
2491 		if (prop->fw_app_cpu_boot_dev_sts0 &
2492 				CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
2493 			prop->gic_interrupts_enable = false;
2494 
2495 		dev_dbg(hdev->dev,
2496 			"Firmware application CPU status0 %#x\n",
2497 			prop->fw_app_cpu_boot_dev_sts0);
2498 
2499 		dev_dbg(hdev->dev, "GIC controller is %s\n",
2500 				prop->gic_interrupts_enable ?
2501 						"enabled" : "disabled");
2502 	}
2503 
2504 	if (prop->fw_cpu_boot_dev_sts1_valid) {
2505 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
2506 
2507 		dev_dbg(hdev->dev,
2508 			"Firmware application CPU status1 %#x\n",
2509 			prop->fw_app_cpu_boot_dev_sts1);
2510 	}
2511 
2512 	dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
2513 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
2514 
2515 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2516 }
2517 
2518 /**
2519  * hl_fw_dynamic_send_msg - send a COMMS message with attached data
2520  *
2521  * @hdev: pointer to the habanalabs device structure
2522  * @fw_loader: managing structure for loading device's FW
2523  * @msg_type: message type
2524  * @data: data to be sent
2525  *
2526  * @return 0 on success, otherwise non-zero error code
2527  */
2528 static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
2529 		struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
2530 {
2531 	struct lkd_msg_comms *msg;
2532 	int rc;
2533 
2534 	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
2535 	if (!msg)
2536 		return -ENOMEM;
2537 
2538 	/* create message to be sent */
2539 	msg->header.type = msg_type;
2540 	msg->header.size = cpu_to_le16(sizeof(struct comms_msg_header));
2541 	msg->header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
2542 
2543 	switch (msg_type) {
2544 	case HL_COMMS_RESET_CAUSE_TYPE:
2545 		msg->reset_cause = *(__u8 *) data;
2546 		break;
2547 
2548 	default:
2549 		dev_err(hdev->dev,
2550 			"Send COMMS message - invalid message type %u\n",
2551 			msg_type);
2552 		rc = -EINVAL;
2553 		goto out;
2554 	}
2555 
2556 	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
2557 			sizeof(struct lkd_msg_comms));
2558 	if (rc)
2559 		goto out;
2560 
2561 	/* copy message to space allocated by FW */
2562 	rc = hl_fw_dynamic_copy_msg(hdev, msg, fw_loader);
2563 	if (rc)
2564 		goto out;
2565 
2566 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_DATA_RDY,
2567 						0, true,
2568 						fw_loader->cpu_timeout);
2569 	if (rc)
2570 		goto out;
2571 
2572 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_EXEC,
2573 						0, true,
2574 						fw_loader->cpu_timeout);
2575 
2576 out:
2577 	kfree(msg);
2578 	return rc;
2579 }
2580 
2581 /**
2582  * hl_fw_dynamic_init_cpu - initialize the device CPU using dynamic protocol
2583  *
2584  * @hdev: pointer to the habanalabs device structure
2585  * @fw_loader: managing structure for loading device's FW
2586  *
2587  * @return 0 on success, otherwise non-zero error code
2588  *
2589  * brief: the dynamic protocol is a master (LKD) / slave (FW CPU) protocol.
2590  * The communication is done using registers:
2591  * - LKD command register
2592  * - FW status register
2593  * The protocol is race free. This goal is achieved by splitting the requests
2594  * and responses into known synchronization points between the LKD and the FW.
2595  * Each response to an LKD request is known and bound to a predefined timeout.
2596  * In case of timeout expiration without the desired status from the FW, the
2597  * protocol (and hence the boot) will fail.
2598  */
2599 static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
2600 					struct fw_load_mgr *fw_loader)
2601 {
2602 	struct cpu_dyn_regs *dyn_regs;
2603 	int rc, fw_error_rc;
2604 
2605 	dev_info(hdev->dev,
2606 		"Loading %sfirmware to device, may take some time...\n",
2607 		hdev->asic_prop.fw_security_enabled ? "secured " : "");
2608 
2609 	/* initialize FW descriptor as invalid */
2610 	fw_loader->dynamic_loader.fw_desc_valid = false;
2611 
2612 	/*
2613 	 * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
2614 	 * It will be updated from FW after hl_fw_dynamic_request_descriptor().
2615 	 */
2616 	dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
2617 
2618 	rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
2619 						0, true,
2620 						fw_loader->cpu_timeout);
2621 	if (rc)
2622 		goto protocol_err;
2623 
2624 	if (hdev->reset_info.curr_reset_cause) {
2625 		rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
2626 				HL_COMMS_RESET_CAUSE_TYPE, &hdev->reset_info.curr_reset_cause);
2627 		if (rc)
2628 			goto protocol_err;
2629 
2630 		/* Clear current reset cause */
2631 		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
2632 	}
2633 
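	/*
	 * The driver was not asked to load the boot CPU FW. In that case only
	 * fetch the descriptor to read the preboot version and binning info,
	 * and skip the image loading below.
	 */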
2634 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
2635 		struct lkd_fw_binning_info *binning_info;
2636 
2637 		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
2638 		if (rc)
2639 			goto protocol_err;
2640 
2641 		/* read preboot version */
2642 		rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
2643 				fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
2644 
2645 		if (rc)
2646 			return rc;
2647 
2648 		/* read binning info from preboot */
2649 		if (hdev->support_preboot_binning) {
2650 			binning_info = &fw_loader->dynamic_loader.comm_desc.binning_info;
2651 			hdev->tpc_binning = le64_to_cpu(binning_info->tpc_mask_l);
2652 			hdev->dram_binning = le32_to_cpu(binning_info->dram_mask);
2653 			hdev->edma_binning = le32_to_cpu(binning_info->edma_mask);
2654 			hdev->decoder_binning = le32_to_cpu(binning_info->dec_mask);
2655 			hdev->rotator_binning = le32_to_cpu(binning_info->rot_mask);
2656 
2657 			rc = hdev->asic_funcs->set_dram_properties(hdev);
2658 			if (rc)
2659 				return rc;
2660 
2661 			rc = hdev->asic_funcs->set_binning_masks(hdev);
2662 			if (rc)
2663 				return rc;
2664 
2665 			dev_dbg(hdev->dev,
2666 				"Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x, rot:0x%x\n",
2667 				hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2668 				hdev->decoder_binning, hdev->rotator_binning);
2669 		}
2670 
2671 		return 0;
2672 	}
2673 
2674 	/* load boot fit to FW */
2675 	rc = hl_fw_dynamic_load_image(hdev, fw_loader, FW_COMP_BOOT_FIT,
2676 						fw_loader->boot_fit_timeout);
2677 	if (rc) {
2678 		dev_err(hdev->dev, "failed to load boot fit\n");
2679 		goto protocol_err;
2680 	}
2681 
2682 	/*
2683 	 * when testing FW load (without Linux) on PLDM we don't want to
2684 	 * wait until boot fit is active as it may take several hours.
2685 	 * instead, we load the bootfit and let it do all initialization in
2686 	 * the background.
2687 	 */
2688 	if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
2689 		return 0;
2690 
2691 	rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
2692 	if (rc)
2693 		goto protocol_err;
2694 
2695 	/* Enable DRAM scrambling before Linux boot and after successful
2696 	 *  UBoot
2697 	 */
2698 	hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
2699 
2700 	if (!(hdev->fw_components & FW_TYPE_LINUX)) {
2701 		dev_info(hdev->dev, "Skip loading Linux F/W\n");
2702 		return 0;
2703 	}
2704 
2705 	if (fw_loader->skip_bmc) {
2706 		rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader,
2707 							COMMS_SKIP_BMC, 0,
2708 							true,
2709 							fw_loader->cpu_timeout);
2710 		if (rc) {
2711 			dev_err(hdev->dev, "failed to send skip BMC command\n");
2712 			goto protocol_err;
2713 		}
2714 	}
2715 
2716 	/* load Linux image to FW */
2717 	rc = hl_fw_dynamic_load_image(hdev, fw_loader, FW_COMP_LINUX,
2718 							fw_loader->cpu_timeout);
2719 	if (rc) {
2720 		dev_err(hdev->dev, "failed to load Linux\n");
2721 		goto protocol_err;
2722 	}
2723 
2724 	rc = hl_fw_dynamic_wait_for_linux_active(hdev, fw_loader);
2725 	if (rc)
2726 		goto protocol_err;
2727 
2728 	hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2729 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2730 
2731 	hl_fw_dynamic_update_linux_interrupt_if(hdev);
2732 
2733 protocol_err:
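	/*
	 * Read the FW boot error registers only if the descriptor was validated,
	 * as the dyn_regs addresses originate from it.
	 */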
2734 	if (fw_loader->dynamic_loader.fw_desc_valid) {
2735 		fw_error_rc = fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
2736 				le32_to_cpu(dyn_regs->cpu_boot_err1),
2737 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2738 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2739 
2740 		if (fw_error_rc)
2741 			return fw_error_rc;
2742 	}
2743 
2744 	return rc;
2745 }
2746 
2747 /**
2748  * hl_fw_static_init_cpu - initialize the device CPU using static protocol
2749  *
2750  * @hdev: pointer to the habanalabs device structure
2751  * @fw_loader: managing structure for loading device's FW
2752  *
2753  * @return 0 on success, otherwise non-zero error code
2754  */
2755 static int hl_fw_static_init_cpu(struct hl_device *hdev,
2756 					struct fw_load_mgr *fw_loader)
2757 {
2758 	u32 cpu_msg_status_reg, cpu_timeout, msg_to_cpu_reg, status;
2759 	u32 cpu_boot_dev_status0_reg, cpu_boot_dev_status1_reg;
2760 	struct static_fw_load_mgr *static_loader;
2761 	u32 cpu_boot_status_reg;
2762 	int rc;
2763 
2764 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
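	/* Nothing to do if the driver is not loading the boot CPU FW */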
2765 		return 0;
2766 
2767 	/* init common loader parameters */
2768 	cpu_timeout = fw_loader->cpu_timeout;
2769 
2770 	/* init static loader parameters */
2771 	static_loader = &fw_loader->static_loader;
2772 	cpu_msg_status_reg = static_loader->cpu_cmd_status_to_host_reg;
2773 	msg_to_cpu_reg = static_loader->kmd_msg_to_cpu_reg;
2774 	cpu_boot_dev_status0_reg = static_loader->cpu_boot_dev_status0_reg;
2775 	cpu_boot_dev_status1_reg = static_loader->cpu_boot_dev_status1_reg;
2776 	cpu_boot_status_reg = static_loader->cpu_boot_status_reg;
2777 
2778 	dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
2779 		cpu_timeout / USEC_PER_SEC);
2780 
2781 	/* Wait for boot FIT request */
2782 	rc = hl_poll_timeout(
2783 		hdev,
2784 		cpu_boot_status_reg,
2785 		status,
2786 		status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
2787 		hdev->fw_poll_interval_usec,
2788 		fw_loader->boot_fit_timeout);
2789 
2790 	if (rc) {
2791 		dev_dbg(hdev->dev,
2792 			"No boot fit request received (status = %d), resuming boot\n", status);
2793 	} else {
2794 		rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
2795 		if (rc)
2796 			goto out;
2797 
2798 		/* Clear device CPU message status */
2799 		WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
2800 
2801 		/* Signal device CPU that boot loader is ready */
2802 		WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
2803 
2804 		/* Poll for CPU device ack */
2805 		rc = hl_poll_timeout(
2806 			hdev,
2807 			cpu_msg_status_reg,
2808 			status,
2809 			status == CPU_MSG_OK,
2810 			hdev->fw_poll_interval_usec,
2811 			fw_loader->boot_fit_timeout);
2812 
2813 		if (rc) {
2814 			dev_err(hdev->dev,
2815 				"Timeout waiting for boot fit load ack (status = %d)\n", status);
2816 			goto out;
2817 		}
2818 
2819 		/* Clear message */
2820 		WREG32(msg_to_cpu_reg, KMD_MSG_NA);
2821 	}
2822 
2823 	/*
2824 	 * Make sure CPU boot-loader is running
2825 	 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
2826 	 * yet there is a debug scenario in which we load uboot (without Linux),
2827 	 * which at a later stage is relocated to DRAM. In this case we expect
2828 	 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
2829 	 * poll flags.
2830 	 */
2831 	rc = hl_poll_timeout(
2832 		hdev,
2833 		cpu_boot_status_reg,
2834 		status,
2835 		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
2836 		(status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
2837 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
2838 		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
2839 		hdev->fw_poll_interval_usec,
2840 		cpu_timeout);
2841 
2842 	dev_dbg(hdev->dev, "uboot status = %d\n", status);
2843 
2844 	/* Read U-Boot version now in case we will later fail */
2845 	hl_fw_static_read_device_fw_version(hdev, FW_COMP_BOOT_FIT);
2846 
2847 	/* update state according to boot stage */
2848 	hl_fw_boot_fit_update_state(hdev, cpu_boot_dev_status0_reg,
2849 						cpu_boot_dev_status1_reg);
2850 
2851 	if (rc) {
2852 		detect_cpu_boot_status(hdev, status);
2853 		rc = -EIO;
2854 		goto out;
2855 	}
2856 
2857 	/* Enable DRAM scrambling before Linux boot and after successful
2858 	 *  UBoot
2859 	 */
2860 	hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
2861 
2862 	if (!(hdev->fw_components & FW_TYPE_LINUX)) {
2863 		dev_info(hdev->dev, "Skip loading Linux F/W\n");
2864 		rc = 0;
2865 		goto out;
2866 	}
2867 
2868 	if (status == CPU_BOOT_STATUS_SRAM_AVAIL) {
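		/* SRAM_AVAIL means Linux is already running - nothing left to load */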
2869 		rc = 0;
2870 		goto out;
2871 	}
2872 
2873 	dev_info(hdev->dev,
2874 		"Loading firmware to device, may take some time...\n");
2875 
2876 	rc = hdev->asic_funcs->load_firmware_to_device(hdev);
2877 	if (rc)
2878 		goto out;
2879 
2880 	if (fw_loader->skip_bmc) {
2881 		WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
2882 
2883 		rc = hl_poll_timeout(
2884 			hdev,
2885 			cpu_boot_status_reg,
2886 			status,
2887 			(status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
2888 			hdev->fw_poll_interval_usec,
2889 			cpu_timeout);
2890 
2891 		if (rc) {
2892 			dev_err(hdev->dev,
2893 				"Failed to get ACK on skipping BMC (status = %d)\n",
2894 				status);
2895 			WREG32(msg_to_cpu_reg, KMD_MSG_NA);
2896 			rc = -EIO;
2897 			goto out;
2898 		}
2899 	}
2900 
2901 	WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
2902 
2903 	rc = hl_poll_timeout(
2904 		hdev,
2905 		cpu_boot_status_reg,
2906 		status,
2907 		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
2908 		hdev->fw_poll_interval_usec,
2909 		cpu_timeout);
2910 
2911 	/* Clear message */
2912 	WREG32(msg_to_cpu_reg, KMD_MSG_NA);
2913 
2914 	if (rc) {
2915 		if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
2916 			dev_err(hdev->dev,
2917 				"Device reports FIT image is corrupted\n");
2918 		else
2919 			dev_err(hdev->dev,
2920 				"Failed to load firmware to device (status = %d)\n",
2921 				status);
2922 
2923 		rc = -EIO;
2924 		goto out;
2925 	}
2926 
2927 	rc = fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
2928 					fw_loader->static_loader.boot_err1_reg,
2929 					cpu_boot_dev_status0_reg,
2930 					cpu_boot_dev_status1_reg);
2931 	if (rc)
2932 		return rc;
2933 
2934 	hl_fw_linux_update_state(hdev, cpu_boot_dev_status0_reg,
2935 						cpu_boot_dev_status1_reg);
2936 
2937 	return 0;
2938 
2939 out:
2940 	fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
2941 					fw_loader->static_loader.boot_err1_reg,
2942 					cpu_boot_dev_status0_reg,
2943 					cpu_boot_dev_status1_reg);
2944 
2945 	return rc;
2946 }
2947 
2948 /**
2949  * hl_fw_init_cpu - initialize the device CPU
2950  *
2951  * @hdev: pointer to the habanalabs device structure
2952  *
2953  * @return 0 on success, otherwise non-zero error code
2954  *
2955  * Perform the necessary initializations for the device's CPU. Takes into
2956  * account whether the init protocol is static or dynamic.
2957  */
2958 int hl_fw_init_cpu(struct hl_device *hdev)
2959 {
2960 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2961 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
2962 
2963 	return prop->dynamic_fw_load ?
2964 			hl_fw_dynamic_init_cpu(hdev, fw_loader) :
2965 			hl_fw_static_init_cpu(hdev, fw_loader);
2966 }
2967 
2968 void hl_fw_set_pll_profile(struct hl_device *hdev)
2969 {
2970 	hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
2971 				hdev->asic_prop.max_freq_value);
2972 }
2973 
2974 int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
2975 {
2976 	long value;
2977 
2978 	if (!hl_device_operational(hdev, NULL))
2979 		return -ENODEV;
2980 
2981 	if (!hdev->pdev) {
2982 		*cur_clk = 0;
2983 		*max_clk = 0;
2984 		return 0;
2985 	}
2986 
2987 	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
2988 
2989 	if (value < 0) {
2990 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", value);
2991 		return value;
2992 	}
2993 
2994 	*max_clk = (value / 1000 / 1000);
2995 
2996 	value = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
2997 
2998 	if (value < 0) {
2999 		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", value);
3000 		return value;
3001 	}
3002 
3003 	*cur_clk = (value / 1000 / 1000);
3004 
3005 	return 0;
3006 }
3007 
3008 long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
3009 {
3010 	struct cpucp_packet pkt;
3011 	u32 used_pll_idx;
3012 	u64 result;
3013 	int rc;
3014 
3015 	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
3016 	if (rc)
3017 		return rc;
3018 
3019 	memset(&pkt, 0, sizeof(pkt));
3020 
3021 	if (curr)
3022 		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
3023 						CPUCP_PKT_CTL_OPCODE_SHIFT);
3024 	else
3025 		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3026 
3027 	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
3028 
3029 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
3030 
3031 	if (rc) {
3032 		dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
3033 			used_pll_idx, rc);
3034 		return rc;
3035 	}
3036 
3037 	return (long) result;
3038 }
3039 
3040 void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
3041 {
3042 	struct cpucp_packet pkt;
3043 	u32 used_pll_idx;
3044 	int rc;
3045 
3046 	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
3047 	if (rc)
3048 		return;
3049 
3050 	memset(&pkt, 0, sizeof(pkt));
3051 
3052 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3053 	pkt.pll_index = cpu_to_le32((u32)used_pll_idx);
3054 	pkt.value = cpu_to_le64(freq);
3055 
3056 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
3057 
3058 	if (rc)
3059 		dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
3060 			used_pll_idx, rc);
3061 }
3062 
3063 long hl_fw_get_max_power(struct hl_device *hdev)
3064 {
3065 	struct cpucp_packet pkt;
3066 	u64 result;
3067 	int rc;
3068 
3069 	memset(&pkt, 0, sizeof(pkt));
3070 
3071 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3072 
3073 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
3074 
3075 	if (rc) {
3076 		dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
3077 		return rc;
3078 	}
3079 
3080 	return result;
3081 }
3082 
3083 void hl_fw_set_max_power(struct hl_device *hdev)
3084 {
3085 	struct cpucp_packet pkt;
3086 	int rc;
3087 
3088 	/* TODO: remove this after simulator supports this packet */
3089 	if (!hdev->pdev)
3090 		return;
3091 
3092 	memset(&pkt, 0, sizeof(pkt));
3093 
3094 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3095 	pkt.value = cpu_to_le64(hdev->max_power);
3096 
3097 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
3098 
3099 	if (rc)
3100 		dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
3101 }
3102 
3103 static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
3104 					u32 nonce, u32 timeout)
3105 {
3106 	struct cpucp_packet pkt = {};
3107 	dma_addr_t req_dma_addr;
3108 	void *req_cpu_addr;
3109 	int rc;
3110 
3111 	req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
3112 	if (!req_cpu_addr) {
3113 		dev_err(hdev->dev,
3114 			"Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
3115 		return -ENOMEM;
3116 	}
3117 
3118 	memset(data, 0, size);
3119 
3120 	pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
3121 	pkt.addr = cpu_to_le64(req_dma_addr);
3122 	pkt.data_max_size = cpu_to_le32(size);
3123 	pkt.nonce = cpu_to_le32(nonce);
3124 
3125 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
3126 					timeout, NULL);
3127 	if (rc) {
3128 		dev_err(hdev->dev,
3129 			"Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
3130 		goto out;
3131 	}
3132 
3133 	memcpy(data, req_cpu_addr, size);
3134 
3135 out:
3136 	hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr);
3137 
3138 	return rc;
3139 }
3140 
3141 int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
3142 				u32 nonce)
3143 {
3144 	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
3145 					sizeof(struct cpucp_sec_attest_info), nonce,
3146 					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
3147 }
3148 
3149 int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
3150 						dma_addr_t buff, u32 *size)
3151 {
3152 	struct cpucp_packet pkt = {};
3153 	u64 result;
3154 	int rc = 0;
3155 
3156 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_GENERIC_PASSTHROUGH << CPUCP_PKT_CTL_OPCODE_SHIFT);
3157 	pkt.addr = cpu_to_le64(buff);
3158 	pkt.data_max_size = cpu_to_le32(*size);
3159 	pkt.pkt_subidx = cpu_to_le32(sub_opcode);
3160 
3161 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt),
3162 						HL_CPUCP_INFO_TIMEOUT_USEC, &result);
3163 	if (rc)
3164 		dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n");
3165 	else
3166 		dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result);
3167 
3168 	*size = (u32)result;
3169 
3170 	return rc;
3171 }
3172