1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "habanalabs.h"
9 #include <linux/habanalabs/hl_boot_if.h>
10
11 #include <linux/pci.h>
12 #include <linux/firmware.h>
13 #include <linux/crc32.h>
14 #include <linux/slab.h>
15 #include <linux/ctype.h>
16 #include <linux/vmalloc.h>
17
18 #include <trace/events/habanalabs.h>
19
20 #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
21
22 static char *comms_cmd_str_arr[COMMS_INVLD_LAST] = {
23 [COMMS_NOOP] = __stringify(COMMS_NOOP),
24 [COMMS_CLR_STS] = __stringify(COMMS_CLR_STS),
25 [COMMS_RST_STATE] = __stringify(COMMS_RST_STATE),
26 [COMMS_PREP_DESC] = __stringify(COMMS_PREP_DESC),
27 [COMMS_DATA_RDY] = __stringify(COMMS_DATA_RDY),
28 [COMMS_EXEC] = __stringify(COMMS_EXEC),
29 [COMMS_RST_DEV] = __stringify(COMMS_RST_DEV),
30 [COMMS_GOTO_WFE] = __stringify(COMMS_GOTO_WFE),
31 [COMMS_SKIP_BMC] = __stringify(COMMS_SKIP_BMC),
32 [COMMS_PREP_DESC_ELBI] = __stringify(COMMS_PREP_DESC_ELBI),
33 };
34
35 static char *comms_sts_str_arr[COMMS_STS_INVLD_LAST] = {
36 [COMMS_STS_NOOP] = __stringify(COMMS_STS_NOOP),
37 [COMMS_STS_ACK] = __stringify(COMMS_STS_ACK),
38 [COMMS_STS_OK] = __stringify(COMMS_STS_OK),
39 [COMMS_STS_ERR] = __stringify(COMMS_STS_ERR),
40 [COMMS_STS_VALID_ERR] = __stringify(COMMS_STS_VALID_ERR),
41 [COMMS_STS_TIMEOUT_ERR] = __stringify(COMMS_STS_TIMEOUT_ERR),
42 };
43
44 /**
45 * hl_fw_version_cmp() - compares the FW version to a specific version
46 *
47 * @hdev: pointer to hl_device structure
48 * @major: major number of a reference version
49 * @minor: minor number of a reference version
50 * @subminor: sub-minor number of a reference version
51 *
52 * Return 1 if FW version greater than the reference version, -1 if it's
53 * smaller and 0 if versions are identical.
54 */
hl_fw_version_cmp(struct hl_device * hdev,u32 major,u32 minor,u32 subminor)55 int hl_fw_version_cmp(struct hl_device *hdev, u32 major, u32 minor, u32 subminor)
56 {
57 if (hdev->fw_sw_major_ver != major)
58 return (hdev->fw_sw_major_ver > major) ? 1 : -1;
59
60 if (hdev->fw_sw_minor_ver != minor)
61 return (hdev->fw_sw_minor_ver > minor) ? 1 : -1;
62
63 if (hdev->fw_sw_sub_minor_ver != subminor)
64 return (hdev->fw_sw_sub_minor_ver > subminor) ? 1 : -1;
65
66 return 0;
67 }
68
extract_fw_ver_from_str(const char * fw_str)69 static char *extract_fw_ver_from_str(const char *fw_str)
70 {
71 char *str, *fw_ver, *whitespace;
72 u32 ver_offset;
73
74 fw_ver = kmalloc(VERSION_MAX_LEN, GFP_KERNEL);
75 if (!fw_ver)
76 return NULL;
77
78 str = strnstr(fw_str, "fw-", VERSION_MAX_LEN);
79 if (!str)
80 goto free_fw_ver;
81
82 /* Skip the fw- part */
83 str += 3;
84 ver_offset = str - fw_str;
85
86 /* Copy until the next whitespace */
87 whitespace = strnstr(str, " ", VERSION_MAX_LEN - ver_offset);
88 if (!whitespace)
89 goto free_fw_ver;
90
91 strscpy(fw_ver, str, whitespace - str + 1);
92
93 return fw_ver;
94
95 free_fw_ver:
96 kfree(fw_ver);
97 return NULL;
98 }
99
100 /**
101 * extract_u32_until_given_char() - given a string of the format "<u32><char>*", extract the u32.
102 * @str: the given string
103 * @ver_num: the pointer to the extracted u32 to be returned to the caller.
104 * @given_char: the given char at the end of the u32 in the string
105 *
106 * Return: Upon success, return a pointer to the given_char in the string. Upon failure, return NULL
107 */
extract_u32_until_given_char(char * str,u32 * ver_num,char given_char)108 static char *extract_u32_until_given_char(char *str, u32 *ver_num, char given_char)
109 {
110 char num_str[8] = {}, *ch;
111
112 ch = strchrnul(str, given_char);
113 if (*ch == '\0' || ch == str || ch - str >= sizeof(num_str))
114 return NULL;
115
116 memcpy(num_str, str, ch - str);
117 if (kstrtou32(num_str, 10, ver_num))
118 return NULL;
119 return ch;
120 }
121
122 /**
123 * hl_get_sw_major_minor_subminor() - extract the FW's SW version major, minor, sub-minor
124 * from the version string
125 * @hdev: pointer to the hl_device
126 * @fw_str: the FW's version string
127 *
128 * The extracted version is set in the hdev fields: fw_sw_{major/minor/sub_minor}_ver.
129 *
130 * fw_str is expected to have one of two possible formats, examples:
131 * 1) 'Preboot version hl-gaudi2-1.9.0-fw-42.0.1-sec-3'
132 * 2) 'Preboot version hl-gaudi2-1.9.0-rc-fw-42.0.1-sec-3'
133 * In those examples, the SW major,minor,subminor are correspondingly: 1,9,0.
134 *
135 * Return: 0 for success or a negative error code for failure.
136 */
hl_get_sw_major_minor_subminor(struct hl_device * hdev,const char * fw_str)137 static int hl_get_sw_major_minor_subminor(struct hl_device *hdev, const char *fw_str)
138 {
139 char *end, *start;
140
141 end = strnstr(fw_str, "-rc-", VERSION_MAX_LEN);
142 if (end == fw_str)
143 return -EINVAL;
144
145 if (!end)
146 end = strnstr(fw_str, "-fw-", VERSION_MAX_LEN);
147
148 if (end == fw_str)
149 return -EINVAL;
150
151 if (!end)
152 return -EINVAL;
153
154 for (start = end - 1; start != fw_str; start--) {
155 if (*start == '-')
156 break;
157 }
158
159 if (start == fw_str)
160 return -EINVAL;
161
162 /* start/end point each to the starting and ending hyphen of the sw version e.g. -1.9.0- */
163 start++;
164 start = extract_u32_until_given_char(start, &hdev->fw_sw_major_ver, '.');
165 if (!start)
166 goto err_zero_ver;
167
168 start++;
169 start = extract_u32_until_given_char(start, &hdev->fw_sw_minor_ver, '.');
170 if (!start)
171 goto err_zero_ver;
172
173 start++;
174 start = extract_u32_until_given_char(start, &hdev->fw_sw_sub_minor_ver, '-');
175 if (!start)
176 goto err_zero_ver;
177
178 return 0;
179
180 err_zero_ver:
181 hdev->fw_sw_major_ver = 0;
182 hdev->fw_sw_minor_ver = 0;
183 hdev->fw_sw_sub_minor_ver = 0;
184 return -EINVAL;
185 }
186
187 /**
188 * hl_get_preboot_major_minor() - extract the FW's version major, minor from the version string.
189 * @hdev: pointer to the hl_device
190 * @preboot_ver: the FW's version string
191 *
192 * preboot_ver is expected to be the format of <major>.<minor>.<sub minor>*, e.g: 42.0.1-sec-3
193 * The extracted version is set in the hdev fields: fw_inner_{major/minor}_ver.
194 *
195 * Return: 0 on success, negative error code for failure.
196 */
hl_get_preboot_major_minor(struct hl_device * hdev,char * preboot_ver)197 static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver)
198 {
199 preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_major_ver, '.');
200 if (!preboot_ver) {
201 dev_err(hdev->dev, "Error parsing preboot major version\n");
202 goto err_zero_ver;
203 }
204
205 preboot_ver++;
206
207 preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_minor_ver, '.');
208 if (!preboot_ver) {
209 dev_err(hdev->dev, "Error parsing preboot minor version\n");
210 goto err_zero_ver;
211 }
212 return 0;
213
214 err_zero_ver:
215 hdev->fw_inner_major_ver = 0;
216 hdev->fw_inner_minor_ver = 0;
217 return -EINVAL;
218 }
219
hl_request_fw(struct hl_device * hdev,const struct firmware ** firmware_p,const char * fw_name)220 static int hl_request_fw(struct hl_device *hdev,
221 const struct firmware **firmware_p,
222 const char *fw_name)
223 {
224 size_t fw_size;
225 int rc;
226
227 rc = request_firmware(firmware_p, fw_name, hdev->dev);
228 if (rc) {
229 dev_err(hdev->dev, "Firmware file %s is not found! (error %d)\n",
230 fw_name, rc);
231 goto out;
232 }
233
234 fw_size = (*firmware_p)->size;
235 if ((fw_size % 4) != 0) {
236 dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
237 fw_name, fw_size);
238 rc = -EINVAL;
239 goto release_fw;
240 }
241
242 dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
243
244 if (fw_size > FW_FILE_MAX_SIZE) {
245 dev_err(hdev->dev,
246 "FW file size %zu exceeds maximum of %u bytes\n",
247 fw_size, FW_FILE_MAX_SIZE);
248 rc = -EINVAL;
249 goto release_fw;
250 }
251
252 return 0;
253
254 release_fw:
255 release_firmware(*firmware_p);
256 out:
257 return rc;
258 }
259
/**
 * hl_release_firmware() - release a firmware descriptor
 *
 * @fw: the firmware descriptor to release
 *
 * Thin wrapper around release_firmware(). It exists only so that
 * hl_request_fw() has a matching counterpart.
 */
static inline void hl_release_firmware(const struct firmware *fw)
{
	release_firmware(fw);
}
272
273 /**
274 * hl_fw_copy_fw_to_device() - copy FW to device
275 *
276 * @hdev: pointer to hl_device structure.
277 * @fw: fw descriptor
278 * @dst: IO memory mapped address space to copy firmware to
279 * @src_offset: offset in src FW to copy from
280 * @size: amount of bytes to copy (0 to copy the whole binary)
281 *
282 * actual copy of FW binary data to device, shared by static and dynamic loaders
283 */
hl_fw_copy_fw_to_device(struct hl_device * hdev,const struct firmware * fw,void __iomem * dst,u32 src_offset,u32 size)284 static int hl_fw_copy_fw_to_device(struct hl_device *hdev,
285 const struct firmware *fw, void __iomem *dst,
286 u32 src_offset, u32 size)
287 {
288 const void *fw_data;
289
290 /* size 0 indicates to copy the whole file */
291 if (!size)
292 size = fw->size;
293
294 if (src_offset + size > fw->size) {
295 dev_err(hdev->dev,
296 "size to copy(%u) and offset(%u) are invalid\n",
297 size, src_offset);
298 return -EINVAL;
299 }
300
301 fw_data = (const void *) fw->data;
302
303 memcpy_toio(dst, fw_data + src_offset, size);
304 return 0;
305 }
306
307 /**
308 * hl_fw_copy_msg_to_device() - copy message to device
309 *
310 * @hdev: pointer to hl_device structure.
311 * @msg: message
312 * @dst: IO memory mapped address space to copy firmware to
313 * @src_offset: offset in src message to copy from
314 * @size: amount of bytes to copy (0 to copy the whole binary)
315 *
316 * actual copy of message data to device.
317 */
hl_fw_copy_msg_to_device(struct hl_device * hdev,struct lkd_msg_comms * msg,void __iomem * dst,u32 src_offset,u32 size)318 static int hl_fw_copy_msg_to_device(struct hl_device *hdev,
319 struct lkd_msg_comms *msg, void __iomem *dst,
320 u32 src_offset, u32 size)
321 {
322 void *msg_data;
323
324 /* size 0 indicates to copy the whole file */
325 if (!size)
326 size = sizeof(struct lkd_msg_comms);
327
328 if (src_offset + size > sizeof(struct lkd_msg_comms)) {
329 dev_err(hdev->dev,
330 "size to copy(%u) and offset(%u) are invalid\n",
331 size, src_offset);
332 return -EINVAL;
333 }
334
335 msg_data = (void *) msg;
336
337 memcpy_toio(dst, msg_data + src_offset, size);
338
339 return 0;
340 }
341
342 /**
343 * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
344 *
345 * @hdev: pointer to hl_device structure.
346 * @fw_name: the firmware image name
347 * @dst: IO memory mapped address space to copy firmware to
348 * @src_offset: offset in src FW to copy from
349 * @size: amount of bytes to copy (0 to copy the whole binary)
350 *
351 * Copy fw code from firmware file to device memory.
352 *
353 * Return: 0 on success, non-zero for failure.
354 */
hl_fw_load_fw_to_device(struct hl_device * hdev,const char * fw_name,void __iomem * dst,u32 src_offset,u32 size)355 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
356 void __iomem *dst, u32 src_offset, u32 size)
357 {
358 const struct firmware *fw;
359 int rc;
360
361 rc = hl_request_fw(hdev, &fw, fw_name);
362 if (rc)
363 return rc;
364
365 rc = hl_fw_copy_fw_to_device(hdev, fw, dst, src_offset, size);
366
367 hl_release_firmware(fw);
368 return rc;
369 }
370
/*
 * hl_fw_send_pci_access_msg() - send a CPU-CP packet carrying @opcode and @value
 *
 * @hdev: pointer to hl_device structure.
 * @opcode: packet opcode, placed in the opcode field of the packet's ctl word
 * @value: value to send along with the opcode
 *
 * The packet is sent with a timeout argument of 0 and no result is requested.
 *
 * Return: 0 on success, otherwise the error returned by send_cpu_message().
 */
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode, u64 value)
{
	struct cpucp_packet pkt = {
		.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT),
		.value = cpu_to_le64(value),
	};
	int rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);

	if (rc)
		dev_err(hdev->dev, "Failed to disable FW's PCI access\n");

	return rc;
}
385
386 /**
387 * hl_fw_send_cpu_message() - send CPU message to the device.
388 *
389 * @hdev: pointer to hl_device structure.
390 * @hw_queue_id: HW queue ID
391 * @msg: raw data of the message/packet
392 * @size: size of @msg in bytes
393 * @timeout_us: timeout in usec to wait for CPU reply on the message
394 * @result: return code reported by FW
395 *
396 * send message to the device CPU.
397 *
398 * Return: 0 on success, non-zero for failure.
399 * -ENOMEM: memory allocation failure
400 * -EAGAIN: CPU is disabled (try again when enabled)
401 * -ETIMEDOUT: timeout waiting for FW response
402 * -EIO: protocol error
403 */
hl_fw_send_cpu_message(struct hl_device * hdev,u32 hw_queue_id,u32 * msg,u16 size,u32 timeout_us,u64 * result)404 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
405 u16 size, u32 timeout_us, u64 *result)
406 {
407 struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
408 struct asic_fixed_properties *prop = &hdev->asic_prop;
409 u32 tmp, expected_ack_val, pi, opcode;
410 struct cpucp_packet *pkt;
411 dma_addr_t pkt_dma_addr;
412 struct hl_bd *sent_bd;
413 int rc = 0, fw_rc;
414
415 pkt = hl_cpu_accessible_dma_pool_alloc(hdev, size, &pkt_dma_addr);
416 if (!pkt) {
417 dev_err(hdev->dev, "Failed to allocate DMA memory for packet to CPU\n");
418 return -ENOMEM;
419 }
420
421 memcpy(pkt, msg, size);
422
423 mutex_lock(&hdev->send_cpu_message_lock);
424
425 /* CPU-CP messages can be sent during soft-reset */
426 if (hdev->disabled && !hdev->reset_info.in_compute_reset)
427 goto out;
428
429 if (hdev->device_cpu_disabled) {
430 rc = -EAGAIN;
431 goto out;
432 }
433
434 /* set fence to a non valid value */
435 pkt->fence = cpu_to_le32(UINT_MAX);
436 pi = queue->pi;
437
438 /*
439 * The CPU queue is a synchronous queue with an effective depth of
440 * a single entry (although it is allocated with room for multiple
441 * entries). We lock on it using 'send_cpu_message_lock' which
442 * serializes accesses to the CPU queue.
443 * Which means that we don't need to lock the access to the entire H/W
444 * queues module when submitting a JOB to the CPU queue.
445 */
446 hl_hw_queue_submit_bd(hdev, queue, hl_queue_inc_ptr(queue->pi), size, pkt_dma_addr);
447
448 if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
449 expected_ack_val = queue->pi;
450 else
451 expected_ack_val = CPUCP_PACKET_FENCE_VAL;
452
453 rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
454 (tmp == expected_ack_val), 1000,
455 timeout_us, true);
456
457 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
458
459 if (rc == -ETIMEDOUT) {
460 /* If FW performed reset just before sending it a packet, we will get a timeout.
461 * This is expected behavior, hence no need for error message.
462 */
463 if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) {
464 dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
465 tmp);
466 } else {
467 struct hl_bd *bd = queue->kernel_address;
468
469 bd += hl_pi_2_offset(pi);
470
471 dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n"
472 "Pkt info[%u]: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n",
473 tmp, pi, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr,
474 queue->dram_bd);
475 }
476 hdev->device_cpu_disabled = true;
477 goto out;
478 }
479
480 tmp = le32_to_cpu(pkt->ctl);
481
482 fw_rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
483 if (fw_rc) {
484 opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
485
486 if (!prop->supports_advanced_cpucp_rc) {
487 dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
488 rc = -EIO;
489 goto scrub_descriptor;
490 }
491
492 switch (fw_rc) {
493 case cpucp_packet_invalid:
494 dev_err(hdev->dev,
495 "CPU packet %d is not supported by F/W\n", opcode);
496 break;
497 case cpucp_packet_fault:
498 dev_err(hdev->dev,
499 "F/W failed processing CPU packet %d\n", opcode);
500 break;
501 case cpucp_packet_invalid_pkt:
502 dev_dbg(hdev->dev,
503 "CPU packet %d is not supported by F/W\n", opcode);
504 break;
505 case cpucp_packet_invalid_params:
506 dev_err(hdev->dev,
507 "F/W reports invalid parameters for CPU packet %d\n", opcode);
508 break;
509
510 default:
511 dev_err(hdev->dev,
512 "Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
513 }
514
515 /* propagate the return code from the f/w to the callers who want to check it */
516 if (result)
517 *result = fw_rc;
518
519 rc = -EIO;
520
521 } else if (result) {
522 *result = le64_to_cpu(pkt->result);
523 }
524
525 scrub_descriptor:
526 /* Scrub previous buffer descriptor 'ctl' field which contains the
527 * previous PI value written during packet submission.
528 * We must do this or else F/W can read an old value upon queue wraparound.
529 */
530 sent_bd = queue->kernel_address;
531 sent_bd += hl_pi_2_offset(pi);
532 sent_bd->ctl = cpu_to_le32(UINT_MAX);
533
534 out:
535 mutex_unlock(&hdev->send_cpu_message_lock);
536
537 hl_cpu_accessible_dma_pool_free(hdev, size, pkt);
538
539 return rc;
540 }
541
/*
 * hl_fw_unmask_irq() - ask the F/W to unmask a single event
 *
 * @hdev: pointer to hl_device structure.
 * @event_type: the event to unmask, sent as the packet's value
 *
 * Sends a CPUCP_PACKET_UNMASK_RAZWI_IRQ packet with a timeout argument of 0.
 *
 * Return: 0 on success, otherwise the error returned by send_cpu_message().
 */
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						0, &result);

	/* terminate the message with a newline like every other log line here */
	if (rc)
		dev_err(hdev->dev, "failed to unmask event %d\n", event_type);

	return rc;
}
562
/*
 * hl_fw_unmask_irq_arr() - ask the F/W to unmask an array of events
 *
 * @hdev: pointer to hl_device structure.
 * @irq_arr: the events to unmask
 * @irq_arr_size: size of @irq_arr in bytes
 *
 * Builds a CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet that carries a copy of
 * @irq_arr and sends it with a timeout argument of 0.
 *
 * Return: 0 on success, -EINVAL if the packet would not fit a u16 size,
 * -ENOMEM on allocation failure, otherwise the error returned by
 * send_cpu_message().
 */
int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
		size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *arr_pkt;
	size_t pkt_size;
	u64 result;
	int rc;

	/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
	pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size;
	pkt_size += 0x7;
	pkt_size &= ~0x7;

	/* the size is passed on as a u16, so it must fit one */
	if (pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	arr_pkt = kzalloc(pkt_size, GFP_KERNEL);
	if (!arr_pkt)
		return -ENOMEM;

	arr_pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	/* length counts array elements, not bytes */
	arr_pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
	memcpy(&arr_pkt->irqs, irq_arr, irq_arr_size);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) arr_pkt,
						pkt_size, 0, &result);
	if (rc)
		dev_err(hdev->dev, "failed to unmask event array\n");

	kfree(arr_pkt);

	return rc;
}
603
hl_fw_test_cpu_queue(struct hl_device * hdev)604 int hl_fw_test_cpu_queue(struct hl_device *hdev)
605 {
606 struct cpucp_packet test_pkt = {};
607 u64 result = 0;
608 int rc;
609
610 test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
611 CPUCP_PKT_CTL_OPCODE_SHIFT);
612 test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
613
614 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
615 sizeof(test_pkt), 0, &result);
616
617 if (!rc) {
618 if (result != CPUCP_PACKET_FENCE_VAL)
619 dev_err(hdev->dev,
620 "CPU queue test failed (%#08llx)\n", result);
621 } else {
622 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
623 }
624
625 return rc;
626 }
627
/*
 * hl_fw_cpu_accessible_dma_pool_alloc() - allocate from the CPU accessible
 *                                         DMA pool
 *
 * @hdev: pointer to hl_device structure.
 * @size: number of bytes to allocate
 * @dma_handle: where the DMA address matching the allocation is stored
 *
 * The DMA address is the pool's DMA base address plus the offset of the
 * allocation from the pool's kernel base address. It is computed and stored
 * unconditionally, so it is meaningful only when the returned pointer is not
 * NULL.
 *
 * Return: kernel address of the allocation, as returned by gen_pool_alloc().
 */
void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
						dma_addr_t *dma_handle)
{
	u64 pool_base = (u64) (uintptr_t) hdev->cpu_accessible_dma_mem;
	u64 kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);

	*dma_handle = hdev->cpu_accessible_dma_address + (kernel_addr - pool_base);

	return (void *) (uintptr_t) kernel_addr;
}
640
/*
 * hl_fw_cpu_accessible_dma_pool_free() - return memory to the CPU accessible
 *                                        DMA pool
 *
 * @hdev: pointer to hl_device structure.
 * @size: size of the allocation in bytes
 * @vaddr: kernel address of the allocation
 */
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	u64 kernel_addr = (u64) (uintptr_t) vaddr;

	gen_pool_free(hdev->cpu_accessible_dma_pool, kernel_addr, size);
}
647
hl_fw_send_soft_reset(struct hl_device * hdev)648 int hl_fw_send_soft_reset(struct hl_device *hdev)
649 {
650 struct cpucp_packet pkt;
651 int rc;
652
653 memset(&pkt, 0, sizeof(pkt));
654 pkt.ctl = cpu_to_le32(CPUCP_PACKET_SOFT_RESET << CPUCP_PKT_CTL_OPCODE_SHIFT);
655 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
656 if (rc)
657 dev_err(hdev->dev, "failed to send soft-reset msg (err = %d)\n", rc);
658
659 return rc;
660 }
661
/*
 * hl_fw_send_device_activity() - report the device's active status to the F/W
 *
 * @hdev: pointer to hl_device structure.
 * @open: the status to report, sent as the packet's value
 *
 * Sends a CPUCP_PACKET_ACTIVE_STATUS_SET packet with a timeout argument of 0.
 * No result is requested.
 *
 * Return: 0 on success, otherwise the error returned by send_cpu_message().
 */
int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
{
	struct cpucp_packet activity_pkt;
	int rc;

	memset(&activity_pkt, 0, sizeof(activity_pkt));
	activity_pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET <<
					CPUCP_PKT_CTL_OPCODE_SHIFT);
	activity_pkt.value = cpu_to_le64(open);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &activity_pkt,
						sizeof(activity_pkt), 0, NULL);
	if (!rc)
		return 0;

	dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);

	return rc;
}
676
hl_fw_send_heartbeat(struct hl_device * hdev)677 int hl_fw_send_heartbeat(struct hl_device *hdev)
678 {
679 struct cpucp_packet hb_pkt;
680 u64 result = 0;
681 int rc;
682
683 memset(&hb_pkt, 0, sizeof(hb_pkt));
684 hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << CPUCP_PKT_CTL_OPCODE_SHIFT);
685 hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
686
687 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result);
688
689 if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
690 return -EIO;
691
692 if (le32_to_cpu(hb_pkt.status_mask) &
693 CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK) {
694 dev_warn(hdev->dev, "FW reported EQ fault during heartbeat\n");
695 rc = -EIO;
696 }
697
698 hdev->heartbeat_debug_info.last_pq_heartbeat_ts = ktime_get_real_seconds();
699
700 return rc;
701 }
702
fw_report_boot_dev0(struct hl_device * hdev,u32 err_val,u32 sts_val)703 static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val)
704 {
705 bool err_exists = false;
706
707 if (!(err_val & CPU_BOOT_ERR0_ENABLED))
708 return false;
709
710 if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
711 dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n");
712
713 if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
714 dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
715
716 if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
717 dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n");
718
719 if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
720 if (hdev->bmc_enable) {
721 dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n");
722 } else {
723 dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n");
724 /* This is an info so we don't want it to disable the
725 * device
726 */
727 err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
728 }
729 }
730
731 if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
732 dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n");
733
734 if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
735 dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n");
736
737 if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
738 dev_err(hdev->dev, "Device boot warning - security not ready\n");
739
740 if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
741 dev_err(hdev->dev, "Device boot error - security failure\n");
742
743 if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
744 dev_err(hdev->dev, "Device boot error - eFuse failure\n");
745
746 if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL)
747 dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
748
749 if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
750 dev_err(hdev->dev, "Device boot error - PLL failure\n");
751
752 if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
753 dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
754
755 if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
756 /* Ignore this bit, don't prevent driver loading */
757 dev_dbg(hdev->dev, "device unusable status is set\n");
758 err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
759 }
760
761 if (err_val & CPU_BOOT_ERR0_BINNING_FAIL)
762 dev_err(hdev->dev, "Device boot error - binning failure\n");
763
764 if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
765 dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
766
767 if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
768 dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n");
769
770 if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
771 dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n");
772
773 /* All warnings should go here in order not to reach the unknown error validation */
774 if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
775 dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
776 err_exists = true;
777 }
778
779 if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL)
780 dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n");
781
782 if (err_val & CPU_BOOT_ERR0_TPM_FAIL)
783 dev_warn(hdev->dev, "Device boot warning - TPM failure\n");
784
785 if (err_val & CPU_BOOT_ERR_FATAL_MASK)
786 err_exists = true;
787
788 /* return error only if it's in the predefined mask */
789 if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
790 lower_32_bits(hdev->boot_error_status_mask)))
791 return true;
792
793 return false;
794 }
795
796 /* placeholder for ERR1 as no errors defined there yet */
fw_report_boot_dev1(struct hl_device * hdev,u32 err_val,u32 sts_val)797 static bool fw_report_boot_dev1(struct hl_device *hdev, u32 err_val,
798 u32 sts_val)
799 {
800 /*
801 * keep this variable to preserve the logic of the function.
802 * this way it would require less modifications when error will be
803 * added to DEV_ERR1
804 */
805 bool err_exists = false;
806
807 if (!(err_val & CPU_BOOT_ERR1_ENABLED))
808 return false;
809
810 if (sts_val & CPU_BOOT_DEV_STS1_ENABLED)
811 dev_dbg(hdev->dev, "Device status1 %#x\n", sts_val);
812
813 if (!err_exists && (err_val & ~CPU_BOOT_ERR1_ENABLED)) {
814 dev_err(hdev->dev,
815 "Device boot error - unknown ERR1 error 0x%08x\n",
816 err_val);
817 err_exists = true;
818 }
819
820 /* return error only if it's in the predefined mask */
821 if (err_exists && ((err_val & ~CPU_BOOT_ERR1_ENABLED) &
822 upper_32_bits(hdev->boot_error_status_mask)))
823 return true;
824
825 return false;
826 }
827
/*
 * fw_read_errors() - read the boot error/status registers and report errors
 *
 * @hdev: pointer to hl_device structure.
 * @boot_err0_reg: boot error 0 register
 * @boot_err1_reg: boot error 1 register
 * @cpu_boot_dev_status0_reg: boot device status 0 register
 * @cpu_boot_dev_status1_reg: boot device status 1 register
 *
 * Some of the firmware status codes are deprecated in newer f/w versions. In
 * those versions the errors are reported in different registers, so these
 * registers are checked and the exact errors are printed. There may be
 * multiple errors, hence each one is reported separately. Some of the error
 * codes might indicate a state that is not an error per-se, but is an error
 * in a production environment.
 *
 * Return: 0 if no error was reported, -EIO otherwise.
 */
static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
				u32 boot_err1_reg, u32 cpu_boot_dev_status0_reg,
				u32 cpu_boot_dev_status1_reg)
{
	u32 err, sts;
	bool failed;

	/* first register pair */
	err = RREG32(boot_err0_reg);
	sts = RREG32(cpu_boot_dev_status0_reg);
	failed = fw_report_boot_dev0(hdev, err, sts);

	/* second register pair - always evaluated, even if the first one failed */
	err = RREG32(boot_err1_reg);
	sts = RREG32(cpu_boot_dev_status1_reg);
	failed |= fw_report_boot_dev1(hdev, err, sts);

	return failed ? -EIO : 0;
}
857
/*
 * hl_fw_cpucp_info_get() - fetch the CPU-CP info structure from the F/W
 *
 * @hdev: pointer to hl_device structure.
 * @sts_boot_dev_sts0_reg: boot device status 0 register
 * @sts_boot_dev_sts1_reg: boot device status 1 register
 * @boot_err0_reg: boot error 0 register
 * @boot_err1_reg: boot error 1 register
 *
 * Sends a CPUCP_PACKET_INFO_GET packet that points the F/W at a buffer taken
 * from the CPU accessible DMA pool, checks the boot error registers, copies
 * the received info into the ASIC properties, builds the hwmon channel info
 * from it and re-reads the F/W application status registers that are marked
 * as valid.
 *
 * Return: 0 on success, -ENOMEM if the DMA buffer could not be allocated,
 * -EFAULT if building the hwmon channel info failed, otherwise the error
 * returned by send_cpu_message() or fw_read_errors().
 */
int hl_fw_cpucp_info_get(struct hl_device *hdev,
				u32 sts_boot_dev_sts0_reg,
				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
				u32 boot_err1_reg)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct cpucp_packet pkt = {};
	dma_addr_t cpucp_info_dma_addr;
	void *cpucp_info_cpu_addr;
	char *kernel_ver;
	u64 result;
	int rc;

	cpucp_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, sizeof(struct cpucp_info),
								&cpucp_info_dma_addr);
	if (!cpucp_info_cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate DMA memory for CPU-CP info packet\n");
		return -ENOMEM;
	}

	memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
	pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to handle CPU-CP info pkt, error %d\n", rc);
		goto out;
	}

	rc = fw_read_errors(hdev, boot_err0_reg, boot_err1_reg,
				sts_boot_dev_sts0_reg, sts_boot_dev_sts1_reg);
	if (rc) {
		dev_err(hdev->dev, "Errors in device boot\n");
		goto out;
	}

	memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
			sizeof(prop->cpucp_info));

	rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to build hwmon channel info, error %d\n", rc);
		rc = -EFAULT;
		goto out;
	}

	/* the version is printed only if it can be extracted from the string */
	kernel_ver = extract_fw_ver_from_str(prop->cpucp_info.kernel_version);
	if (kernel_ver) {
		dev_info(hdev->dev, "Linux version %s\n", kernel_ver);
		kfree(kernel_ver);
	}

	/* assume EQ code doesn't need to check eqe index */
	hdev->event_queue.check_eqe_index = false;

	/* Read FW application security bits again */
	if (prop->fw_cpu_boot_dev_sts0_valid) {
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
		if (prop->fw_app_cpu_boot_dev_sts0 &
				CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
			hdev->event_queue.check_eqe_index = true;
	}

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);

out:
	hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_info), cpucp_info_cpu_addr);

	return rc;
}
937
hl_fw_send_msi_info_msg(struct hl_device * hdev)938 static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
939 {
940 struct cpucp_array_data_packet *pkt;
941 size_t total_pkt_size, data_size;
942 u64 result = 0;
943 int rc;
944
945 /* skip sending this info for unsupported ASICs */
946 if (!hdev->asic_funcs->get_msi_info)
947 return 0;
948
949 data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
950 total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
951
952 /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
953 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
954
955 /* total_pkt_size is casted to u16 later on */
956 if (total_pkt_size > USHRT_MAX) {
957 dev_err(hdev->dev, "CPUCP array data is too big\n");
958 return -EINVAL;
959 }
960
961 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
962 if (!pkt)
963 return -ENOMEM;
964
965 pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
966
967 memset((void *) &pkt->data, 0xFF, data_size);
968 hdev->asic_funcs->get_msi_info(pkt->data);
969
970 pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
971 CPUCP_PKT_CTL_OPCODE_SHIFT);
972
973 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
974 total_pkt_size, 0, &result);
975
976 /*
977 * in case packet result is invalid it means that FW does not support
978 * this feature and will use default/hard coded MSI values. no reason
979 * to stop the boot
980 */
981 if (rc && result == cpucp_packet_invalid)
982 rc = 0;
983
984 if (rc)
985 dev_err(hdev->dev, "failed to send CPUCP array data\n");
986
987 kfree(pkt);
988
989 return rc;
990 }
991
/**
 * hl_fw_cpucp_handshake() - fetch CPU-CP info and then send the MSI info
 *
 * @hdev: pointer to hl_device structure
 * @sts_boot_dev_sts0_reg: boot device status0 register, passed to the info query
 * @sts_boot_dev_sts1_reg: boot device status1 register, passed to the info query
 * @boot_err0_reg: boot error0 register, passed to the info query
 * @boot_err1_reg: boot error1 register, passed to the info query
 *
 * Return: the error of hl_fw_cpucp_info_get() if it fails, otherwise the
 * result of hl_fw_send_msi_info_msg().
 */
int hl_fw_cpucp_handshake(struct hl_device *hdev,
				u32 sts_boot_dev_sts0_reg,
				u32 sts_boot_dev_sts1_reg, u32 boot_err0_reg,
				u32 boot_err1_reg)
{
	int rc = hl_fw_cpucp_info_get(hdev, sts_boot_dev_sts0_reg,
					sts_boot_dev_sts1_reg, boot_err0_reg,
					boot_err1_reg);

	/* the MSI info is sent only after the info query succeeded */
	return rc ? rc : hl_fw_send_msi_info_msg(hdev);
}
1007
/**
 * hl_fw_get_eeprom_data() - read the EEPROM data through CPU-CP
 *
 * @hdev: pointer to hl_device structure
 * @data: destination buffer for the EEPROM data
 * @max_size: size of the bounce buffer and upper bound of the copy into @data
 *
 * A zeroed buffer of @max_size bytes is taken from the CPU accessible DMA
 * pool, its DMA address is handed to the F/W in a
 * CPUCP_PACKET_EEPROM_DATA_GET packet and, on success, the returned bytes are
 * copied to @data.
 *
 * Return: 0 on success, -ENOMEM if the DMA buffer cannot be allocated,
 * otherwise the send_cpu_message error (logged unless it is -EAGAIN).
 */
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
	struct cpucp_packet pkt = {};
	dma_addr_t eeprom_dma_addr;
	void *eeprom_buf;
	u64 result;
	int rc;

	eeprom_buf = hl_cpu_accessible_dma_pool_alloc(hdev, max_size,
							&eeprom_dma_addr);
	if (!eeprom_buf) {
		dev_err(hdev->dev,
			"Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
		return -ENOMEM;
	}

	memset(eeprom_buf, 0, max_size);

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(eeprom_dma_addr);
	pkt.data_max_size = cpu_to_le32(max_size);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
					HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
	if (!rc) {
		/*
		 * result holds the actual data size; it is clamped to max_size
		 * so the copy never leaves the bounce buffer
		 */
		memcpy(data, eeprom_buf, min((size_t)result, max_size));
	} else if (rc != -EAGAIN) {
		dev_err(hdev->dev,
			"Failed to handle CPU-CP EEPROM packet, error %d\n", rc);
	}

	hl_cpu_accessible_dma_pool_free(hdev, max_size, eeprom_buf);

	return rc;
}
1048
hl_fw_get_monitor_dump(struct hl_device * hdev,void * data)1049 int hl_fw_get_monitor_dump(struct hl_device *hdev, void *data)
1050 {
1051 struct cpucp_monitor_dump *mon_dump_cpu_addr;
1052 dma_addr_t mon_dump_dma_addr;
1053 struct cpucp_packet pkt = {};
1054 size_t data_size;
1055 __le32 *src_ptr;
1056 u32 *dst_ptr;
1057 u64 result;
1058 int i, rc;
1059
1060 data_size = sizeof(struct cpucp_monitor_dump);
1061 mon_dump_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, data_size, &mon_dump_dma_addr);
1062 if (!mon_dump_cpu_addr) {
1063 dev_err(hdev->dev,
1064 "Failed to allocate DMA memory for CPU-CP monitor-dump packet\n");
1065 return -ENOMEM;
1066 }
1067
1068 memset(mon_dump_cpu_addr, 0, data_size);
1069
1070 pkt.ctl = cpu_to_le32(CPUCP_PACKET_MONITOR_DUMP_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
1071 pkt.addr = cpu_to_le64(mon_dump_dma_addr);
1072 pkt.data_max_size = cpu_to_le32(data_size);
1073
1074 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1075 HL_CPUCP_MON_DUMP_TIMEOUT_USEC, &result);
1076 if (rc) {
1077 if (rc != -EAGAIN)
1078 dev_err(hdev->dev,
1079 "Failed to handle CPU-CP monitor-dump packet, error %d\n", rc);
1080 goto out;
1081 }
1082
1083 /* result contains the actual size */
1084 src_ptr = (__le32 *) mon_dump_cpu_addr;
1085 dst_ptr = data;
1086 for (i = 0; i < (data_size / sizeof(u32)); i++) {
1087 *dst_ptr = le32_to_cpu(*src_ptr);
1088 src_ptr++;
1089 dst_ptr++;
1090 }
1091
1092 out:
1093 hl_cpu_accessible_dma_pool_free(hdev, data_size, mon_dump_cpu_addr);
1094
1095 return rc;
1096 }
1097
hl_fw_cpucp_pci_counters_get(struct hl_device * hdev,struct hl_info_pci_counters * counters)1098 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
1099 struct hl_info_pci_counters *counters)
1100 {
1101 struct cpucp_packet pkt = {};
1102 u64 result;
1103 int rc;
1104
1105 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
1106 CPUCP_PKT_CTL_OPCODE_SHIFT);
1107
1108 /* Fetch PCI rx counter */
1109 pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
1110 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1111 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1112 if (rc) {
1113 if (rc != -EAGAIN)
1114 dev_err(hdev->dev,
1115 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1116 return rc;
1117 }
1118 counters->rx_throughput = result;
1119
1120 memset(&pkt, 0, sizeof(pkt));
1121 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
1122 CPUCP_PKT_CTL_OPCODE_SHIFT);
1123
1124 /* Fetch PCI tx counter */
1125 pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
1126 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1127 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1128 if (rc) {
1129 if (rc != -EAGAIN)
1130 dev_err(hdev->dev,
1131 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1132 return rc;
1133 }
1134 counters->tx_throughput = result;
1135
1136 /* Fetch PCI replay counter */
1137 memset(&pkt, 0, sizeof(pkt));
1138 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
1139 CPUCP_PKT_CTL_OPCODE_SHIFT);
1140
1141 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1142 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1143 if (rc) {
1144 if (rc != -EAGAIN)
1145 dev_err(hdev->dev,
1146 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
1147 return rc;
1148 }
1149 counters->replay_cnt = (u32) result;
1150
1151 return rc;
1152 }
1153
/**
 * hl_fw_cpucp_total_energy_get() - read the total energy value from CPU-CP
 *
 * @hdev: pointer to hl_device structure
 * @total_energy: output; written only when the packet succeeds
 *
 * Return: 0 on success, otherwise the send_cpu_message error (logged unless
 * it is -EAGAIN).
 */
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
{
	struct cpucp_packet pkt = {};
	u64 result;
	int rc;

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (!rc) {
		*total_energy = result;
		return rc;
	}

	if (rc != -EAGAIN)
		dev_err(hdev->dev,
			"Failed to handle CpuCP total energy pkt, error %d\n", rc);

	return rc;
}
1176
/**
 * get_used_pll_index() - translate a driver PLL index to the index the F/W uses
 *
 * @hdev: pointer to hl_device structure
 * @input_pll_index: ASIC specific PLL index requested by the caller
 * @pll_index: output; the index to place in the CPU-CP packet
 *
 * When the F/W application does not advertise CPU_BOOT_DEV_STS0_DYN_PLL_EN the
 * input index is returned as is. Otherwise the index is mapped by the ASIC
 * map_pll_idx_to_fw_idx callback and must have its bit set in the pll_map
 * reported in cpucp_info.
 *
 * Return: 0 on success, -EINVAL if the index cannot be mapped, lies outside
 * the PLL map or is not marked as supported in it.
 */
int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
				enum pll_index *pll_index)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 pll_byte, pll_bit_off;
	bool dynamic_pll;
	int fw_pll_idx;

	dynamic_pll = !!(prop->fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_DYN_PLL_EN);

	if (!dynamic_pll) {
		/*
		 * in case we are working with legacy FW (each asic has unique
		 * PLL numbering) use the driver based index as they are
		 * aligned with fw legacy numbering
		 */
		*pll_index = input_pll_index;
		return 0;
	}

	/* retrieve a FW compatible PLL index based on
	 * ASIC specific user request
	 */
	fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
	if (fw_pll_idx < 0) {
		dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
			input_pll_index, fw_pll_idx);
		return -EINVAL;
	}

	/*
	 * PLL map is a u8 array in which every byte covers 8 PLL indices.
	 * An index whose byte lies past the end of the map cannot be
	 * supported and must not be used to read the array.
	 */
	if ((size_t)(fw_pll_idx >> 3) >= sizeof(prop->cpucp_info.pll_map)) {
		dev_err(hdev->dev, "PLL index %d is not supported\n",
			fw_pll_idx);
		return -EINVAL;
	}

	pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
	pll_bit_off = fw_pll_idx & 0x7;

	if (!(pll_byte & BIT(pll_bit_off))) {
		dev_err(hdev->dev, "PLL index %d is not supported\n",
			fw_pll_idx);
		return -EINVAL;
	}

	*pll_index = fw_pll_idx;

	return 0;
}
1222
/**
 * hl_fw_cpucp_pll_info_get() - read the output frequencies of a PLL
 *
 * @hdev: pointer to hl_device structure
 * @pll_index: ASIC specific PLL index, translated by get_used_pll_index()
 * @pll_freq_arr: output array; entries 0..3 receive the PLL_OUT0..PLL_OUT3
 *                fields extracted from the F/W result
 *
 * Return: 0 on success, the get_used_pll_index() error if the index is not
 * usable, otherwise the send_cpu_message error (logged unless it is -EAGAIN).
 */
int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
		u16 *pll_freq_arr)
{
	enum pll_index fw_pll_type;
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	rc = get_used_pll_index(hdev, pll_index, &fw_pll_type);
	if (rc)
		return rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_type = __cpu_to_le16((u16)fw_pll_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
		return rc;
	}

	/* the four outputs are packed into the single 64-bit result */
	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);

	return 0;
}
1256
/**
 * hl_fw_cpucp_power_get() - read the power value from CPU-CP
 *
 * @hdev: pointer to hl_device structure
 * @power: output; written only when the packet succeeds
 *
 * Sends a CPUCP_PACKET_POWER_GET packet of type CPUCP_POWER_INPUT.
 *
 * Return: 0 on success, otherwise the send_cpu_message error (logged unless
 * it is -EAGAIN).
 */
int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.type = cpu_to_le16(CPUCP_POWER_INPUT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
					HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (!rc) {
		*power = result;
		return rc;
	}

	if (rc != -EAGAIN)
		dev_err(hdev->dev, "Failed to read power, error %d\n", rc);

	return rc;
}
1281
hl_fw_dram_replaced_row_get(struct hl_device * hdev,struct cpucp_hbm_row_info * info)1282 int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
1283 struct cpucp_hbm_row_info *info)
1284 {
1285 struct cpucp_hbm_row_info *cpucp_repl_rows_info_cpu_addr;
1286 dma_addr_t cpucp_repl_rows_info_dma_addr;
1287 struct cpucp_packet pkt = {};
1288 u64 result;
1289 int rc;
1290
1291 cpucp_repl_rows_info_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev,
1292 sizeof(struct cpucp_hbm_row_info),
1293 &cpucp_repl_rows_info_dma_addr);
1294 if (!cpucp_repl_rows_info_cpu_addr) {
1295 dev_err(hdev->dev,
1296 "Failed to allocate DMA memory for CPU-CP replaced rows info packet\n");
1297 return -ENOMEM;
1298 }
1299
1300 memset(cpucp_repl_rows_info_cpu_addr, 0, sizeof(struct cpucp_hbm_row_info));
1301
1302 pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET <<
1303 CPUCP_PKT_CTL_OPCODE_SHIFT);
1304 pkt.addr = cpu_to_le64(cpucp_repl_rows_info_dma_addr);
1305 pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_hbm_row_info));
1306
1307 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
1308 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
1309 if (rc) {
1310 if (rc != -EAGAIN)
1311 dev_err(hdev->dev,
1312 "Failed to handle CPU-CP replaced rows info pkt, error %d\n", rc);
1313 goto out;
1314 }
1315
1316 memcpy(info, cpucp_repl_rows_info_cpu_addr, sizeof(*info));
1317
1318 out:
1319 hl_cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_hbm_row_info),
1320 cpucp_repl_rows_info_cpu_addr);
1321
1322 return rc;
1323 }
1324
/**
 * hl_fw_dram_pending_row_get() - read the HBM pending rows status from CPU-CP
 *
 * @hdev: pointer to hl_device structure
 * @pend_rows_num: output; receives the low 32 bits of the F/W result, written
 *                 only when the packet succeeds
 *
 * Return: 0 on success, otherwise the send_cpu_message error (logged unless
 * it is -EAGAIN).
 */
int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_HBM_PENDING_ROWS_STATUS << CPUCP_PKT_CTL_OPCODE_SHIFT);

	/* a timeout value of 0 is handed to send_cpu_message for this packet */
	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
	if (!rc) {
		*pend_rows_num = (u32) result;
		return rc;
	}

	if (rc != -EAGAIN)
		dev_err(hdev->dev,
			"Failed to handle CPU-CP pending rows info pkt, error %d\n", rc);

	return rc;
}
1347
/**
 * hl_fw_cpucp_engine_core_asid_set() - ask CPU-CP to set the engine core ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID value placed in the packet
 *
 * No result is requested from the F/W for this packet.
 *
 * Return: 0 on success, otherwise the send_cpu_message error. Every error,
 * including -EAGAIN, is logged.
 */
int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid)
{
	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.value = cpu_to_le64(asid);
	pkt.ctl = cpu_to_le32(CPUCP_PACKET_ENGINE_CORE_ASID_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						HL_CPUCP_INFO_TIMEOUT_USEC, NULL);
	if (!rc)
		return rc;

	dev_err(hdev->dev,
		"Failed on ASID configuration request for engine core, error %d\n",
		rc);

	return rc;
}
1367
hl_fw_ask_hard_reset_without_linux(struct hl_device * hdev)1368 void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
1369 {
1370 struct static_fw_load_mgr *static_loader =
1371 &hdev->fw_loader.static_loader;
1372 int rc;
1373
1374 if (hdev->asic_prop.dynamic_fw_load) {
1375 rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
1376 COMMS_RST_DEV, 0, false,
1377 hdev->fw_loader.cpu_timeout);
1378 if (rc)
1379 dev_err(hdev->dev, "Failed sending COMMS_RST_DEV\n");
1380 } else {
1381 WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_RST_DEV);
1382 }
1383 }
1384
hl_fw_ask_halt_machine_without_linux(struct hl_device * hdev)1385 void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
1386 {
1387 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
1388 u32 status, cpu_boot_status_reg, cpu_timeout;
1389 struct static_fw_load_mgr *static_loader;
1390 struct pre_fw_load_props *pre_fw_load;
1391 int rc;
1392
1393 if (hdev->device_cpu_is_halted)
1394 return;
1395
1396 /* Stop device CPU to make sure nothing bad happens */
1397 if (hdev->asic_prop.dynamic_fw_load) {
1398 pre_fw_load = &fw_loader->pre_fw_load;
1399 cpu_timeout = fw_loader->cpu_timeout;
1400 cpu_boot_status_reg = pre_fw_load->cpu_boot_status_reg;
1401
1402 rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
1403 COMMS_GOTO_WFE, 0, false, cpu_timeout);
1404 if (rc) {
1405 dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
1406 } else {
1407 rc = hl_poll_timeout(
1408 hdev,
1409 cpu_boot_status_reg,
1410 status,
1411 status == CPU_BOOT_STATUS_IN_WFE,
1412 hdev->fw_poll_interval_usec,
1413 cpu_timeout);
1414 if (rc)
1415 dev_err(hdev->dev, "Current status=%u. Timed-out updating to WFE\n",
1416 status);
1417 }
1418 } else {
1419 static_loader = &hdev->fw_loader.static_loader;
1420 WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
1421 msleep(static_loader->cpu_reset_wait_msec);
1422
1423 /* Must clear this register in order to prevent preboot
1424 * from reading WFE after reboot
1425 */
1426 WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
1427 }
1428
1429 hdev->device_cpu_is_halted = true;
1430 }
1431
/**
 * detect_cpu_boot_status() - log a description of the device CPU boot status
 *
 * @hdev: pointer to hl_device structure
 * @status: value read from the CPU boot status register
 *
 * Every status is reported with dev_err. Codes that have no dedicated message
 * are printed numerically.
 */
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
	struct device *dev = hdev->dev;

	/* Newer f/w versions no longer report some of these codes; they are
	 * still handled here for backward compatibility
	 */
	switch (status) {
	case CPU_BOOT_STATUS_NA:
		dev_err(dev, "Device boot progress - BTL/ROM did NOT run\n");
		break;
	case CPU_BOOT_STATUS_IN_WFE:
		dev_err(dev, "Device boot progress - Stuck inside WFE loop\n");
		break;
	case CPU_BOOT_STATUS_IN_BTL:
		dev_err(dev, "Device boot progress - Stuck in BTL\n");
		break;
	case CPU_BOOT_STATUS_IN_PREBOOT:
		dev_err(dev, "Device boot progress - Stuck in Preboot\n");
		break;
	case CPU_BOOT_STATUS_IN_SPL:
		dev_err(dev, "Device boot progress - Stuck in SPL\n");
		break;
	case CPU_BOOT_STATUS_IN_UBOOT:
		dev_err(dev, "Device boot progress - Stuck in u-boot\n");
		break;
	case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
		dev_err(dev, "Device boot progress - DRAM initialization failed\n");
		break;
	case CPU_BOOT_STATUS_UBOOT_NOT_READY:
		dev_err(dev, "Device boot progress - Cannot boot\n");
		break;
	case CPU_BOOT_STATUS_TS_INIT_FAIL:
		dev_err(dev,
			"Device boot progress - Thermal Sensor initialization failed\n");
		break;
	case CPU_BOOT_STATUS_SECURITY_READY:
		dev_err(dev,
			"Device boot progress - Stuck in preboot after security initialization\n");
		break;
	case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
		dev_err(dev,
			"Device boot progress - Stuck in preparation for shutdown\n");
		break;
	default:
		dev_err(dev,
			"Device boot progress - Invalid or unexpected status code %d\n", status);
		break;
	}
}
1488
hl_fw_wait_preboot_ready(struct hl_device * hdev)1489 int hl_fw_wait_preboot_ready(struct hl_device *hdev)
1490 {
1491 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
1492 u32 status = 0, timeout;
1493 int rc, tries = 1, fw_err = 0;
1494 bool preboot_still_runs;
1495
1496 /* Need to check two possible scenarios:
1497 *
1498 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
1499 * the preboot is waiting for the boot fit
1500 *
1501 * All other status values - for older firmwares where the uboot was
1502 * loaded from the FLASH
1503 */
1504 timeout = pre_fw_load->wait_for_preboot_timeout;
1505 retry:
1506 rc = hl_poll_timeout(
1507 hdev,
1508 pre_fw_load->cpu_boot_status_reg,
1509 status,
1510 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
1511 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
1512 (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
1513 hdev->fw_poll_interval_usec,
1514 timeout);
1515 /*
1516 * if F/W reports "security-ready" it means preboot might take longer.
1517 * If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
1518 * with that timeout
1519 */
1520 preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
1521 status == CPU_BOOT_STATUS_IN_PREBOOT ||
1522 status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
1523 status == CPU_BOOT_STATUS_DRAM_RDY);
1524
1525 if (rc && tries && preboot_still_runs) {
1526 tries--;
1527 if (pre_fw_load->wait_for_preboot_extended_timeout) {
1528 timeout = pre_fw_load->wait_for_preboot_extended_timeout;
1529 goto retry;
1530 }
1531 }
1532
1533 /* If we read all FF, then something is totally wrong, no point
1534 * of reading specific errors
1535 */
1536 if (status != -1)
1537 fw_err = fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
1538 pre_fw_load->boot_err1_reg,
1539 pre_fw_load->sts_boot_dev_sts0_reg,
1540 pre_fw_load->sts_boot_dev_sts1_reg);
1541 if (rc || fw_err) {
1542 detect_cpu_boot_status(hdev, status);
1543 dev_err(hdev->dev, "CPU boot %s (status = %d)\n",
1544 fw_err ? "failed due to an error" : "ready timeout", status);
1545 return -EIO;
1546 }
1547
1548 hdev->fw_loader.fw_comp_loaded |= FW_TYPE_PREBOOT_CPU;
1549
1550 return 0;
1551 }
1552
hl_fw_read_preboot_caps(struct hl_device * hdev)1553 static int hl_fw_read_preboot_caps(struct hl_device *hdev)
1554 {
1555 struct pre_fw_load_props *pre_fw_load;
1556 struct asic_fixed_properties *prop;
1557 u32 reg_val;
1558 int rc;
1559
1560 prop = &hdev->asic_prop;
1561 pre_fw_load = &hdev->fw_loader.pre_fw_load;
1562
1563 rc = hl_fw_wait_preboot_ready(hdev);
1564 if (rc)
1565 return rc;
1566
1567 /*
1568 * the registers DEV_STS* contain FW capabilities/features.
1569 * We can rely on this registers only if bit CPU_BOOT_DEV_STS*_ENABLED
1570 * is set.
1571 * In the first read of this register we store the value of this
1572 * register ONLY if the register is enabled (which will be propagated
1573 * to next stages) and also mark the register as valid.
1574 * In case it is not enabled the stored value will be left 0- all
1575 * caps/features are off
1576 */
1577 reg_val = RREG32(pre_fw_load->sts_boot_dev_sts0_reg);
1578 if (reg_val & CPU_BOOT_DEV_STS0_ENABLED) {
1579 prop->fw_cpu_boot_dev_sts0_valid = true;
1580 prop->fw_preboot_cpu_boot_dev_sts0 = reg_val;
1581 }
1582
1583 reg_val = RREG32(pre_fw_load->sts_boot_dev_sts1_reg);
1584 if (reg_val & CPU_BOOT_DEV_STS1_ENABLED) {
1585 prop->fw_cpu_boot_dev_sts1_valid = true;
1586 prop->fw_preboot_cpu_boot_dev_sts1 = reg_val;
1587 }
1588
1589 prop->dynamic_fw_load = !!(prop->fw_preboot_cpu_boot_dev_sts0 &
1590 CPU_BOOT_DEV_STS0_FW_LD_COM_EN);
1591
1592 /* initialize FW loader once we know what load protocol is used */
1593 hdev->asic_funcs->init_firmware_loader(hdev);
1594
1595 dev_dbg(hdev->dev, "Attempting %s FW load\n",
1596 prop->dynamic_fw_load ? "dynamic" : "legacy");
1597 return 0;
1598 }
1599
hl_fw_static_read_device_fw_version(struct hl_device * hdev,enum hl_fw_component fwc)1600 static int hl_fw_static_read_device_fw_version(struct hl_device *hdev,
1601 enum hl_fw_component fwc)
1602 {
1603 struct asic_fixed_properties *prop = &hdev->asic_prop;
1604 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
1605 struct static_fw_load_mgr *static_loader;
1606 char *dest, *boot_ver, *preboot_ver;
1607 u32 ver_off, limit;
1608 const char *name;
1609 char btl_ver[32];
1610
1611 static_loader = &hdev->fw_loader.static_loader;
1612
1613 switch (fwc) {
1614 case FW_COMP_BOOT_FIT:
1615 ver_off = RREG32(static_loader->boot_fit_version_offset_reg);
1616 dest = prop->uboot_ver;
1617 name = "Boot-fit";
1618 limit = static_loader->boot_fit_version_max_off;
1619 break;
1620 case FW_COMP_PREBOOT:
1621 ver_off = RREG32(static_loader->preboot_version_offset_reg);
1622 dest = prop->preboot_ver;
1623 name = "Preboot";
1624 limit = static_loader->preboot_version_max_off;
1625 break;
1626 default:
1627 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
1628 return -EIO;
1629 }
1630
1631 ver_off &= static_loader->sram_offset_mask;
1632
1633 if (ver_off < limit) {
1634 memcpy_fromio(dest,
1635 hdev->pcie_bar[fw_loader->sram_bar_id] + ver_off,
1636 VERSION_MAX_LEN);
1637 } else {
1638 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
1639 name, ver_off);
1640 strscpy(dest, "unavailable", VERSION_MAX_LEN);
1641 return -EIO;
1642 }
1643
1644 if (fwc == FW_COMP_BOOT_FIT) {
1645 boot_ver = extract_fw_ver_from_str(prop->uboot_ver);
1646 if (boot_ver) {
1647 dev_info(hdev->dev, "boot-fit version %s\n", boot_ver);
1648 kfree(boot_ver);
1649 }
1650 } else if (fwc == FW_COMP_PREBOOT) {
1651 preboot_ver = strnstr(prop->preboot_ver, "Preboot",
1652 VERSION_MAX_LEN);
1653 if (preboot_ver && preboot_ver != prop->preboot_ver) {
1654 strscpy(btl_ver, prop->preboot_ver,
1655 min((int) (preboot_ver - prop->preboot_ver),
1656 31));
1657 dev_info(hdev->dev, "%s\n", btl_ver);
1658 }
1659
1660 preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
1661 if (preboot_ver) {
1662 dev_info(hdev->dev, "preboot version %s\n",
1663 preboot_ver);
1664 kfree(preboot_ver);
1665 }
1666 }
1667
1668 return 0;
1669 }
1670
1671 /**
1672 * hl_fw_preboot_update_state - update internal data structures during
1673 * handshake with preboot
1674 *
1675 *
1676 * @hdev: pointer to the habanalabs device structure
1677 *
1678 * @return 0 on success, otherwise non-zero error code
1679 */
hl_fw_preboot_update_state(struct hl_device * hdev)1680 static void hl_fw_preboot_update_state(struct hl_device *hdev)
1681 {
1682 struct asic_fixed_properties *prop = &hdev->asic_prop;
1683 u32 cpu_boot_dev_sts0, cpu_boot_dev_sts1;
1684
1685 cpu_boot_dev_sts0 = prop->fw_preboot_cpu_boot_dev_sts0;
1686 cpu_boot_dev_sts1 = prop->fw_preboot_cpu_boot_dev_sts1;
1687
1688 /* We read boot_dev_sts registers multiple times during boot:
1689 * 1. preboot - a. Check whether the security status bits are valid
1690 * b. Check whether fw security is enabled
1691 * c. Check whether hard reset is done by preboot
1692 * 2. boot cpu - a. Fetch boot cpu security status
1693 * b. Check whether hard reset is done by boot cpu
1694 * 3. FW application - a. Fetch fw application security status
1695 * b. Check whether hard reset is done by fw app
1696 */
1697 prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
1698
1699 prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN);
1700
1701 dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
1702 cpu_boot_dev_sts0);
1703
1704 dev_dbg(hdev->dev, "Firmware preboot boot device status1 %#x\n",
1705 cpu_boot_dev_sts1);
1706
1707 dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
1708 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
1709
1710 dev_dbg(hdev->dev, "firmware-level security is %s\n",
1711 prop->fw_security_enabled ? "enabled" : "disabled");
1712
1713 dev_dbg(hdev->dev, "GIC controller is %s\n",
1714 prop->gic_interrupts_enable ? "enabled" : "disabled");
1715 }
1716
hl_fw_static_read_preboot_status(struct hl_device * hdev)1717 static int hl_fw_static_read_preboot_status(struct hl_device *hdev)
1718 {
1719 int rc;
1720
1721 rc = hl_fw_static_read_device_fw_version(hdev, FW_COMP_PREBOOT);
1722 if (rc)
1723 return rc;
1724
1725 return 0;
1726 }
1727
hl_fw_read_preboot_status(struct hl_device * hdev)1728 int hl_fw_read_preboot_status(struct hl_device *hdev)
1729 {
1730 int rc;
1731
1732 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
1733 return 0;
1734
1735 /* get FW pre-load parameters */
1736 hdev->asic_funcs->init_firmware_preload_params(hdev);
1737
1738 /*
1739 * In order to determine boot method (static VS dynamic) we need to
1740 * read the boot caps register
1741 */
1742 rc = hl_fw_read_preboot_caps(hdev);
1743 if (rc)
1744 return rc;
1745
1746 hl_fw_preboot_update_state(hdev);
1747
1748 /* no need to read preboot status in dynamic load */
1749 if (hdev->asic_prop.dynamic_fw_load)
1750 return 0;
1751
1752 return hl_fw_static_read_preboot_status(hdev);
1753 }
1754
1755 /* associate string with COMM status */
1756 static char *hl_dynamic_fw_status_str[COMMS_STS_INVLD_LAST] = {
1757 [COMMS_STS_NOOP] = "NOOP",
1758 [COMMS_STS_ACK] = "ACK",
1759 [COMMS_STS_OK] = "OK",
1760 [COMMS_STS_ERR] = "ERR",
1761 [COMMS_STS_VALID_ERR] = "VALID_ERR",
1762 [COMMS_STS_TIMEOUT_ERR] = "TIMEOUT_ERR",
1763 };
1764
1765 /**
1766 * hl_fw_dynamic_report_error_status - report error status
1767 *
1768 * @hdev: pointer to the habanalabs device structure
1769 * @status: value of FW status register
1770 * @expected_status: the expected status
1771 */
hl_fw_dynamic_report_error_status(struct hl_device * hdev,u32 status,enum comms_sts expected_status)1772 static void hl_fw_dynamic_report_error_status(struct hl_device *hdev,
1773 u32 status,
1774 enum comms_sts expected_status)
1775 {
1776 enum comms_sts comm_status =
1777 FIELD_GET(COMMS_STATUS_STATUS_MASK, status);
1778
1779 if (comm_status < COMMS_STS_INVLD_LAST)
1780 dev_err(hdev->dev, "Device status %s, expected status: %s\n",
1781 hl_dynamic_fw_status_str[comm_status],
1782 hl_dynamic_fw_status_str[expected_status]);
1783 else
1784 dev_err(hdev->dev, "Device status unknown %d, expected status: %s\n",
1785 comm_status,
1786 hl_dynamic_fw_status_str[expected_status]);
1787 }
1788
1789 /**
1790 * hl_fw_dynamic_send_cmd - send LKD to FW cmd
1791 *
1792 * @hdev: pointer to the habanalabs device structure
1793 * @fw_loader: managing structure for loading device's FW
1794 * @cmd: LKD to FW cmd code
1795 * @size: size of next FW component to be loaded (0 if not necessary)
1796 *
1797 * LDK to FW exact command layout is defined at struct comms_command.
1798 * note: the size argument is used only when the next FW component should be
1799 * loaded, otherwise it shall be 0. the size is used by the FW in later
1800 * protocol stages and when sending only indicating the amount of memory
1801 * to be allocated by the FW to receive the next boot component.
1802 */
hl_fw_dynamic_send_cmd(struct hl_device * hdev,struct fw_load_mgr * fw_loader,enum comms_cmd cmd,unsigned int size)1803 static void hl_fw_dynamic_send_cmd(struct hl_device *hdev,
1804 struct fw_load_mgr *fw_loader,
1805 enum comms_cmd cmd, unsigned int size)
1806 {
1807 struct cpu_dyn_regs *dyn_regs;
1808 u32 val;
1809
1810 dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
1811
1812 val = FIELD_PREP(COMMS_COMMAND_CMD_MASK, cmd);
1813 val |= FIELD_PREP(COMMS_COMMAND_SIZE_MASK, size);
1814
1815 trace_habanalabs_comms_send_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]);
1816 WREG32(le32_to_cpu(dyn_regs->kmd_msg_to_cpu), val);
1817 }
1818
1819 /**
1820 * hl_fw_dynamic_extract_fw_response - update the FW response
1821 *
1822 * @hdev: pointer to the habanalabs device structure
1823 * @fw_loader: managing structure for loading device's FW
1824 * @response: FW response
1825 * @status: the status read from CPU status register
1826 *
1827 * @return 0 on success, otherwise non-zero error code
1828 */
hl_fw_dynamic_extract_fw_response(struct hl_device * hdev,struct fw_load_mgr * fw_loader,struct fw_response * response,u32 status)1829 static int hl_fw_dynamic_extract_fw_response(struct hl_device *hdev,
1830 struct fw_load_mgr *fw_loader,
1831 struct fw_response *response,
1832 u32 status)
1833 {
1834 response->status = FIELD_GET(COMMS_STATUS_STATUS_MASK, status);
1835 response->ram_offset = FIELD_GET(COMMS_STATUS_OFFSET_MASK, status) <<
1836 COMMS_STATUS_OFFSET_ALIGN_SHIFT;
1837 response->ram_type = FIELD_GET(COMMS_STATUS_RAM_TYPE_MASK, status);
1838
1839 if ((response->ram_type != COMMS_SRAM) &&
1840 (response->ram_type != COMMS_DRAM)) {
1841 dev_err(hdev->dev, "FW status: invalid RAM type %u\n",
1842 response->ram_type);
1843 return -EIO;
1844 }
1845
1846 return 0;
1847 }
1848
1849 /**
1850 * hl_fw_dynamic_wait_for_status - wait for status in dynamic FW load
1851 *
1852 * @hdev: pointer to the habanalabs device structure
1853 * @fw_loader: managing structure for loading device's FW
1854 * @expected_status: expected status to wait for
1855 * @timeout: timeout for status wait
1856 *
1857 * @return 0 on success, otherwise non-zero error code
1858 *
1859 * waiting for status from FW include polling the FW status register until
1860 * expected status is received or timeout occurs (whatever occurs first).
1861 */
hl_fw_dynamic_wait_for_status(struct hl_device * hdev,struct fw_load_mgr * fw_loader,enum comms_sts expected_status,u32 timeout)1862 static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
1863 struct fw_load_mgr *fw_loader,
1864 enum comms_sts expected_status,
1865 u32 timeout)
1866 {
1867 struct cpu_dyn_regs *dyn_regs;
1868 u32 status;
1869 int rc;
1870
1871 dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
1872
1873 trace_habanalabs_comms_wait_status(&hdev->pdev->dev, comms_sts_str_arr[expected_status]);
1874
1875 /* Wait for expected status */
1876 rc = hl_poll_timeout(
1877 hdev,
1878 le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
1879 status,
1880 FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
1881 hdev->fw_comms_poll_interval_usec,
1882 timeout);
1883
1884 if (rc) {
1885 hl_fw_dynamic_report_error_status(hdev, status,
1886 expected_status);
1887 return -EIO;
1888 }
1889
1890 trace_habanalabs_comms_wait_status_done(&hdev->pdev->dev,
1891 comms_sts_str_arr[expected_status]);
1892
1893 /*
1894 * skip storing FW response for NOOP to preserve the actual desired
1895 * FW status
1896 */
1897 if (expected_status == COMMS_STS_NOOP)
1898 return 0;
1899
1900 rc = hl_fw_dynamic_extract_fw_response(hdev, fw_loader,
1901 &fw_loader->dynamic_loader.response,
1902 status);
1903 return rc;
1904 }
1905
1906 /**
1907 * hl_fw_dynamic_send_clear_cmd - send clear command to FW
1908 *
1909 * @hdev: pointer to the habanalabs device structure
1910 * @fw_loader: managing structure for loading device's FW
1911 *
1912 * @return 0 on success, otherwise non-zero error code
1913 *
1914 * after command cycle between LKD to FW CPU (i.e. LKD got an expected status
1915 * from FW) we need to clear the CPU status register in order to avoid garbage
1916 * between command cycles.
1917 * This is done by sending clear command and polling the CPU to LKD status
1918 * register to hold the status NOOP
1919 */
hl_fw_dynamic_send_clear_cmd(struct hl_device * hdev,struct fw_load_mgr * fw_loader)1920 static int hl_fw_dynamic_send_clear_cmd(struct hl_device *hdev,
1921 struct fw_load_mgr *fw_loader)
1922 {
1923 hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_CLR_STS, 0);
1924
1925 return hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_NOOP,
1926 fw_loader->cpu_timeout);
1927 }
1928
1929 /**
1930 * hl_fw_dynamic_send_protocol_cmd - send LKD to FW cmd and wait for ACK
1931 *
1932 * @hdev: pointer to the habanalabs device structure
1933 * @fw_loader: managing structure for loading device's FW
1934 * @cmd: LKD to FW cmd code
1935 * @size: size of next FW component to be loaded (0 if not necessary)
1936 * @wait_ok: if true also wait for OK response from FW
1937 * @timeout: timeout for status wait
1938 *
1939 * @return 0 on success, otherwise non-zero error code
1940 *
1941 * brief:
1942 * when sending protocol command we have the following steps:
1943 * - send clear (clear command and verify clear status register)
1944 * - send the actual protocol command
1945 * - wait for ACK on the protocol command
1946 * - send clear
1947 * - send NOOP
1948 * if, in addition, the specific protocol command should wait for OK then:
1949 * - wait for OK
1950 * - send clear
1951 * - send NOOP
1952 *
1953 * NOTES:
1954 * send clear: this is necessary in order to clear the status register to avoid
1955 * leftovers between command
1956 * NOOP command: necessary to avoid loop on the clear command by the FW
1957 */
hl_fw_dynamic_send_protocol_cmd(struct hl_device * hdev,struct fw_load_mgr * fw_loader,enum comms_cmd cmd,unsigned int size,bool wait_ok,u32 timeout)1958 int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
1959 struct fw_load_mgr *fw_loader,
1960 enum comms_cmd cmd, unsigned int size,
1961 bool wait_ok, u32 timeout)
1962 {
1963 int rc;
1964
1965 trace_habanalabs_comms_protocol_cmd(&hdev->pdev->dev, comms_cmd_str_arr[cmd]);
1966
1967 /* first send clear command to clean former commands */
1968 rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1969 if (rc)
1970 return rc;
1971
1972 /* send the actual command */
1973 hl_fw_dynamic_send_cmd(hdev, fw_loader, cmd, size);
1974
1975 /* wait for ACK for the command */
1976 rc = hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_ACK,
1977 timeout);
1978 if (rc)
1979 return rc;
1980
1981 /* clear command to prepare for NOOP command */
1982 rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1983 if (rc)
1984 return rc;
1985
1986 /* send the actual NOOP command */
1987 hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_NOOP, 0);
1988
1989 if (!wait_ok)
1990 return 0;
1991
1992 rc = hl_fw_dynamic_wait_for_status(hdev, fw_loader, COMMS_STS_OK,
1993 timeout);
1994 if (rc)
1995 return rc;
1996
1997 /* clear command to prepare for NOOP command */
1998 rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
1999 if (rc)
2000 return rc;
2001
2002 /* send the actual NOOP command */
2003 hl_fw_dynamic_send_cmd(hdev, fw_loader, COMMS_NOOP, 0);
2004
2005 return 0;
2006 }
2007
2008 /**
2009 * hl_fw_compat_crc32 - CRC compatible with FW
2010 *
2011 * @data: pointer to the data
2012 * @size: size of the data
2013 *
2014 * @return the CRC32 result
2015 *
2016 * NOTE: kernel's CRC32 differs from standard CRC32 calculation.
2017 * in order to be aligned we need to flip the bits of both the input
2018 * initial CRC and kernel's CRC32 result.
2019 * in addition both sides use initial CRC of 0,
2020 */
hl_fw_compat_crc32(u8 * data,size_t size)2021 static u32 hl_fw_compat_crc32(u8 *data, size_t size)
2022 {
2023 return ~crc32_le(~((u32)0), data, size);
2024 }
2025
2026 /**
2027 * hl_fw_dynamic_validate_memory_bound - validate memory bounds for memory
2028 * transfer (image or descriptor) between
2029 * host and FW
2030 *
2031 * @hdev: pointer to the habanalabs device structure
2032 * @addr: device address of memory transfer
2033 * @size: memory transfer size
2034 * @region: PCI memory region
2035 *
2036 * @return 0 on success, otherwise non-zero error code
2037 */
hl_fw_dynamic_validate_memory_bound(struct hl_device * hdev,u64 addr,size_t size,struct pci_mem_region * region)2038 static int hl_fw_dynamic_validate_memory_bound(struct hl_device *hdev,
2039 u64 addr, size_t size,
2040 struct pci_mem_region *region)
2041 {
2042 u64 end_addr;
2043
2044 /* now make sure that the memory transfer is within region's bounds */
2045 end_addr = addr + size;
2046 if (end_addr >= region->region_base + region->region_size) {
2047 dev_err(hdev->dev,
2048 "dynamic FW load: memory transfer end address out of memory region bounds. addr: %llx\n",
2049 end_addr);
2050 return -EIO;
2051 }
2052
2053 /*
2054 * now make sure memory transfer is within predefined BAR bounds.
2055 * this is to make sure we do not need to set the bar (e.g. for DRAM
2056 * memory transfers)
2057 */
2058 if (end_addr >= region->region_base - region->offset_in_bar +
2059 region->bar_size) {
2060 dev_err(hdev->dev,
2061 "FW image beyond PCI BAR bounds\n");
2062 return -EIO;
2063 }
2064
2065 return 0;
2066 }
2067
2068 /**
2069 * hl_fw_dynamic_validate_descriptor - validate FW descriptor
2070 *
2071 * @hdev: pointer to the habanalabs device structure
2072 * @fw_loader: managing structure for loading device's FW
2073 * @fw_desc: the descriptor from FW
2074 *
2075 * @return 0 on success, otherwise non-zero error code
2076 */
hl_fw_dynamic_validate_descriptor(struct hl_device * hdev,struct fw_load_mgr * fw_loader,struct lkd_fw_comms_desc * fw_desc)2077 static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
2078 struct fw_load_mgr *fw_loader,
2079 struct lkd_fw_comms_desc *fw_desc)
2080 {
2081 struct pci_mem_region *region;
2082 enum pci_region region_id;
2083 size_t data_size;
2084 u32 data_crc32;
2085 u8 *data_ptr;
2086 u64 addr;
2087 int rc;
2088
2089 if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC)
2090 dev_dbg(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
2091 fw_desc->header.magic);
2092
2093 if (fw_desc->header.version != HL_COMMS_DESC_VER)
2094 dev_dbg(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
2095 fw_desc->header.version);
2096
2097 /*
2098 * Calc CRC32 of data without header. use the size of the descriptor
2099 * reported by firmware, without calculating it ourself, to allow adding
2100 * more fields to the lkd_fw_comms_desc structure.
2101 * note that no alignment/stride address issues here as all structures
2102 * are 64 bit padded.
2103 */
2104 data_ptr = (u8 *)fw_desc + sizeof(struct comms_msg_header);
2105 data_size = le16_to_cpu(fw_desc->header.size);
2106
2107 data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
2108 if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) {
2109 dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
2110 data_crc32, fw_desc->header.crc32);
2111 return -EIO;
2112 }
2113
2114 /* find memory region to which to copy the image */
2115 addr = le64_to_cpu(fw_desc->img_addr);
2116 region_id = hl_get_pci_memory_region(hdev, addr);
2117 if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) {
2118 dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr);
2119 return -EIO;
2120 }
2121
2122 region = &hdev->pci_mem_region[region_id];
2123
2124 /* store the region for the copy stage */
2125 fw_loader->dynamic_loader.image_region = region;
2126
2127 /*
2128 * here we know that the start address is valid, now make sure that the
2129 * image is within region's bounds
2130 */
2131 rc = hl_fw_dynamic_validate_memory_bound(hdev, addr,
2132 fw_loader->dynamic_loader.fw_image_size,
2133 region);
2134 if (rc) {
2135 dev_err(hdev->dev, "invalid mem transfer request for FW image\n");
2136 return rc;
2137 }
2138
2139 /* here we can mark the descriptor as valid as the content has been validated */
2140 fw_loader->dynamic_loader.fw_desc_valid = true;
2141
2142 return 0;
2143 }
2144
hl_fw_dynamic_validate_response(struct hl_device * hdev,struct fw_response * response,struct pci_mem_region * region)2145 static int hl_fw_dynamic_validate_response(struct hl_device *hdev,
2146 struct fw_response *response,
2147 struct pci_mem_region *region)
2148 {
2149 u64 device_addr;
2150 int rc;
2151
2152 device_addr = region->region_base + response->ram_offset;
2153
2154 /*
2155 * validate that the descriptor is within region's bounds
2156 * Note that as the start address was supplied according to the RAM
2157 * type- testing only the end address is enough
2158 */
2159 rc = hl_fw_dynamic_validate_memory_bound(hdev, device_addr,
2160 sizeof(struct lkd_fw_comms_desc),
2161 region);
2162 return rc;
2163 }
2164
2165 /*
2166 * hl_fw_dynamic_read_descriptor_msg - read and show the ascii msg that sent by fw
2167 *
2168 * @hdev: pointer to the habanalabs device structure
2169 * @fw_desc: the descriptor from FW
2170 */
hl_fw_dynamic_read_descriptor_msg(struct hl_device * hdev,struct lkd_fw_comms_desc * fw_desc)2171 static void hl_fw_dynamic_read_descriptor_msg(struct hl_device *hdev,
2172 struct lkd_fw_comms_desc *fw_desc)
2173 {
2174 int i;
2175 char *msg;
2176
2177 for (i = 0 ; i < LKD_FW_ASCII_MSG_MAX ; i++) {
2178 if (!fw_desc->ascii_msg[i].valid)
2179 return;
2180
2181 /* force NULL termination */
2182 msg = fw_desc->ascii_msg[i].msg;
2183 msg[LKD_FW_ASCII_MSG_MAX_LEN - 1] = '\0';
2184
2185 switch (fw_desc->ascii_msg[i].msg_lvl) {
2186 case LKD_FW_ASCII_MSG_ERR:
2187 dev_err(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2188 break;
2189 case LKD_FW_ASCII_MSG_WRN:
2190 dev_warn(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2191 break;
2192 case LKD_FW_ASCII_MSG_INF:
2193 dev_info(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2194 break;
2195 default:
2196 dev_dbg(hdev->dev, "fw: %s", fw_desc->ascii_msg[i].msg);
2197 break;
2198 }
2199 }
2200 }
2201
2202 /**
2203 * hl_fw_dynamic_read_and_validate_descriptor - read and validate FW descriptor
2204 *
2205 * @hdev: pointer to the habanalabs device structure
2206 * @fw_loader: managing structure for loading device's FW
2207 *
2208 * @return 0 on success, otherwise non-zero error code
2209 */
hl_fw_dynamic_read_and_validate_descriptor(struct hl_device * hdev,struct fw_load_mgr * fw_loader)2210 static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
2211 struct fw_load_mgr *fw_loader)
2212 {
2213 struct lkd_fw_comms_desc *fw_desc;
2214 struct pci_mem_region *region;
2215 struct fw_response *response;
2216 void *temp_fw_desc;
2217 void __iomem *src;
2218 u16 fw_data_size;
2219 enum pci_region region_id;
2220 int rc;
2221
2222 fw_desc = &fw_loader->dynamic_loader.comm_desc;
2223 response = &fw_loader->dynamic_loader.response;
2224
2225 region_id = (response->ram_type == COMMS_SRAM) ?
2226 PCI_REGION_SRAM : PCI_REGION_DRAM;
2227
2228 region = &hdev->pci_mem_region[region_id];
2229
2230 rc = hl_fw_dynamic_validate_response(hdev, response, region);
2231 if (rc) {
2232 dev_err(hdev->dev,
2233 "invalid mem transfer request for FW descriptor\n");
2234 return rc;
2235 }
2236
2237 /*
2238 * extract address to copy the descriptor from
2239 * in addition, as the descriptor value is going to be over-ridden by new data- we mark it
2240 * as invalid.
2241 * it will be marked again as valid once validated
2242 */
2243 fw_loader->dynamic_loader.fw_desc_valid = false;
2244 src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2245 response->ram_offset;
2246
2247 /*
2248 * We do the copy of the fw descriptor in 2 phases:
2249 * 1. copy the header + data info according to our lkd_fw_comms_desc definition.
2250 * then we're able to read the actual data size provided by fw.
2251 * this is needed for cases where data in descriptor was changed(add/remove)
2252 * in embedded specs header file before updating lkd copy of the header file
2253 * 2. copy descriptor to temporary buffer with aligned size and send it to validation
2254 */
2255 memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
2256 fw_data_size = le16_to_cpu(fw_desc->header.size);
2257
2258 temp_fw_desc = vzalloc(sizeof(struct comms_msg_header) + fw_data_size);
2259 if (!temp_fw_desc)
2260 return -ENOMEM;
2261
2262 memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_msg_header) + fw_data_size);
2263
2264 rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader,
2265 (struct lkd_fw_comms_desc *) temp_fw_desc);
2266
2267 if (!rc)
2268 hl_fw_dynamic_read_descriptor_msg(hdev, temp_fw_desc);
2269
2270 vfree(temp_fw_desc);
2271
2272 return rc;
2273 }
2274
2275 /**
2276 * hl_fw_dynamic_request_descriptor - handshake with CPU to get FW descriptor
2277 *
2278 * @hdev: pointer to the habanalabs device structure
2279 * @fw_loader: managing structure for loading device's FW
2280 * @next_image_size: size to allocate for next FW component
2281 *
2282 * @return 0 on success, otherwise non-zero error code
2283 */
hl_fw_dynamic_request_descriptor(struct hl_device * hdev,struct fw_load_mgr * fw_loader,size_t next_image_size)2284 static int hl_fw_dynamic_request_descriptor(struct hl_device *hdev,
2285 struct fw_load_mgr *fw_loader,
2286 size_t next_image_size)
2287 {
2288 int rc;
2289
2290 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_PREP_DESC,
2291 next_image_size, true,
2292 fw_loader->cpu_timeout);
2293 if (rc)
2294 return rc;
2295
2296 return hl_fw_dynamic_read_and_validate_descriptor(hdev, fw_loader);
2297 }
2298
2299 /**
2300 * hl_fw_dynamic_read_device_fw_version - read FW version to exposed properties
2301 *
2302 * @hdev: pointer to the habanalabs device structure
2303 * @fwc: the firmware component
2304 * @fw_version: fw component's version string
2305 */
hl_fw_dynamic_read_device_fw_version(struct hl_device * hdev,enum hl_fw_component fwc,const char * fw_version)2306 static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev,
2307 enum hl_fw_component fwc,
2308 const char *fw_version)
2309 {
2310 struct asic_fixed_properties *prop = &hdev->asic_prop;
2311 char *preboot_ver, *boot_ver;
2312 char btl_ver[32];
2313 int rc;
2314
2315 switch (fwc) {
2316 case FW_COMP_BOOT_FIT:
2317 strscpy(prop->uboot_ver, fw_version, VERSION_MAX_LEN);
2318 boot_ver = extract_fw_ver_from_str(prop->uboot_ver);
2319 if (boot_ver) {
2320 dev_info(hdev->dev, "boot-fit version %s\n", boot_ver);
2321 kfree(boot_ver);
2322 }
2323
2324 break;
2325 case FW_COMP_PREBOOT:
2326 strscpy(prop->preboot_ver, fw_version, VERSION_MAX_LEN);
2327 preboot_ver = strnstr(prop->preboot_ver, "Preboot", VERSION_MAX_LEN);
2328 dev_info(hdev->dev, "preboot full version: '%s'\n", preboot_ver);
2329
2330 if (preboot_ver && preboot_ver != prop->preboot_ver) {
2331 strscpy(btl_ver, prop->preboot_ver,
2332 min((int) (preboot_ver - prop->preboot_ver), 31));
2333 dev_info(hdev->dev, "%s\n", btl_ver);
2334 }
2335
2336 rc = hl_get_sw_major_minor_subminor(hdev, preboot_ver);
2337 if (rc)
2338 return rc;
2339 preboot_ver = extract_fw_ver_from_str(prop->preboot_ver);
2340 if (preboot_ver) {
2341 rc = hl_get_preboot_major_minor(hdev, preboot_ver);
2342 kfree(preboot_ver);
2343 if (rc)
2344 return rc;
2345 }
2346
2347 break;
2348 default:
2349 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2350 return -EINVAL;
2351 }
2352
2353 return 0;
2354 }
2355
2356 /**
2357 * hl_fw_dynamic_copy_image - copy image to memory allocated by the FW
2358 *
2359 * @hdev: pointer to the habanalabs device structure
2360 * @fw: fw descriptor
2361 * @fw_loader: managing structure for loading device's FW
2362 */
hl_fw_dynamic_copy_image(struct hl_device * hdev,const struct firmware * fw,struct fw_load_mgr * fw_loader)2363 static int hl_fw_dynamic_copy_image(struct hl_device *hdev,
2364 const struct firmware *fw,
2365 struct fw_load_mgr *fw_loader)
2366 {
2367 struct lkd_fw_comms_desc *fw_desc;
2368 struct pci_mem_region *region;
2369 void __iomem *dest;
2370 u64 addr;
2371 int rc;
2372
2373 fw_desc = &fw_loader->dynamic_loader.comm_desc;
2374 addr = le64_to_cpu(fw_desc->img_addr);
2375
2376 /* find memory region to which to copy the image */
2377 region = fw_loader->dynamic_loader.image_region;
2378
2379 dest = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2380 (addr - region->region_base);
2381
2382 rc = hl_fw_copy_fw_to_device(hdev, fw, dest,
2383 fw_loader->boot_fit_img.src_off,
2384 fw_loader->boot_fit_img.copy_size);
2385
2386 return rc;
2387 }
2388
2389 /**
2390 * hl_fw_dynamic_copy_msg - copy msg to memory allocated by the FW
2391 *
2392 * @hdev: pointer to the habanalabs device structure
2393 * @msg: message
2394 * @fw_loader: managing structure for loading device's FW
2395 */
hl_fw_dynamic_copy_msg(struct hl_device * hdev,struct lkd_msg_comms * msg,struct fw_load_mgr * fw_loader)2396 static int hl_fw_dynamic_copy_msg(struct hl_device *hdev,
2397 struct lkd_msg_comms *msg, struct fw_load_mgr *fw_loader)
2398 {
2399 struct lkd_fw_comms_desc *fw_desc;
2400 struct pci_mem_region *region;
2401 void __iomem *dest;
2402 u64 addr;
2403 int rc;
2404
2405 fw_desc = &fw_loader->dynamic_loader.comm_desc;
2406 addr = le64_to_cpu(fw_desc->img_addr);
2407
2408 /* find memory region to which to copy the image */
2409 region = fw_loader->dynamic_loader.image_region;
2410
2411 dest = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
2412 (addr - region->region_base);
2413
2414 rc = hl_fw_copy_msg_to_device(hdev, msg, dest, 0, 0);
2415
2416 return rc;
2417 }
2418
2419 /**
2420 * hl_fw_boot_fit_update_state - update internal data structures after boot-fit
2421 * is loaded
2422 *
2423 * @hdev: pointer to the habanalabs device structure
2424 * @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
2425 * @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
2426 *
2427 * @return 0 on success, otherwise non-zero error code
2428 */
hl_fw_boot_fit_update_state(struct hl_device * hdev,u32 cpu_boot_dev_sts0_reg,u32 cpu_boot_dev_sts1_reg)2429 static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
2430 u32 cpu_boot_dev_sts0_reg,
2431 u32 cpu_boot_dev_sts1_reg)
2432 {
2433 struct asic_fixed_properties *prop = &hdev->asic_prop;
2434
2435 hdev->fw_loader.fw_comp_loaded |= FW_TYPE_BOOT_CPU;
2436
2437 /* Read boot_cpu status bits */
2438 if (prop->fw_preboot_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED) {
2439 prop->fw_bootfit_cpu_boot_dev_sts0 =
2440 RREG32(cpu_boot_dev_sts0_reg);
2441
2442 prop->hard_reset_done_by_fw = !!(prop->fw_bootfit_cpu_boot_dev_sts0 &
2443 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
2444
2445 dev_dbg(hdev->dev, "Firmware boot CPU status0 %#x\n",
2446 prop->fw_bootfit_cpu_boot_dev_sts0);
2447 }
2448
2449 if (prop->fw_cpu_boot_dev_sts1_valid) {
2450 prop->fw_bootfit_cpu_boot_dev_sts1 =
2451 RREG32(cpu_boot_dev_sts1_reg);
2452
2453 dev_dbg(hdev->dev, "Firmware boot CPU status1 %#x\n",
2454 prop->fw_bootfit_cpu_boot_dev_sts1);
2455 }
2456
2457 dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
2458 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
2459 }
2460
hl_fw_dynamic_update_linux_interrupt_if(struct hl_device * hdev)2461 static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
2462 {
2463 struct cpu_dyn_regs *dyn_regs =
2464 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2465
2466 /* Check whether all 3 interrupt interfaces are set, if not use a
2467 * single interface
2468 */
2469 if (!hdev->asic_prop.gic_interrupts_enable &&
2470 !(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2471 CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
2472 dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
2473 dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
2474
2475 dev_warn(hdev->dev,
2476 "Using a single interrupt interface towards cpucp");
2477 }
2478 }
2479 /**
2480 * hl_fw_dynamic_load_image - load FW image using dynamic protocol
2481 *
2482 * @hdev: pointer to the habanalabs device structure
2483 * @fw_loader: managing structure for loading device's FW
2484 * @load_fwc: the FW component to be loaded
2485 * @img_ld_timeout: image load timeout
2486 *
2487 * @return 0 on success, otherwise non-zero error code
2488 */
hl_fw_dynamic_load_image(struct hl_device * hdev,struct fw_load_mgr * fw_loader,enum hl_fw_component load_fwc,u32 img_ld_timeout)2489 static int hl_fw_dynamic_load_image(struct hl_device *hdev,
2490 struct fw_load_mgr *fw_loader,
2491 enum hl_fw_component load_fwc,
2492 u32 img_ld_timeout)
2493 {
2494 enum hl_fw_component cur_fwc;
2495 const struct firmware *fw;
2496 char *fw_name;
2497 int rc = 0;
2498
2499 /*
2500 * when loading image we have one of 2 scenarios:
2501 * 1. current FW component is preboot and we want to load boot-fit
2502 * 2. current FW component is boot-fit and we want to load linux
2503 */
2504 if (load_fwc == FW_COMP_BOOT_FIT) {
2505 cur_fwc = FW_COMP_PREBOOT;
2506 fw_name = fw_loader->boot_fit_img.image_name;
2507 } else {
2508 cur_fwc = FW_COMP_BOOT_FIT;
2509 fw_name = fw_loader->linux_img.image_name;
2510 }
2511
2512 /* request FW in order to communicate to FW the size to be allocated */
2513 rc = hl_request_fw(hdev, &fw, fw_name);
2514 if (rc)
2515 return rc;
2516
2517 /* store the image size for future validation */
2518 fw_loader->dynamic_loader.fw_image_size = fw->size;
2519
2520 rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, fw->size);
2521 if (rc)
2522 goto release_fw;
2523
2524 /* read preboot version */
2525 rc = hl_fw_dynamic_read_device_fw_version(hdev, cur_fwc,
2526 fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
2527 if (rc)
2528 goto release_fw;
2529
2530 /* copy boot fit to space allocated by FW */
2531 rc = hl_fw_dynamic_copy_image(hdev, fw, fw_loader);
2532 if (rc)
2533 goto release_fw;
2534
2535 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_DATA_RDY,
2536 0, true,
2537 fw_loader->cpu_timeout);
2538 if (rc)
2539 goto release_fw;
2540
2541 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_EXEC,
2542 0, false,
2543 img_ld_timeout);
2544
2545 release_fw:
2546 hl_release_firmware(fw);
2547 return rc;
2548 }
2549
hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device * hdev,struct fw_load_mgr * fw_loader)2550 static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
2551 struct fw_load_mgr *fw_loader)
2552 {
2553 struct dynamic_fw_load_mgr *dyn_loader;
2554 u32 status;
2555 int rc;
2556
2557 dyn_loader = &fw_loader->dynamic_loader;
2558
2559 /*
2560 * Make sure CPU boot-loader is running
2561 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
2562 * yet there is a debug scenario in which we loading uboot (without Linux)
2563 * which at later stage is relocated to DRAM. In this case we expect
2564 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
2565 * poll flags
2566 */
2567 rc = hl_poll_timeout(
2568 hdev,
2569 le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
2570 status,
2571 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
2572 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2573 hdev->fw_poll_interval_usec,
2574 dyn_loader->wait_for_bl_timeout);
2575 if (rc) {
2576 dev_err(hdev->dev, "failed to wait for boot (status = %d)\n", status);
2577 return rc;
2578 }
2579
2580 dev_dbg(hdev->dev, "uboot status = %d\n", status);
2581 return 0;
2582 }
2583
hl_fw_dynamic_wait_for_linux_active(struct hl_device * hdev,struct fw_load_mgr * fw_loader)2584 static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
2585 struct fw_load_mgr *fw_loader)
2586 {
2587 struct dynamic_fw_load_mgr *dyn_loader;
2588 u32 status;
2589 int rc;
2590
2591 dyn_loader = &fw_loader->dynamic_loader;
2592
2593 /* Make sure CPU linux is running */
2594
2595 rc = hl_poll_timeout(
2596 hdev,
2597 le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
2598 status,
2599 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2600 hdev->fw_poll_interval_usec,
2601 fw_loader->cpu_timeout);
2602 if (rc) {
2603 dev_err(hdev->dev, "failed to wait for Linux (status = %d)\n", status);
2604 return rc;
2605 }
2606
2607 dev_dbg(hdev->dev, "Boot status = %d\n", status);
2608 return 0;
2609 }
2610
2611 /**
2612 * hl_fw_linux_update_state - update internal data structures after Linux
2613 * is loaded.
2614 * Note: Linux initialization is comprised mainly
2615 * of two stages - loading kernel (SRAM_AVAIL)
2616 * & loading ARMCP.
2617 * Therefore reading boot device status in any of
2618 * these stages might result in different values.
2619 *
2620 * @hdev: pointer to the habanalabs device structure
2621 * @cpu_boot_dev_sts0_reg: register holding CPU boot dev status 0
2622 * @cpu_boot_dev_sts1_reg: register holding CPU boot dev status 1
2623 *
2624 * @return 0 on success, otherwise non-zero error code
2625 */
hl_fw_linux_update_state(struct hl_device * hdev,u32 cpu_boot_dev_sts0_reg,u32 cpu_boot_dev_sts1_reg)2626 static void hl_fw_linux_update_state(struct hl_device *hdev,
2627 u32 cpu_boot_dev_sts0_reg,
2628 u32 cpu_boot_dev_sts1_reg)
2629 {
2630 struct asic_fixed_properties *prop = &hdev->asic_prop;
2631
2632 hdev->fw_loader.fw_comp_loaded |= FW_TYPE_LINUX;
2633
2634 /* Read FW application security bits */
2635 if (prop->fw_cpu_boot_dev_sts0_valid) {
2636 prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
2637
2638 prop->hard_reset_done_by_fw = !!(prop->fw_app_cpu_boot_dev_sts0 &
2639 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
2640
2641 if (prop->fw_app_cpu_boot_dev_sts0 &
2642 CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN)
2643 prop->gic_interrupts_enable = false;
2644
2645 dev_dbg(hdev->dev,
2646 "Firmware application CPU status0 %#x\n",
2647 prop->fw_app_cpu_boot_dev_sts0);
2648
2649 dev_dbg(hdev->dev, "GIC controller is %s\n",
2650 prop->gic_interrupts_enable ?
2651 "enabled" : "disabled");
2652 }
2653
2654 if (prop->fw_cpu_boot_dev_sts1_valid) {
2655 prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
2656
2657 dev_dbg(hdev->dev,
2658 "Firmware application CPU status1 %#x\n",
2659 prop->fw_app_cpu_boot_dev_sts1);
2660 }
2661
2662 dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
2663 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
2664
2665 dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2666 }
2667
2668 /**
2669 * hl_fw_dynamic_send_msg - send a COMMS message with attached data
2670 *
2671 * @hdev: pointer to the habanalabs device structure
2672 * @fw_loader: managing structure for loading device's FW
2673 * @msg_type: message type
2674 * @data: data to be sent
2675 *
2676 * @return 0 on success, otherwise non-zero error code
2677 */
hl_fw_dynamic_send_msg(struct hl_device * hdev,struct fw_load_mgr * fw_loader,u8 msg_type,void * data)2678 static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
2679 struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
2680 {
2681 struct lkd_msg_comms *msg;
2682 int rc;
2683
2684 msg = kzalloc(sizeof(*msg), GFP_KERNEL);
2685 if (!msg)
2686 return -ENOMEM;
2687
2688 /* create message to be sent */
2689 msg->header.type = msg_type;
2690 msg->header.size = cpu_to_le16(sizeof(struct comms_msg_header));
2691 msg->header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
2692
2693 switch (msg_type) {
2694 case HL_COMMS_RESET_CAUSE_TYPE:
2695 msg->reset_cause = *(__u8 *) data;
2696 break;
2697
2698 default:
2699 dev_err(hdev->dev,
2700 "Send COMMS message - invalid message type %u\n",
2701 msg_type);
2702 rc = -EINVAL;
2703 goto out;
2704 }
2705
2706 rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
2707 sizeof(struct lkd_msg_comms));
2708 if (rc)
2709 goto out;
2710
2711 /* copy message to space allocated by FW */
2712 rc = hl_fw_dynamic_copy_msg(hdev, msg, fw_loader);
2713 if (rc)
2714 goto out;
2715
2716 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_DATA_RDY,
2717 0, true,
2718 fw_loader->cpu_timeout);
2719 if (rc)
2720 goto out;
2721
2722 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_EXEC,
2723 0, true,
2724 fw_loader->cpu_timeout);
2725
2726 out:
2727 kfree(msg);
2728 return rc;
2729 }
2730
2731 /**
2732 * hl_fw_dynamic_init_cpu - initialize the device CPU using dynamic protocol
2733 *
2734 * @hdev: pointer to the habanalabs device structure
2735 * @fw_loader: managing structure for loading device's FW
2736 *
2737 * @return 0 on success, otherwise non-zero error code
2738 *
2739 * brief: the dynamic protocol is master (LKD) slave (FW CPU) protocol.
2740 * the communication is done using registers:
2741 * - LKD command register
2742 * - FW status register
2743 * the protocol is race free. this goal is achieved by splitting the requests
2744 * and response to known synchronization points between the LKD and the FW.
2745 * each response to LKD request is known and bound to a predefined timeout.
2746 * in case of timeout expiration without the desired status from FW- the
2747 * protocol (and hence the boot) will fail.
2748 */
hl_fw_dynamic_init_cpu(struct hl_device * hdev,struct fw_load_mgr * fw_loader)2749 static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
2750 struct fw_load_mgr *fw_loader)
2751 {
2752 struct cpu_dyn_regs *dyn_regs;
2753 int rc, fw_error_rc;
2754
2755 dev_info(hdev->dev,
2756 "Loading %sfirmware to device, may take some time...\n",
2757 hdev->asic_prop.fw_security_enabled ? "secured " : "");
2758
2759 /* initialize FW descriptor as invalid */
2760 fw_loader->dynamic_loader.fw_desc_valid = false;
2761
2762 /*
2763 * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
2764 * It will be updated from FW after hl_fw_dynamic_request_descriptor().
2765 */
2766 dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
2767
2768 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
2769 0, true,
2770 fw_loader->cpu_timeout);
2771 if (rc)
2772 goto protocol_err;
2773
2774 if (hdev->reset_info.curr_reset_cause) {
2775 rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
2776 HL_COMMS_RESET_CAUSE_TYPE, &hdev->reset_info.curr_reset_cause);
2777 if (rc)
2778 goto protocol_err;
2779
2780 /* Clear current reset cause */
2781 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
2782 }
2783
2784 rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms));
2785 if (rc)
2786 goto protocol_err;
2787
2788 if (hdev->asic_prop.support_dynamic_resereved_fw_size)
2789 hdev->asic_prop.reserved_fw_mem_size =
2790 le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M;
2791
2792 if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
2793 struct lkd_fw_binning_info *binning_info;
2794
2795 /* read preboot version */
2796 rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
2797 fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
2798 if (rc)
2799 return rc;
2800
2801 /* read binning info from preboot */
2802 if (hdev->support_preboot_binning) {
2803 binning_info = &fw_loader->dynamic_loader.comm_desc.binning_info;
2804 hdev->tpc_binning = le64_to_cpu(binning_info->tpc_mask_l);
2805 hdev->dram_binning = le32_to_cpu(binning_info->dram_mask);
2806 hdev->edma_binning = le32_to_cpu(binning_info->edma_mask);
2807 hdev->decoder_binning = le32_to_cpu(binning_info->dec_mask);
2808 hdev->rotator_binning = le32_to_cpu(binning_info->rot_mask);
2809
2810 rc = hdev->asic_funcs->set_dram_properties(hdev);
2811 if (rc)
2812 return rc;
2813
2814 rc = hdev->asic_funcs->set_binning_masks(hdev);
2815 if (rc)
2816 return rc;
2817
2818 dev_dbg(hdev->dev,
2819 "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x, rot:0x%x\n",
2820 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2821 hdev->decoder_binning, hdev->rotator_binning);
2822 }
2823
2824 return 0;
2825 }
2826
2827 /* load boot fit to FW */
2828 rc = hl_fw_dynamic_load_image(hdev, fw_loader, FW_COMP_BOOT_FIT,
2829 fw_loader->boot_fit_timeout);
2830 if (rc) {
2831 dev_err(hdev->dev, "failed to load boot fit\n");
2832 goto protocol_err;
2833 }
2834
2835 rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
2836 if (rc)
2837 goto protocol_err;
2838
2839 hl_fw_boot_fit_update_state(hdev,
2840 le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2841 le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2842
2843 /*
2844 * when testing FW load (without Linux) on PLDM we don't want to
2845 * wait until boot fit is active as it may take several hours.
2846 * instead, we load the bootfit and let it do all initialization in
2847 * the background.
2848 */
2849 if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
2850 return 0;
2851
2852 /* Enable DRAM scrambling before Linux boot and after successful
2853 * UBoot
2854 */
2855 hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
2856
2857 if (!(hdev->fw_components & FW_TYPE_LINUX)) {
2858 dev_dbg(hdev->dev, "Skip loading Linux F/W\n");
2859 return 0;
2860 }
2861
2862 if (fw_loader->skip_bmc) {
2863 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader,
2864 COMMS_SKIP_BMC, 0,
2865 true,
2866 fw_loader->cpu_timeout);
2867 if (rc) {
2868 dev_err(hdev->dev, "failed to load boot fit\n");
2869 goto protocol_err;
2870 }
2871 }
2872
2873 /* load Linux image to FW */
2874 rc = hl_fw_dynamic_load_image(hdev, fw_loader, FW_COMP_LINUX,
2875 fw_loader->cpu_timeout);
2876 if (rc) {
2877 dev_err(hdev->dev, "failed to load Linux\n");
2878 goto protocol_err;
2879 }
2880
2881 rc = hl_fw_dynamic_wait_for_linux_active(hdev, fw_loader);
2882 if (rc)
2883 goto protocol_err;
2884
2885 hl_fw_linux_update_state(hdev,
2886 le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2887 le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2888
2889 hl_fw_dynamic_update_linux_interrupt_if(hdev);
2890
2891 protocol_err:
2892 if (fw_loader->dynamic_loader.fw_desc_valid) {
2893 fw_error_rc = fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
2894 le32_to_cpu(dyn_regs->cpu_boot_err1),
2895 le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
2896 le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
2897
2898 if (fw_error_rc)
2899 return fw_error_rc;
2900 }
2901
2902 return rc;
2903 }
2904
2905 /**
2906 * hl_fw_static_init_cpu - initialize the device CPU using static protocol
2907 *
2908 * @hdev: pointer to the habanalabs device structure
2909 * @fw_loader: managing structure for loading device's FW
2910 *
2911 * @return 0 on success, otherwise non-zero error code
2912 */
hl_fw_static_init_cpu(struct hl_device * hdev,struct fw_load_mgr * fw_loader)2913 static int hl_fw_static_init_cpu(struct hl_device *hdev,
2914 struct fw_load_mgr *fw_loader)
2915 {
2916 u32 cpu_msg_status_reg, cpu_timeout, msg_to_cpu_reg, status;
2917 u32 cpu_boot_dev_status0_reg, cpu_boot_dev_status1_reg;
2918 struct static_fw_load_mgr *static_loader;
2919 u32 cpu_boot_status_reg;
2920 int rc;
2921
2922 if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
2923 return 0;
2924
2925 /* init common loader parameters */
2926 cpu_timeout = fw_loader->cpu_timeout;
2927
2928 /* init static loader parameters */
2929 static_loader = &fw_loader->static_loader;
2930 cpu_msg_status_reg = static_loader->cpu_cmd_status_to_host_reg;
2931 msg_to_cpu_reg = static_loader->kmd_msg_to_cpu_reg;
2932 cpu_boot_dev_status0_reg = static_loader->cpu_boot_dev_status0_reg;
2933 cpu_boot_dev_status1_reg = static_loader->cpu_boot_dev_status1_reg;
2934 cpu_boot_status_reg = static_loader->cpu_boot_status_reg;
2935
2936 dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
2937 cpu_timeout / USEC_PER_SEC);
2938
2939 /* Wait for boot FIT request */
2940 rc = hl_poll_timeout(
2941 hdev,
2942 cpu_boot_status_reg,
2943 status,
2944 status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
2945 hdev->fw_poll_interval_usec,
2946 fw_loader->boot_fit_timeout);
2947
2948 if (rc) {
2949 dev_dbg(hdev->dev,
2950 "No boot fit request received (status = %d), resuming boot\n", status);
2951 } else {
2952 rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
2953 if (rc)
2954 goto out;
2955
2956 /* Clear device CPU message status */
2957 WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
2958
2959 /* Signal device CPU that boot loader is ready */
2960 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
2961
2962 /* Poll for CPU device ack */
2963 rc = hl_poll_timeout(
2964 hdev,
2965 cpu_msg_status_reg,
2966 status,
2967 status == CPU_MSG_OK,
2968 hdev->fw_poll_interval_usec,
2969 fw_loader->boot_fit_timeout);
2970
2971 if (rc) {
2972 dev_err(hdev->dev,
2973 "Timeout waiting for boot fit load ack (status = %d)\n", status);
2974 goto out;
2975 }
2976
2977 /* Clear message */
2978 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
2979 }
2980
2981 /*
2982 * Make sure CPU boot-loader is running
2983 * Note that the CPU_BOOT_STATUS_SRAM_AVAIL is generally set by Linux
2984 * yet there is a debug scenario in which we loading uboot (without Linux)
2985 * which at later stage is relocated to DRAM. In this case we expect
2986 * uboot to set the CPU_BOOT_STATUS_SRAM_AVAIL and so we add it to the
2987 * poll flags
2988 */
2989 rc = hl_poll_timeout(
2990 hdev,
2991 cpu_boot_status_reg,
2992 status,
2993 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
2994 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
2995 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
2996 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2997 hdev->fw_poll_interval_usec,
2998 cpu_timeout);
2999
3000 dev_dbg(hdev->dev, "uboot status = %d\n", status);
3001
3002 /* Read U-Boot version now in case we will later fail */
3003 hl_fw_static_read_device_fw_version(hdev, FW_COMP_BOOT_FIT);
3004
3005 /* update state according to boot stage */
3006 hl_fw_boot_fit_update_state(hdev, cpu_boot_dev_status0_reg,
3007 cpu_boot_dev_status1_reg);
3008
3009 if (rc) {
3010 detect_cpu_boot_status(hdev, status);
3011 rc = -EIO;
3012 goto out;
3013 }
3014
3015 /* Enable DRAM scrambling before Linux boot and after successful
3016 * UBoot
3017 */
3018 hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
3019
3020 if (!(hdev->fw_components & FW_TYPE_LINUX)) {
3021 dev_info(hdev->dev, "Skip loading Linux F/W\n");
3022 rc = 0;
3023 goto out;
3024 }
3025
3026 if (status == CPU_BOOT_STATUS_SRAM_AVAIL) {
3027 rc = 0;
3028 goto out;
3029 }
3030
3031 dev_info(hdev->dev,
3032 "Loading firmware to device, may take some time...\n");
3033
3034 rc = hdev->asic_funcs->load_firmware_to_device(hdev);
3035 if (rc)
3036 goto out;
3037
3038 if (fw_loader->skip_bmc) {
3039 WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
3040
3041 rc = hl_poll_timeout(
3042 hdev,
3043 cpu_boot_status_reg,
3044 status,
3045 (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
3046 hdev->fw_poll_interval_usec,
3047 cpu_timeout);
3048
3049 if (rc) {
3050 dev_err(hdev->dev,
3051 "Failed to get ACK on skipping BMC (status = %d)\n",
3052 status);
3053 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
3054 rc = -EIO;
3055 goto out;
3056 }
3057 }
3058
3059 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
3060
3061 rc = hl_poll_timeout(
3062 hdev,
3063 cpu_boot_status_reg,
3064 status,
3065 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
3066 hdev->fw_poll_interval_usec,
3067 cpu_timeout);
3068
3069 /* Clear message */
3070 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
3071
3072 if (rc) {
3073 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
3074 dev_err(hdev->dev,
3075 "Device reports FIT image is corrupted\n");
3076 else
3077 dev_err(hdev->dev,
3078 "Failed to load firmware to device (status = %d)\n",
3079 status);
3080
3081 rc = -EIO;
3082 goto out;
3083 }
3084
3085 rc = fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
3086 fw_loader->static_loader.boot_err1_reg,
3087 cpu_boot_dev_status0_reg,
3088 cpu_boot_dev_status1_reg);
3089 if (rc)
3090 return rc;
3091
3092 hl_fw_linux_update_state(hdev, cpu_boot_dev_status0_reg,
3093 cpu_boot_dev_status1_reg);
3094
3095 return 0;
3096
3097 out:
3098 fw_read_errors(hdev, fw_loader->static_loader.boot_err0_reg,
3099 fw_loader->static_loader.boot_err1_reg,
3100 cpu_boot_dev_status0_reg,
3101 cpu_boot_dev_status1_reg);
3102
3103 return rc;
3104 }
3105
3106 /**
3107 * hl_fw_init_cpu - initialize the device CPU
3108 *
3109 * @hdev: pointer to the habanalabs device structure
3110 *
3111 * @return 0 on success, otherwise non-zero error code
3112 *
3113 * perform necessary initializations for device's CPU. takes into account if
3114 * init protocol is static or dynamic.
3115 */
hl_fw_init_cpu(struct hl_device * hdev)3116 int hl_fw_init_cpu(struct hl_device *hdev)
3117 {
3118 struct asic_fixed_properties *prop = &hdev->asic_prop;
3119 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3120
3121 return prop->dynamic_fw_load ?
3122 hl_fw_dynamic_init_cpu(hdev, fw_loader) :
3123 hl_fw_static_init_cpu(hdev, fw_loader);
3124 }
3125
hl_fw_set_pll_profile(struct hl_device * hdev)3126 void hl_fw_set_pll_profile(struct hl_device *hdev)
3127 {
3128 hl_fw_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
3129 hdev->asic_prop.max_freq_value);
3130 }
3131
/**
 * hl_fw_get_clk_rate - read the current and max clock of the device clock PLL
 *
 * @hdev: pointer to the habanalabs device structure
 * @cur_clk: filled with the current frequency divided by 1000 twice
 * @max_clk: filled with the max frequency divided by 1000 twice
 *
 * The PLL queried is the one selected by asic_prop.clk_pll_index.
 * The scaling is presumably Hz to MHz - TODO confirm against the F/W interface.
 * When there is no PCI device (hdev->pdev is NULL) both outputs are zeroed.
 * Note that @max_clk is written before the current clock is queried, so it is
 * already updated if that second query fails.
 *
 * @return 0 on success, -ENODEV if the device is not operational, otherwise
 *         the negative value returned by hl_fw_get_frequency()
 */
int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long freq;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	if (!hdev->pdev) {
		*cur_clk = 0;
		*max_clk = 0;
		return 0;
	}

	/* max clock first */
	freq = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
	if (freq < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", freq);
		return freq;
	}

	*max_clk = (freq / 1000 / 1000);

	/* then the current clock */
	freq = hl_fw_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
	if (freq < 0) {
		dev_err(hdev->dev, "Failed to retrieve device current clock %ld\n", freq);
		return freq;
	}

	*cur_clk = (freq / 1000 / 1000);

	return 0;
}
3165
/**
 * hl_fw_get_frequency - ask the device CPU for a PLL frequency
 *
 * @hdev: pointer to the habanalabs device structure
 * @pll_index: PLL index, translated through get_used_pll_index()
 * @curr: true selects CPUCP_PACKET_FREQUENCY_CURR_GET, false selects
 *        CPUCP_PACKET_FREQUENCY_GET
 *
 * @return the result reported by the device CPU (cast to long) on success,
 *         otherwise the non-zero code of the failing step
 */
long hl_fw_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;
	u64 freq;
	int rc;

	rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
	if (rc)
		return rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32((curr ? CPUCP_PACKET_FREQUENCY_CURR_GET :
					CPUCP_PACKET_FREQUENCY_GET) <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_index = cpu_to_le32(used_pll_idx);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &freq);
	if (!rc)
		return (long) freq;

	/* -EAGAIN is passed up silently */
	if (rc != -EAGAIN)
		dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n",
			used_pll_idx, rc);

	return rc;
}
3197
/**
 * hl_fw_set_frequency - ask the device CPU to set a PLL frequency
 *
 * @hdev: pointer to the habanalabs device structure
 * @pll_index: PLL index, translated through get_used_pll_index()
 * @freq: value placed in the packet's value field
 *
 * Sends a CPUCP_PACKET_FREQUENCY_SET packet. Nothing is returned to the
 * caller; a failure other than -EAGAIN is only logged, and a failure to
 * translate @pll_index ends the function silently.
 */
void hl_fw_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
	struct cpucp_packet pkt;
	u32 used_pll_idx;
	int rc;

	if (get_used_pll_index(hdev, pll_index, &used_pll_idx))
		return;

	memset(&pkt, 0, sizeof(pkt));
	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.pll_index = cpu_to_le32(used_pll_idx);
	pkt.value = cpu_to_le64(freq);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
	if (!rc || rc == -EAGAIN)
		return;

	dev_err(hdev->dev, "Failed to set frequency to PLL %d, error %d\n",
		used_pll_idx, rc);
}
3219
hl_fw_get_max_power(struct hl_device * hdev)3220 long hl_fw_get_max_power(struct hl_device *hdev)
3221 {
3222 struct cpucp_packet pkt;
3223 u64 result;
3224 int rc;
3225
3226 memset(&pkt, 0, sizeof(pkt));
3227
3228 pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3229
3230 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result);
3231 if (rc) {
3232 if (rc != -EAGAIN)
3233 dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
3234 return rc;
3235 }
3236
3237 return result;
3238 }
3239
hl_fw_set_max_power(struct hl_device * hdev)3240 void hl_fw_set_max_power(struct hl_device *hdev)
3241 {
3242 struct cpucp_packet pkt;
3243 int rc;
3244
3245 /* TODO: remove this after simulator supports this packet */
3246 if (!hdev->pdev)
3247 return;
3248
3249 memset(&pkt, 0, sizeof(pkt));
3250
3251 pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
3252 pkt.value = cpu_to_le64(hdev->max_power);
3253
3254 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
3255 if (rc && rc != -EAGAIN)
3256 dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
3257 }
3258
/**
 * hl_fw_get_sec_attest_data - fetch a data blob from the device CPU
 *
 * @hdev: pointer to the habanalabs device structure
 * @packet_id: CPU-CP opcode placed in the packet's ctl field
 * @data: caller buffer of at least @size bytes that receives the data
 * @size: number of bytes to allocate, request and copy
 * @nonce: value placed in the packet's nonce field
 * @timeout: timeout handed to send_cpu_message()
 *
 * A bounce buffer is taken from the CPU accessible DMA pool, its DMA address
 * is sent in the packet, and on success its content is copied to @data.
 * @data is zeroed once the bounce buffer was allocated, so it stays zeroed if
 * sending the packet fails; it is left untouched if the allocation fails.
 *
 * @return 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
 *         otherwise the code returned by send_cpu_message()
 */
static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
					u32 nonce, u32 timeout)
{
	struct cpucp_packet pkt = {};
	dma_addr_t bounce_dma;
	void *bounce_buf;
	int rc;

	bounce_buf = hl_cpu_accessible_dma_pool_alloc(hdev, size, &bounce_dma);
	if (!bounce_buf) {
		dev_err(hdev->dev,
			"Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
		return -ENOMEM;
	}

	memset(data, 0, size);

	pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(bounce_dma);
	pkt.data_max_size = cpu_to_le32(size);
	pkt.nonce = cpu_to_le32(nonce);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), timeout, NULL);
	if (!rc)
		memcpy(data, bounce_buf, size);
	else if (rc != -EAGAIN)
		dev_err(hdev->dev,
			"Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);

	/* The bounce buffer is released on both the success and failure paths */
	hl_cpu_accessible_dma_pool_free(hdev, size, bounce_buf);

	return rc;
}
3296
/**
 * hl_fw_get_sec_attest_info - fetch the secured attestation info
 *
 * @hdev: pointer to the habanalabs device structure
 * @sec_attest_info: output structure, filled by hl_fw_get_sec_attest_data()
 * @nonce: value forwarded in the packet's nonce field
 *
 * Issues CPUCP_PACKET_SEC_ATTEST_GET with
 * HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC as the timeout.
 *
 * @return the code returned by hl_fw_get_sec_attest_data()
 */
int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
				u32 nonce)
{
	u32 len = sizeof(*sec_attest_info);

	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
						len, nonce,
						HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
}
3304
/**
 * hl_fw_get_dev_info_signed - fetch the signed device info
 *
 * @hdev: pointer to the habanalabs device structure
 * @dev_info_signed: output structure, filled by hl_fw_get_sec_attest_data()
 * @nonce: value forwarded in the packet's nonce field
 *
 * Issues CPUCP_PACKET_INFO_SIGNED_GET with
 * HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC as the timeout.
 *
 * @return the code returned by hl_fw_get_sec_attest_data()
 */
int hl_fw_get_dev_info_signed(struct hl_device *hdev,
				struct cpucp_dev_info_signed *dev_info_signed, u32 nonce)
{
	u32 len = sizeof(*dev_info_signed);

	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed,
						len, nonce,
						HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
}
3312
/**
 * hl_fw_send_generic_request - send a generic passthrough packet to the F/W
 *
 * @hdev: pointer to the habanalabs device structure
 * @sub_opcode: passthrough sub-opcode, placed in the packet's pkt_subidx field
 * @buff: DMA address placed in the packet's addr field
 * @size: in: value sent to the F/W as data_max_size;
 *        out: lower 32 bits of the result reported for the packet, or 0 when
 *        no result was produced
 *
 * Sends CPUCP_PACKET_GENERIC_PASSTHROUGH with HL_CPUCP_INFO_TIMEOUT_USEC as
 * the timeout. @size is always written back, also on failure.
 *
 * @return 0 on success, otherwise the code returned by send_cpu_message()
 */
int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
						dma_addr_t buff, u32 *size)
{
	struct cpucp_packet pkt = {};
	/*
	 * Must start from a known value: result is copied to *size
	 * unconditionally, and nothing visible here guarantees that
	 * send_cpu_message() writes it when it fails.
	 */
	u64 result = 0;
	int rc;

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_GENERIC_PASSTHROUGH << CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(buff);
	pkt.data_max_size = cpu_to_le32(*size);
	pkt.pkt_subidx = cpu_to_le32(sub_opcode);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt),
						HL_CPUCP_INFO_TIMEOUT_USEC, &result);
	if (rc) {
		/* -EAGAIN is passed up silently */
		if (rc != -EAGAIN)
			dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n");
	} else {
		dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result);
	}

	*size = (u32)result;

	return rc;
}
3338